relaton-iso 1.2.1 → 1.6.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/macos.yml +2 -0
- data/README.adoc +73 -122
- data/lib/relaton_iso/iso_bibliography.rb +51 -33
- data/lib/relaton_iso/scrapper.rb +60 -31
- data/lib/relaton_iso/version.rb +1 -1
- data/relaton_iso.gemspec +2 -2
- metadata +11 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9633da24028b24feabd31b81e93ee19e2af47994b26305d7c023aecc3ca9a090
|
4
|
+
data.tar.gz: 910de158883fc2f7cff8c36c4d78cd44fdf7f6d0f215096e0375b4a96389dcff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d413b2093884a16461db0a7358835f2279d8b227f54469caaf5bd215fa9a270f419844678359471fa16c9d7968e0339177ce37326113ebb7b6e88bc933a1115
|
7
|
+
data.tar.gz: 5c883c4a58a857589ad9019507a87e68a137b3df605bd24e3d21a563b0796e5c0f56981515ec72f46dd7769c6504178758c9abcc0640b0b960c2c7c9f00bd7f3
|
data/.github/workflows/macos.yml
CHANGED
@@ -26,6 +26,8 @@ jobs:
|
|
26
26
|
- name: Update gems
|
27
27
|
run: |
|
28
28
|
sudo gem install bundler --force
|
29
|
+
ruby -v | grep 2.5 && bundle config set build.debase --with-cflags="-Wno-error=implicit-function-declaration"
|
30
|
+
ruby -v | grep 2.5 && bundle config set build.ruby-debug-ide --with-cflags="-Wno-error=implicit-function-declaration"
|
29
31
|
bundle install --jobs 4 --retry 3
|
30
32
|
- name: Run specs
|
31
33
|
run: |
|
data/README.adoc
CHANGED
@@ -35,26 +35,28 @@ Or install it yourself as:
|
|
35
35
|
|
36
36
|
[source,ruby]
|
37
37
|
----
|
38
|
-
|
39
|
-
=>
|
40
|
-
|
41
|
-
|
42
|
-
|
38
|
+
require 'relaton_iso'
|
39
|
+
=> true
|
40
|
+
|
41
|
+
hit_collection = RelatonIso::IsoBibliography.search("ISO 19115")
|
42
|
+
=> <RelatonIso::HitCollection:0x007fa5bc847038 @ref=19115 @fetched=false>
|
43
43
|
|
44
|
-
hit_collection
|
45
|
-
=>
|
46
|
-
<RelatonIso::Hit:0x007f9a049df0a8 @text="19115" @fullIdentifier="" @matchedWords=["19115"] @category="standard" @title="ISO 19115-1:2014 Geographic information -- Metadata -- Part 1: Fundamentals">,
|
47
|
-
...
|
48
|
-
<RelatonIso::Hit:0x007f9a049deef0 @text="19115" @fullIdentifier="" @matchedWords=["19115"] @category="standard" @title="ISO/TS 19139-2:2012 Geographic information -- Metadata -- XML schema implementation -- Part 2: Extensions for imagery and gridded data">,
|
49
|
-
<RelatonIso::Hit:0x007f9a049deec8 @text="19115" @fullIdentifier="" @matchedWords=["19115"] @category="standard" @title="ISO/TS 19157-2:2016 Geographic information -- Data quality -- Part 2: XML schema implementation">]
|
44
|
+
hit_collection.first
|
45
|
+
=> <RelatonIso::Hit:0x007f87e71ea9f8 @text="ISO 19115" @fetched="false" @fullIdentifier="" @title="">
|
50
46
|
|
51
|
-
item = hit_collection[
|
52
|
-
=> #<RelatonIsoBib::IsoBibliographicItem:
|
47
|
+
item = hit_collection[2].fetch
|
48
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x007fa5dca89510
|
53
49
|
@abstract=
|
54
|
-
[#<RelatonBib::FormattedString:
|
50
|
+
[#<RelatonBib::FormattedString:0x007fa5dca88458
|
55
51
|
@content=
|
56
|
-
"ISO 19115-
|
57
|
-
|
52
|
+
"ISO/TS 19115-3:2016 defines an integrated XML implementation of ISO 19115‑1, ..."
|
53
|
+
|
54
|
+
item.docidentifier
|
55
|
+
=> [#<RelatonBib::DocumentIdentifier:0x007fd9ce9c6878 @id="ISO/TS 19115-3:2016", @scope=nil, @type="ISO">,
|
56
|
+
#<RelatonBib::DocumentIdentifier:0x007fd9ce9c6350 @id="urn:iso:std:iso-ts:ts:19115:-3:stage-90.92:ed-1:en,fr", @scope=nil, @type="URN">]
|
57
|
+
|
58
|
+
item.docidentifier.detect { |di| di.type == "URN" }.id
|
59
|
+
=> "urn:iso:std:iso-ts:ts:19115:-3:stage-90.92:ed-1:en,fr"
|
58
60
|
----
|
59
61
|
|
60
62
|
=== XML serialization
|
@@ -67,79 +69,38 @@ Possible options:
|
|
67
69
|
[source,ruby]
|
68
70
|
----
|
69
71
|
item.to_xml
|
70
|
-
|
71
|
-
<
|
72
|
+
"<bibitem id="ISO/TS19115-3-2016" type="standard">
|
73
|
+
<fetched>2020-09-11</fetched>
|
72
74
|
<title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
|
73
|
-
<title type="title-
|
74
|
-
<title type="
|
75
|
-
<title type="
|
75
|
+
<title type="title-main" format="text/plain" language="en" script="Latn"> Metadata</title>
|
76
|
+
<title type="title-part" format="text/plain" language="en" script="Latn">Part 3: XML schema implementation for fundamental concepts</title>
|
77
|
+
<title type="main" format="text/plain" language="en" script="Latn">Geographic information - Metadata - Part 3: XML schema implementation for fundamental concepts</title>
|
76
78
|
<title type="title-intro" format="text/plain" language="fr" script="Latn">Information géographique</title>
|
77
|
-
<title type="title-
|
78
|
-
<title type="
|
79
|
-
<
|
80
|
-
<uri type="
|
81
|
-
<uri type="
|
82
|
-
<
|
83
|
-
<
|
84
|
-
|
85
|
-
|
86
|
-
<contributor>
|
87
|
-
<role type="publisher"/>
|
88
|
-
<organization>
|
89
|
-
<name>International Organization for Standardization</name>
|
90
|
-
<abbreviation>ISO</abbreviation>
|
91
|
-
<uri>www.iso.org</uri>
|
92
|
-
</organization>
|
93
|
-
</contributor>
|
94
|
-
<edition>1</edition>
|
95
|
-
<language>en</language>
|
96
|
-
<language>fr</language>
|
97
|
-
<script>Latn</script>
|
98
|
-
<abstract format="text/plain" language="en" script="Latn">ISO 19115-1:2014 defines the schema required for describing geographic information and services by means of metadata. It provides information about the identification, the extent, the quality, the spatial and temporal aspects, the content, the spatial reference, the portrayal, distribution, and other properties of digital geographic data and services.ISO 19115-1:2014 is applicable to:-the cataloguing of all types of resources, clearinghouse activities, and the full description of datasets and services;-geographic services, geographic datasets, dataset series, and individual geographic features and feature properties.ISO 19115-1:2014 defines:-mandatory and conditional metadata sections, metadata entities, and metadata elements;-the minimum set of metadata required to serve most metadata applications (data discovery, determining data fitness for use, data access, data transfer, and use of digital data and services);-optional metadata elements to allow for a more extensive standard description of resources, if required;-a method for extending metadata to fit specialized needs.Though ISO 19115-1:2014 is applicable to digital data and services, its principles can be extended to many other types of resources such as maps, charts, and textual documents as well as non-geographic data. Certain conditional metadata elements might not apply to these other forms of data.</abstract>
|
99
|
-
<abstract format="text/plain" language="fr" script="Latn">L'ISO 19115-1:2014 définit le schéma requis pour décrire des informations géographiques et des services au moyen de métadonnées. Elle fournit des informations concernant l'identification, l'étendue, la qualité, les aspects spatiaux et temporels, le contenu, la référence spatiale, la représentation des données, la distribution et d'autres propriétés des données géographiques numériques et des services.L'ISO 19115-1:2014 est applicable:-au catalogage de tous les types de ressources, des activités des centres d'informations et à la description complète des jeux de données et des services,-aux services géographiques, jeux de données géographiques, séries de jeux de données, entités géographiques individuelles et propriétés d'entités.L'ISO 19115-1:2014 définit:-des sections relatives aux métadonnées obligatoires et facultatives, aux entités de métadonnées et aux éléments de métadonnées,-le jeu minimal de métadonnées requis pour répondre au besoin de la plupart des applications des métadonnées (la découverte des données, la détermination de l'adéquation des données à une utilisation, l'accès aux données, le transfert des données et l'utilisation des données numériques et des services),-les éléments de métadonnées facultatifs pour permettre une description standard plus poussée des ressources, si cela est nécessaire,-un procédé d'extension des métadonnées pour s'adapter aux besoins spéciaux.L'ISO 19115-1:2014 est applicable aux données numériques et services, ses principes peuvent être étendus à bien d'autres types de ressources telles que les cartes, les graphes et les documents textes, de même qu'à des données non géographiques. Certains éléments de métadonnées conditionnels peuvent ne pas s'appliquer à ces autres formes de données.</abstract>
|
100
|
-
<status>
|
101
|
-
<stage>90</stage>
|
102
|
-
<substage>20</substage>
|
103
|
-
</status>
|
104
|
-
<copyright>
|
105
|
-
<from>2014</from>
|
106
|
-
<owner>
|
107
|
-
<organization>
|
108
|
-
<name>ISO</name>
|
109
|
-
</organization>
|
110
|
-
</owner>
|
111
|
-
</copyright>
|
112
|
-
<relation type="obsoletes">
|
113
|
-
<bibitem>
|
114
|
-
<formattedref format="text/plain">ISO 19115:2003</formattedref>
|
115
|
-
</bibitem>
|
116
|
-
</relation>
|
117
|
-
<relation type="obsoletes">
|
118
|
-
<bibitem>
|
119
|
-
<formattedref format="text/plain">ISO 19115:2003/Cor 1:2006</formattedref>
|
120
|
-
</bibitem>
|
121
|
-
</relation>
|
122
|
-
<relation type="updates">
|
123
|
-
<bibitem>
|
124
|
-
<formattedref format="text/plain">ISO 19115-1:2014/Amd 1:2018</formattedref>
|
125
|
-
</bibitem>
|
126
|
-
</relation>
|
79
|
+
<title type="title-main" format="text/plain" language="fr" script="Latn">Métadonnées</title>
|
80
|
+
<title type="title-part" format="text/plain" language="fr" script="Latn">Partie 3: Mise en oeuvre par des schémas XML</title>
|
81
|
+
<title type="main" format="text/plain" language="fr" script="Latn">Information géographique - Métadonnées - Partie 3: Mise en oeuvre par des schémas XML</title>
|
82
|
+
<uri type="src">https://www.iso.org/standard/32579.html</uri>
|
83
|
+
<uri type="obp">https://www.iso.org/obp/ui/#!iso:std:32579:en</uri>
|
84
|
+
<uri type="rss">https://www.iso.org/contents/data/standard/03/25/32579.detail.rss</uri>
|
85
|
+
<docidentifier type="ISO">ISO/TS 19115-3:2016</docidentifier>
|
86
|
+
<docidentifier type="URN">urn:iso:std:iso-ts:ts:19115:-3:stage-90.92:ed-1:en,fr</docidentifier>
|
87
|
+
...
|
127
88
|
</bibitem>"
|
128
89
|
|
129
90
|
item.to_xml bibdata: true
|
130
|
-
=>"<bibdata>
|
91
|
+
=>"<bibdata type="standard">
|
131
92
|
...
|
132
93
|
<ext>
|
133
|
-
<doctype>
|
94
|
+
<doctype>technical-specification</doctype>
|
134
95
|
<editorialgroup>
|
135
|
-
<
|
96
|
+
<technical-committee number="211" type="TC">ISO/TC 211Geographic information/Geomatics</technical-committee>
|
136
97
|
</editorialgroup>
|
137
98
|
<ics>
|
138
99
|
<code>35.240.70</code>
|
139
100
|
<text>IT applications in science</text>
|
140
101
|
</ics>
|
141
102
|
<structuredidentifier type="ISO">
|
142
|
-
<project-number>ISO 19115</project-number>
|
103
|
+
<project-number>ISO/TS 19115</project-number>
|
143
104
|
</structuredidentifier>
|
144
105
|
</ext>
|
145
106
|
</bibdata>"
|
@@ -152,65 +113,55 @@ item.to_xml note: [{ text: "Note", type: "note" }]
|
|
152
113
|
</bibitem>"
|
153
114
|
----
|
154
115
|
|
155
|
-
=== Get
|
116
|
+
=== Get specific language
|
156
117
|
|
157
118
|
[source,ruby]
|
158
119
|
----
|
159
|
-
|
160
|
-
=> [#<
|
161
|
-
@title=#<RelatonBib::FormattedString:
|
162
|
-
@type="title-main">,
|
163
|
-
#<RelatonIsoBib::TypedTitleString:0x007f9a05a1d1b8
|
164
|
-
@title=#<RelatonBib::FormattedString:0x007f9a05a1c970 @content="Geographic information", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
120
|
+
item.title lang: 'en'
|
121
|
+
=> [#<RelatonBib::TypedTitleString:0x007fd9aedcf390
|
122
|
+
@title=#<RelatonBib::FormattedString:0x007fd9aedcf250 @content="Geographic information", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
165
123
|
@type="title-intro">,
|
166
|
-
#<
|
167
|
-
@title=
|
168
|
-
#<RelatonBib::FormattedString:0x007f9a030d3320 @content="Metadata - Geographic information - Part 1: Fundamentals", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
169
|
-
@type="main">,
|
170
|
-
#<RelatonIsoBib::TypedTitleString:0x007f9a030d26a0
|
171
|
-
@title=#<RelatonBib::FormattedString:0x007f9a030d21f0 @content="Métadonnées", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
124
|
+
#<RelatonBib::TypedTitleString:0x007fd9aedcf188
|
125
|
+
@title=#<RelatonBib::FormattedString:0x007fd9aedcf048 @content=" Metadata", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
172
126
|
@type="title-main">,
|
173
|
-
#<
|
174
|
-
@title=#<RelatonBib::FormattedString:
|
127
|
+
#<RelatonBib::TypedTitleString:0x007fd9aedcef80
|
128
|
+
@title=#<RelatonBib::FormattedString:0x007fd9aedcee40 @content="Part 3: XML schema implementation for fundamental concepts", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
129
|
+
@type="title-part">,
|
130
|
+
#<RelatonBib::TypedTitleString:0x007fd9aedcecb0
|
131
|
+
@title=
|
132
|
+
#<RelatonBib::FormattedString:0x007fd9aedceb70
|
133
|
+
@content="Geographic information - Metadata - Part 3: XML schema implementation for fundamental concepts",
|
134
|
+
@format="text/plain",
|
135
|
+
@language=["en"],
|
136
|
+
@script=["Latn"]>,
|
137
|
+
@type="main">]
|
138
|
+
|
139
|
+
item.title lang: 'fr'
|
140
|
+
=> [#<RelatonBib::TypedTitleString:0x007fd9ce9c7890
|
141
|
+
@title=#<RelatonBib::FormattedString:0x007fd9ce9c7750 @content="Information géographique", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
175
142
|
@type="title-intro">,
|
176
|
-
#<
|
143
|
+
#<RelatonBib::TypedTitleString:0x007fd9ce9c7688
|
144
|
+
@title=#<RelatonBib::FormattedString:0x007fd9ce9c7548 @content="Métadonnées", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
145
|
+
@type="title-main">,
|
146
|
+
#<RelatonBib::TypedTitleString:0x007fd9ce9c7480
|
147
|
+
@title=#<RelatonBib::FormattedString:0x007fd9ce9c7340 @content="Partie 3: Mise en oeuvre par des schémas XML", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
148
|
+
@type="title-part">,
|
149
|
+
#<RelatonBib::TypedTitleString:0x007fd9ce9c71b0
|
177
150
|
@title=
|
178
|
-
#<RelatonBib::FormattedString:
|
179
|
-
@content="
|
151
|
+
#<RelatonBib::FormattedString:0x007fd9ce9c7070
|
152
|
+
@content="Information géographique - Métadonnées - Partie 3: Mise en oeuvre par des schémas XML",
|
180
153
|
@format="text/plain",
|
181
154
|
@language=["fr"],
|
182
155
|
@script=["Latn"]>,
|
183
156
|
@type="main">]
|
184
|
-
----
|
185
|
-
|
186
|
-
=== Get specific language
|
187
157
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
<title type="title-main" format="text/plain" language="en" script="Latn">Metadata</title>
|
196
|
-
<title type="main" format="text/plain" language="en" script="Latn">Geographic information – Metadata</title>
|
197
|
-
<uri type=\"src\">https://www.iso.org/standard/53798.html</uri>
|
198
|
-
...
|
199
|
-
</bibitem>
|
200
|
-
|
201
|
-
item = RelatonIso::IsoBibliography.get 'ISO 19115', nil, {lang: "fr"}
|
202
|
-
item.to_xml
|
203
|
-
=> <bibitem id="ISO19115-1-2014" type="standard">
|
204
|
-
<fetched>2020-01-22</fetched>
|
205
|
-
<title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
|
206
|
-
<title type="title-main" format="text/plain" language="en" script="Latn">Metadata</title>
|
207
|
-
<title type="main" format="text/plain" language="en" script="Latn">Geographic information – Metadata</title>
|
208
|
-
<title type="title-intro" format="text/plain" language="fr" script="Latn">Information géographique</title>
|
209
|
-
<title type="title-main" format="text/plain" language="fr" script="Latn">Métadonnées</title>
|
210
|
-
<title type="main" format="text/plain" language="fr" script="Latn">Information géographique – Métadonnées</title>
|
211
|
-
<uri type="src">https://www.iso.org/standard/53798.html</uri>
|
212
|
-
...
|
213
|
-
</bibitem>
|
158
|
+
item.abstract lang: 'en'
|
159
|
+
=> #<RelatonBib::FormattedString:0x007fd9ce9d9bd0
|
160
|
+
@content=
|
161
|
+
"ISO/TS 19115-3:2016 defines an integrated XML implementation of ISO 19115‑1, ISO 19115‑2, and concepts ..."
|
162
|
+
@format="text/plain",
|
163
|
+
@language=["en"],
|
164
|
+
@script=["Latn"]>
|
214
165
|
----
|
215
166
|
|
216
167
|
== Development
|
@@ -44,10 +44,16 @@ module RelatonIso
|
|
44
44
|
year = year2 || year1
|
45
45
|
end
|
46
46
|
end
|
47
|
-
|
48
|
-
|
47
|
+
%r{\s(?<num>\d+)(-(?<part>[\d-]+))?} =~ code
|
48
|
+
opts[:part] = part
|
49
|
+
opts[:num] = num
|
50
|
+
opts[:corr] = corr
|
51
|
+
opts[:all_parts] ||= !part && opts[:all_parts].nil? && code2.nil?
|
52
|
+
if %r[^ISO/IEC DIR].match? code
|
53
|
+
return RelatonIec::IecBibliography.get(code, year, opts)
|
54
|
+
end
|
49
55
|
|
50
|
-
ret = isobib_get1(code, year,
|
56
|
+
ret = isobib_get1(code, year, opts)
|
51
57
|
return nil if ret.nil?
|
52
58
|
|
53
59
|
if year || opts[:keep_year] || opts[:all_parts]
|
@@ -65,15 +71,18 @@ module RelatonIso
|
|
65
71
|
id = year ? "#{code}:#{year}" : code
|
66
72
|
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
67
73
|
"The code must be exactly like it is on the standards website."
|
68
|
-
|
69
|
-
"
|
70
|
-
|
71
|
-
|
72
|
-
|
74
|
+
unless missed_years.empty?
|
75
|
+
warn "[relaton-iso] (There was no match for #{year}, though there "\
|
76
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
77
|
+
end
|
78
|
+
if /\d-\d/.match? code
|
79
|
+
warn "[relaton-iso] The provided document part may not exist, "\
|
80
|
+
"or the document may no longer be published in parts."
|
73
81
|
else
|
74
|
-
warn "[relaton-iso] If you wanted to cite all document parts for
|
75
|
-
"use \"#{code} (all parts)\".\nIf the document is
|
76
|
-
"use its document type abbreviation (TS, TR, PAS,
|
82
|
+
warn "[relaton-iso] If you wanted to cite all document parts for "\
|
83
|
+
"the reference, use \"#{code} (all parts)\".\nIf the document is "\
|
84
|
+
"not a standard, use its document type abbreviation (TS, TR, PAS, "\
|
85
|
+
"Guide)."
|
77
86
|
end
|
78
87
|
nil
|
79
88
|
end
|
@@ -83,66 +92,74 @@ module RelatonIso
|
|
83
92
|
# Search for hits. If none are found, try missed stages and ISO/IEC.
|
84
93
|
#
|
85
94
|
# @param code [String] reference without correction
|
86
|
-
# @param
|
95
|
+
# @param opts [Hash]
|
87
96
|
# @return [Array<RelatonIso::Hit>]
|
88
|
-
def isobib_search_filter(code,
|
97
|
+
def isobib_search_filter(code, opts)
|
89
98
|
warn "[relaton-iso] (\"#{opts[:ref]}\") fetching..."
|
90
99
|
result = search(code)
|
91
|
-
res = search_code result, code,
|
100
|
+
res = search_code result, code, opts
|
92
101
|
return res unless res.empty?
|
93
102
|
|
94
103
|
# try stages
|
95
104
|
if %r{^\w+/[^/]+\s\d+} =~ code # code like ISO/IEC 123, ISO/IEC/IEE 123
|
96
|
-
res = try_stages(result,
|
105
|
+
res = try_stages(result, opts) do |st|
|
97
106
|
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
98
107
|
end
|
99
108
|
return res unless res.empty?
|
100
109
|
elsif %r{^\w+\s\d+} =~ code # code like ISO 123
|
101
|
-
res = try_stages(result,
|
110
|
+
res = try_stages(result, opts) do |st|
|
102
111
|
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
103
112
|
end
|
104
113
|
return res unless res.empty?
|
105
114
|
end
|
106
115
|
|
107
|
-
if %r{^ISO\s}
|
116
|
+
if %r{^ISO\s}.match? code # try ISO/IEC if ISO not found
|
108
117
|
warn "[relaton-iso] Attempting ISO/IEC retrieval"
|
109
118
|
c = code.sub "ISO", "ISO/IEC"
|
110
|
-
res = search_code result, c,
|
119
|
+
res = search_code result, c, opts
|
111
120
|
end
|
112
121
|
res
|
113
122
|
end
|
114
123
|
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
115
124
|
|
116
125
|
# @param result [RelatonIso::HitCollection]
|
117
|
-
# @param corr [String] correction
|
118
126
|
# @param opts [Hash]
|
119
|
-
|
127
|
+
# @return [RelatonIso::HitCollection]
|
128
|
+
def try_stages(result, opts)
|
120
129
|
res = nil
|
121
130
|
%w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
|
122
131
|
c = yield st
|
123
|
-
res = search_code result, c,
|
132
|
+
res = search_code result, c, opts
|
124
133
|
return res unless res.empty?
|
125
134
|
end
|
126
135
|
res
|
127
136
|
end
|
128
137
|
|
129
|
-
|
138
|
+
# @param result [RelatonIso::HitCollection]
|
139
|
+
# @param code [String]
|
140
|
+
# @param opts [Hash]
|
141
|
+
# @return [RelatonIso::HitCollection]
|
142
|
+
def search_code(result, code, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
143
|
+
ref_regex = %r{^#{code}(?!-)}
|
144
|
+
corr_regex = %r{^#{code}[\w-]*(:\d{4})?/#{opts[:corr]}}
|
145
|
+
no_corr_regex = %r{^#{code}[\w-]*(:\d{4})?/}
|
130
146
|
result.select do |i|
|
131
|
-
(opts[:all_parts] || i.hit["docRef"] =~
|
132
|
-
corr &&
|
133
|
-
!corr &&
|
134
|
-
)
|
147
|
+
(opts[:all_parts] || i.hit["docRef"] =~ ref_regex) && (
|
148
|
+
opts[:corr] && corr_regex =~ i.hit["docRef"] ||
|
149
|
+
!opts[:corr] && no_corr_regex !~ i.hit["docRef"]
|
150
|
+
)
|
135
151
|
end
|
136
152
|
end
|
137
153
|
|
138
154
|
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
139
155
|
|
140
156
|
# Sort through the results from RelatonIso, fetching them three at a time,
|
141
|
-
# and return the first result that matches the code,
|
142
|
-
#
|
157
|
+
# and return the first result that matches the code, matches the year
|
158
|
+
# (if provided), and which has a title (amendments do not).
|
143
159
|
# Only expects the first page of results to be populated.
|
144
160
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
145
|
-
# If no match, returns any years which caused mismatch, for error
|
161
|
+
# If no match, returns any years which caused mismatch, for error
|
162
|
+
# reporting
|
146
163
|
def isobib_results_filter(result, year, opts)
|
147
164
|
missed_years = []
|
148
165
|
hits = result.reduce!([]) do |hts, h|
|
@@ -155,7 +172,9 @@ module RelatonIso
|
|
155
172
|
end
|
156
173
|
return { years: missed_years } unless hits.any?
|
157
174
|
|
158
|
-
|
175
|
+
if !opts[:all_parts] || hits.size == 1
|
176
|
+
return { ret: hits.first.fetch(opts[:lang]) }
|
177
|
+
end
|
159
178
|
|
160
179
|
{ ret: hits.to_all_parts(opts[:lang]) }
|
161
180
|
end
|
@@ -163,11 +182,10 @@ module RelatonIso
|
|
163
182
|
|
164
183
|
# @param code [String]
|
165
184
|
# @param year [String, NilClass]
|
166
|
-
# @param corr [String, NilClass]
|
167
185
|
# @param opts [Hash]
|
168
|
-
def isobib_get1(code, year,
|
186
|
+
def isobib_get1(code, year, opts)
|
169
187
|
# return iev(code) if /^IEC 60050-/.match code
|
170
|
-
result = isobib_search_filter(code,
|
188
|
+
result = isobib_search_filter(code, opts) || return
|
171
189
|
ret = isobib_results_filter(result, year, opts)
|
172
190
|
if ret[:ret]
|
173
191
|
warn "[relaton-iso] (\"#{opts[:ref]}\") found #{ret[:ret].docidentifier.first.id}"
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -54,20 +54,20 @@ module RelatonIso
|
|
54
54
|
# @param hit_data [Hash]
|
55
55
|
# @param lang [String, NilClass]
|
56
56
|
# @return [Hash]
|
57
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
58
|
-
|
59
|
-
|
57
|
+
def parse_page(hit_data, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
58
|
+
path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
59
|
+
"#{hit_data['csnumber']}.html"
|
60
60
|
doc, url = get_page path
|
61
61
|
|
62
62
|
# Fetch edition.
|
63
|
-
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
64
|
-
children&.last&.text&.match(/\d+/)&.to_s
|
63
|
+
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
64
|
+
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
65
|
|
66
66
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
67
67
|
|
68
68
|
RelatonIsoBib::IsoBibliographicItem.new(
|
69
69
|
fetched: Date.today.to_s,
|
70
|
-
docid: fetch_docid(hit_data
|
70
|
+
docid: fetch_docid(hit_data, langs),
|
71
71
|
docnumber: fetch_docnumber(doc),
|
72
72
|
edition: edition,
|
73
73
|
language: langs.map { |l| l[:lang] },
|
@@ -84,10 +84,9 @@ module RelatonIso
|
|
84
84
|
link: fetch_link(doc, url),
|
85
85
|
relation: fetch_relations(doc),
|
86
86
|
place: ["Geneva"],
|
87
|
-
structuredidentifier: fetch_structuredidentifier(doc)
|
87
|
+
structuredidentifier: fetch_structuredidentifier(doc)
|
88
88
|
)
|
89
89
|
end
|
90
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
91
90
|
|
92
91
|
private
|
93
92
|
|
@@ -97,12 +96,13 @@ module RelatonIso
|
|
97
96
|
# @return [Array<Array>]
|
98
97
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
99
98
|
def fetch_titles_abstract(doc, lang)
|
100
|
-
titles =
|
99
|
+
titles = RelatonBib::TypedTitleStringCollection.new
|
101
100
|
abstract = []
|
102
101
|
langs = languages(doc, lang).reduce([]) do |s, l|
|
103
102
|
# Don't need to get page for en. We already have it.
|
104
103
|
d = l[:path] ? get_page(l[:path])[0] : doc
|
105
|
-
unless d.at("//h5[@class='help-block']
|
104
|
+
unless d.at("//h5[@class='help-block']"\
|
105
|
+
"[.='недоступно на русском языке']")
|
106
106
|
s << l
|
107
107
|
titles += fetch_title(d, l[:lang])
|
108
108
|
|
@@ -160,18 +160,42 @@ module RelatonIso
|
|
160
160
|
n += 1
|
161
161
|
end
|
162
162
|
[Nokogiri::HTML(resp.body), url]
|
163
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
164
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
165
|
-
|
163
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
164
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
165
|
+
Net::ProtocolError, Errno::ETIMEDOUT
|
166
166
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
167
167
|
end
|
168
168
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
169
169
|
|
170
170
|
# Fetch docid.
|
171
|
-
# @param
|
171
|
+
# @param hit [Hash]
|
172
|
+
# @param langs [Array<Hash>]
|
172
173
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
173
|
-
def fetch_docid(
|
174
|
-
[
|
174
|
+
def fetch_docid(hit, langs)
|
175
|
+
[
|
176
|
+
RelatonBib::DocumentIdentifier.new(id: hit["docRef"], type: "ISO"),
|
177
|
+
RelatonBib::DocumentIdentifier.new(id: fetch_urn(hit, langs),
|
178
|
+
type: "URN"),
|
179
|
+
]
|
180
|
+
end
|
181
|
+
|
182
|
+
# @param hit [Hash]
|
183
|
+
# @param langs [Array<Hash>]
|
184
|
+
# @return [String]
|
185
|
+
def fetch_urn(hit, langs) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/MethodLength
|
186
|
+
orig = hit["docRef"].split(" ").first.downcase.split("/").join "-"
|
187
|
+
%r{(?<=)(?<type>DATA|GUIDE|ISP|IWA|PAS|R|TR|TS|TTA)} =~ hit["docRef"]
|
188
|
+
urn = "urn:iso:std:#{orig}"
|
189
|
+
urn += ":#{type.downcase}" if type
|
190
|
+
urn += ":#{hit['docNumber']}"
|
191
|
+
urn += ":-#{hit['docPart']}" if hit["docPart"] && !hit["docPart"].empty?
|
192
|
+
urn += ":stage-#{hit['stageId']}"
|
193
|
+
urn += ":ed-#{hit['docEdition']}" if hit["docEdition"]
|
194
|
+
if hit["docElem"] && !hit["docElem"].empty? && hit["docElem"] != "0"
|
195
|
+
urn += ":#{hit['docElem'].downcase}:#{hit['docElemSeq']}"
|
196
|
+
end
|
197
|
+
urn += ":" + langs.map { |l| l[:lang] }.join(",")
|
198
|
+
urn
|
175
199
|
end
|
176
200
|
|
177
201
|
def fetch_docnumber(doc)
|
@@ -180,11 +204,11 @@ module RelatonIso
|
|
180
204
|
end
|
181
205
|
|
182
206
|
# @param doc [Nokogiri::HTML::Document]
|
183
|
-
def fetch_structuredidentifier(doc)
|
207
|
+
def fetch_structuredidentifier(doc) # rubocop:disable Metrics/MethodLength
|
184
208
|
item_ref = doc.at("//nav[contains(@class, 'heading-condensed')]/h1")
|
185
209
|
unless item_ref
|
186
210
|
return RelatonIsoBib::StructuredIdentifier.new(
|
187
|
-
project_number: "?", part_number: "", prefix: nil, id: "?"
|
211
|
+
project_number: "?", part_number: "", prefix: nil, id: "?"
|
188
212
|
)
|
189
213
|
end
|
190
214
|
|
@@ -201,7 +225,7 @@ module RelatonIso
|
|
201
225
|
# @return [Hash]
|
202
226
|
def fetch_status(doc)
|
203
227
|
stg, substg = doc.css(
|
204
|
-
"li.dropdown.active span.stage-code > strong"
|
228
|
+
"li.dropdown.active span.stage-code > strong"
|
205
229
|
).text.split "."
|
206
230
|
RelatonBib::DocumentStatus.new(stage: stg, substage: substg)
|
207
231
|
end
|
@@ -214,7 +238,7 @@ module RelatonIso
|
|
214
238
|
# Fetch workgroup.
|
215
239
|
# @param doc [Nokogiri::HTML::Document]
|
216
240
|
# @return [Hash]
|
217
|
-
def fetch_workgroup(doc)
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
218
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
219
243
|
# wg_url = DOMAIN + wg_link['href']
|
220
244
|
workgroup = wg_link.text.split "/"
|
@@ -235,17 +259,19 @@ module RelatonIso
|
|
235
259
|
# Fetch relations.
|
236
260
|
# @param doc [Nokogiri::HTML::Document]
|
237
261
|
# @return [Array<Hash>]
|
238
|
-
def fetch_relations(doc)
|
239
|
-
doc.css("ul.steps li").reduce([]) do |a, r|
|
262
|
+
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
263
|
+
doc.css("ul.steps li").reduce([]) do |a, r| # rubocop:disable Metrics/BlockLength
|
240
264
|
r_type = r.css("strong").text
|
241
265
|
date = []
|
242
266
|
type = case r_type
|
243
267
|
when "Previously", "Will be replaced by" then "obsoletes"
|
244
268
|
when "Corrigenda/Amendments", "Revised by", "Now confirmed"
|
245
|
-
on = doc.xpath(
|
269
|
+
on = doc.xpath(
|
270
|
+
'//span[@class="stage-date"][contains(., "-")]'
|
271
|
+
).last
|
246
272
|
if on
|
247
273
|
date << { type: "circulated",
|
248
|
-
|
274
|
+
on: on.text }
|
249
275
|
"updates"
|
250
276
|
end
|
251
277
|
else r_type
|
@@ -254,7 +280,7 @@ module RelatonIso
|
|
254
280
|
else
|
255
281
|
a + r.css("a").map do |id|
|
256
282
|
fref = RelatonBib::FormattedRef.new(
|
257
|
-
content: id.text, format: "text/plain"
|
283
|
+
content: id.text, format: "text/plain"
|
258
284
|
)
|
259
285
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
260
286
|
formattedref: fref, date: date
|
@@ -290,9 +316,10 @@ module RelatonIso
|
|
290
316
|
# @return [Array<RelatonBib::TypedTitleString>]
|
291
317
|
def fetch_title(doc, lang)
|
292
318
|
content = doc.at(
|
293
|
-
"//nav[contains(@class,'heading-condensed')]/h2 |
|
319
|
+
"//nav[contains(@class,'heading-condensed')]/h2 | "\
|
320
|
+
"//nav[contains(@class,'heading-condensed')]/h3"
|
294
321
|
)&.text&.gsub(/\u2014/, "-")
|
295
|
-
return
|
322
|
+
return RelatonBib::TypedTitleStringCollection.new unless content
|
296
323
|
|
297
324
|
RelatonBib::TypedTitleString.from_string content, lang, script(lang)
|
298
325
|
end
|
@@ -303,7 +330,7 @@ module RelatonIso
|
|
303
330
|
def script(lang)
|
304
331
|
case lang
|
305
332
|
when "en", "fr" then "Latn"
|
306
|
-
|
333
|
+
# when "ru" then "Cyrl"
|
307
334
|
end
|
308
335
|
end
|
309
336
|
|
@@ -312,7 +339,7 @@ module RelatonIso
|
|
312
339
|
# @param doc [Nokogiri::HTML::Document]
|
313
340
|
# @param ref [String]
|
314
341
|
# @return [Array<Hash>]
|
315
|
-
def fetch_dates(doc, ref)
|
342
|
+
def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity
|
316
343
|
dates = []
|
317
344
|
%r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
|
318
345
|
pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
|
@@ -367,7 +394,8 @@ module RelatonIso
|
|
367
394
|
links << { type: "obp", content: obp[:href] } if obp
|
368
395
|
rss = doc.at("//a[contains(@href, 'rss')]")
|
369
396
|
links << { type: "rss", content: DOMAIN + rss[:href] } if rss
|
370
|
-
pub = doc.at "//p[contains(., 'publicly available')]/a"
|
397
|
+
pub = doc.at "//p[contains(., 'publicly available')]/a",
|
398
|
+
"//p[contains(., 'can be downloaded from the')]/a"
|
371
399
|
links << { type: "pub", content: pub[:href] } if pub
|
372
400
|
links
|
373
401
|
end
|
@@ -380,7 +408,8 @@ module RelatonIso
|
|
380
408
|
owner_name = ref.match(/.*?(?=\s)/).to_s
|
381
409
|
from = ref.match(/(?<=:)\d{4}/).to_s
|
382
410
|
if from.empty?
|
383
|
-
from = doc.xpath("//span[@itemprop='releaseDate']").text
|
411
|
+
from = doc.xpath("//span[@itemprop='releaseDate']").text
|
412
|
+
.match(/\d{4}/).to_s
|
384
413
|
end
|
385
414
|
[{ owner: [{ name: owner_name }], from: from }]
|
386
415
|
end
|
data/lib/relaton_iso/version.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -37,6 +37,6 @@ Gem::Specification.new do |spec|
|
|
37
37
|
spec.add_development_dependency "vcr"
|
38
38
|
spec.add_development_dependency "webmock"
|
39
39
|
|
40
|
-
spec.add_dependency "relaton-iec", "~> 1.
|
41
|
-
spec.add_dependency "relaton-iso-bib", "~> 1.
|
40
|
+
spec.add_dependency "relaton-iec", "~> 1.6.pre"
|
41
|
+
spec.add_dependency "relaton-iso-bib", "~> 1.6.pre"
|
42
42
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.pre1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -156,28 +156,28 @@ dependencies:
|
|
156
156
|
requirements:
|
157
157
|
- - "~>"
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 1.
|
159
|
+
version: 1.6.pre
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 1.
|
166
|
+
version: 1.6.pre
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: relaton-iso-bib
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
170
170
|
requirements:
|
171
171
|
- - "~>"
|
172
172
|
- !ruby/object:Gem::Version
|
173
|
-
version: 1.
|
173
|
+
version: 1.6.pre
|
174
174
|
type: :runtime
|
175
175
|
prerelease: false
|
176
176
|
version_requirements: !ruby/object:Gem::Requirement
|
177
177
|
requirements:
|
178
178
|
- - "~>"
|
179
179
|
- !ruby/object:Gem::Version
|
180
|
-
version: 1.
|
180
|
+
version: 1.6.pre
|
181
181
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
182
182
|
model'
|
183
183
|
email:
|
@@ -225,7 +225,7 @@ homepage: https://github.com/relaton/relaton-iso
|
|
225
225
|
licenses:
|
226
226
|
- BSD-2-Clause
|
227
227
|
metadata: {}
|
228
|
-
post_install_message:
|
228
|
+
post_install_message:
|
229
229
|
rdoc_options: []
|
230
230
|
require_paths:
|
231
231
|
- lib
|
@@ -236,12 +236,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
236
236
|
version: 2.4.0
|
237
237
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
238
238
|
requirements:
|
239
|
-
- - "
|
239
|
+
- - ">"
|
240
240
|
- !ruby/object:Gem::Version
|
241
|
-
version:
|
241
|
+
version: 1.3.1
|
242
242
|
requirements: []
|
243
243
|
rubygems_version: 3.0.6
|
244
|
-
signing_key:
|
244
|
+
signing_key:
|
245
245
|
specification_version: 4
|
246
246
|
summary: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
247
247
|
model'
|