relaton-iec 1.0.1 → 1.5.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ubuntu.yml +1 -0
- data/.rubocop.yml +2 -2
- data/README.adoc +1 -1
- data/grammars/biblio.rng +36 -6
- data/grammars/isodoc.rng +574 -22
- data/grammars/isostandard.rng +13 -2
- data/lib/relaton_iec.rb +4 -0
- data/lib/relaton_iec/iec_bibliography.rb +56 -47
- data/lib/relaton_iec/scrapper.rb +69 -163
- data/lib/relaton_iec/version.rb +1 -1
- data/relaton_iec.gemspec +1 -1
- metadata +11 -11
data/grammars/isostandard.rng
CHANGED
@@ -91,6 +91,12 @@
|
|
91
91
|
</define>
|
92
92
|
<define name="sections">
|
93
93
|
<element name="sections">
|
94
|
+
<zeroOrMore>
|
95
|
+
<choice>
|
96
|
+
<ref name="note"/>
|
97
|
+
<ref name="admonition"/>
|
98
|
+
</choice>
|
99
|
+
</zeroOrMore>
|
94
100
|
<ref name="clause"/>
|
95
101
|
<optional>
|
96
102
|
<choice>
|
@@ -247,7 +253,7 @@
|
|
247
253
|
<define name="preface">
|
248
254
|
<element name="preface">
|
249
255
|
<optional>
|
250
|
-
<ref name="
|
256
|
+
<ref name="abstract"/>
|
251
257
|
</optional>
|
252
258
|
<ref name="foreword"/>
|
253
259
|
<optional>
|
@@ -263,6 +269,8 @@
|
|
263
269
|
<value>publicly-available-specification</value>
|
264
270
|
<value>international-workshop-agreement</value>
|
265
271
|
<value>guide</value>
|
272
|
+
<value>amendment</value>
|
273
|
+
<value>technical-corrigendum</value>
|
266
274
|
</choice>
|
267
275
|
</define>
|
268
276
|
<define name="structuredidentifier">
|
@@ -354,6 +362,9 @@
|
|
354
362
|
<data type="boolean"/>
|
355
363
|
</attribute>
|
356
364
|
</optional>
|
365
|
+
<optional>
|
366
|
+
<attribute name="number"/>
|
367
|
+
</optional>
|
357
368
|
<optional>
|
358
369
|
<attribute name="subsequence"/>
|
359
370
|
</optional>
|
@@ -510,7 +521,7 @@
|
|
510
521
|
</attribute>
|
511
522
|
</optional>
|
512
523
|
<oneOrMore>
|
513
|
-
<ref name="
|
524
|
+
<ref name="BasicBlock"/>
|
514
525
|
</oneOrMore>
|
515
526
|
</element>
|
516
527
|
</define>
|
data/lib/relaton_iec.rb
CHANGED
@@ -22,7 +22,7 @@ module RelatonIec
|
|
22
22
|
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
23
23
|
# reference is required
|
24
24
|
# @return [String] Relaton XML serialisation of reference
|
25
|
-
def get(code, year = nil, opts = {})
|
25
|
+
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
26
26
|
if year.nil?
|
27
27
|
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ code
|
28
28
|
unless code1.nil?
|
@@ -45,27 +45,33 @@ module RelatonIec
|
|
45
45
|
|
46
46
|
private
|
47
47
|
|
48
|
-
def fetch_ref_err(code, year, missed_years)
|
48
|
+
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
49
49
|
id = year ? "#{code}:#{year}" : code
|
50
50
|
warn "[relaton-iec] WARNING: no match found online for #{id}. "\
|
51
51
|
"The code must be exactly like it is on the standards website."
|
52
|
-
|
53
|
-
"
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
unless missed_years.empty?
|
53
|
+
warn "[relaton-iec] (There was no match for #{year}, though there "\
|
54
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
55
|
+
end
|
56
|
+
if /\d-\d/.match? code
|
57
|
+
warn "[relaton-iec] The provided document part may not exist, or "\
|
58
|
+
"the document may no longer be published in parts."
|
57
59
|
else
|
58
|
-
warn "[relaton-iec] If you wanted to cite all document parts for
|
59
|
-
"use \"#{code} (all parts)\".\nIf the document is
|
60
|
-
"use its document type abbreviation (TS, TR, PAS,
|
60
|
+
warn "[relaton-iec] If you wanted to cite all document parts for "\
|
61
|
+
"the reference, use \"#{code} (all parts)\".\nIf the document is "\
|
62
|
+
"not a standard, use its document type abbreviation (TS, TR, PAS, "\
|
63
|
+
"Guide)."
|
61
64
|
end
|
62
65
|
nil
|
63
66
|
end
|
64
67
|
|
65
|
-
|
66
|
-
|
68
|
+
# @param hits [Array<RelatonIec::Hit>]
|
69
|
+
# @param threads [Integer]
|
70
|
+
# @return [Array<RelatonIec::Hit>]
|
71
|
+
def fetch_pages(hits, threads)
|
72
|
+
workers = RelatonBib::WorkersPool.new threads
|
67
73
|
workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
68
|
-
|
74
|
+
hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
69
75
|
workers.end
|
70
76
|
workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
71
77
|
end
|
@@ -83,45 +89,47 @@ module RelatonIec
|
|
83
89
|
end
|
84
90
|
|
85
91
|
def iev(code = "IEC 60050")
|
86
|
-
RelatonIsoBib::XMLParser.from_xml(<<~"
|
87
|
-
<bibitem>
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
</bibitem>
|
115
|
-
|
92
|
+
RelatonIsoBib::XMLParser.from_xml(<<~"XML")
|
93
|
+
<bibitem>
|
94
|
+
<fetched>#{Date.today}</fetched>
|
95
|
+
<title format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary</title>
|
96
|
+
<link type="src">http://www.electropedia.org</link>
|
97
|
+
<docidentifier>#{code}:2011</docidentifier>
|
98
|
+
<date type="published"><on>2011</on></date>
|
99
|
+
<contributor>
|
100
|
+
<role type="publisher"/>
|
101
|
+
<organization>
|
102
|
+
<name>International Electrotechnical Commission</name>
|
103
|
+
<abbreviation>IEC</abbreviation>
|
104
|
+
<uri>www.iec.ch</uri>
|
105
|
+
</organization>
|
106
|
+
</contributor>
|
107
|
+
<language>en</language> <language>fr</language>
|
108
|
+
<script>Latn</script>
|
109
|
+
<status> <stage>60</stage> </status>
|
110
|
+
<copyright>
|
111
|
+
<from>2018</from>
|
112
|
+
<owner>
|
113
|
+
<organization>
|
114
|
+
<name>International Electrotechnical Commission</name>
|
115
|
+
<abbreviation>IEC</abbreviation>
|
116
|
+
<uri>www.iec.ch</uri>
|
117
|
+
</organization>
|
118
|
+
</owner>
|
119
|
+
</copyright>
|
120
|
+
</bibitem>
|
121
|
+
XML
|
116
122
|
end
|
117
123
|
|
118
124
|
# Sort through the results from Isobib, fetching them three at a time,
|
119
125
|
# and return the first result that matches the code,
|
120
|
-
# matches the year (if provided), and which
|
126
|
+
# matches the year (if provided), and which
|
127
|
+
# has a title (amendments do not).
|
121
128
|
# Only expects the first page of results to be populated.
|
122
129
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
123
|
-
# If no match, returns any years which caused mismatch, for error
|
124
|
-
|
130
|
+
# If no match, returns any years which caused mismatch, for error
|
131
|
+
# reporting
|
132
|
+
def isobib_results_filter(result, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
|
125
133
|
missed_years = []
|
126
134
|
result.each_slice(3) do |s| # ISO website only allows 3 connections
|
127
135
|
fetch_pages(s, 3).each_with_index do |r, _i|
|
@@ -143,7 +151,8 @@ module RelatonIec
|
|
143
151
|
result = isobib_search_filter(code) || return
|
144
152
|
ret = isobib_results_filter(result, year)
|
145
153
|
if ret[:ret]
|
146
|
-
warn "[relaton-iec] (\"#{code}\") found
|
154
|
+
warn "[relaton-iec] (\"#{code}\") found "\
|
155
|
+
"#{ret[:ret].docidentifier.first.id}"
|
147
156
|
ret[:ret]
|
148
157
|
else
|
149
158
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -1,10 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "relaton_iso_bib"
|
4
|
-
require "relaton_iec/hit"
|
5
|
-
require "nokogiri"
|
6
|
-
require "net/http"
|
7
|
-
|
8
3
|
# Capybara.request_driver :poltergeist do |app|
|
9
4
|
# Capybara::Poltergeist::Driver.new app, js_errors: false
|
10
5
|
# end
|
@@ -12,7 +7,6 @@ require "net/http"
|
|
12
7
|
|
13
8
|
module RelatonIec
|
14
9
|
# Scrapper.
|
15
|
-
# rubocop:disable Metrics/ModuleLength
|
16
10
|
module Scrapper
|
17
11
|
DOMAIN = "https://webstore.iec.ch"
|
18
12
|
|
@@ -32,33 +26,24 @@ module RelatonIec
|
|
32
26
|
}.freeze
|
33
27
|
|
34
28
|
class << self
|
35
|
-
#
|
36
|
-
# @return [Array<Hash>]
|
37
|
-
# def get(text)
|
38
|
-
# iso_workers = WorkersPool.new 4
|
39
|
-
# iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
|
40
|
-
# algolia_workers = start_algolia_search(text, iso_workers)
|
41
|
-
# iso_docs = iso_workers.result
|
42
|
-
# algolia_workers.end
|
43
|
-
# algolia_workers.result
|
44
|
-
# iso_docs
|
45
|
-
# end
|
29
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
46
30
|
|
47
31
|
# Parse page.
|
48
|
-
# @param
|
32
|
+
# @param hit_data [Hash]
|
49
33
|
# @return [Hash]
|
50
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
51
34
|
def parse_page(hit_data)
|
52
35
|
doc = get_page hit_data[:url]
|
53
36
|
|
54
37
|
# Fetch edition.
|
55
|
-
edition = doc.at(
|
38
|
+
edition = doc.at(
|
39
|
+
"//th[contains(., 'Edition')]/following-sibling::td/span"
|
40
|
+
).text
|
56
41
|
|
57
42
|
status, relations = fetch_status_relations hit_data[:url]
|
58
43
|
|
59
44
|
IecBibliographicItem.new(
|
60
45
|
fetched: Date.today.to_s,
|
61
|
-
docid:
|
46
|
+
docid: fetch_docid(hit_data),
|
62
47
|
structuredidentifier: fetch_structuredidentifier(doc),
|
63
48
|
edition: edition,
|
64
49
|
language: ["en"],
|
@@ -74,52 +59,47 @@ module RelatonIec
|
|
74
59
|
copyright: fetch_copyright(hit_data[:code], doc),
|
75
60
|
link: fetch_link(doc, hit_data[:url]),
|
76
61
|
relation: relations,
|
77
|
-
place: ["Geneva"]
|
62
|
+
place: ["Geneva"]
|
78
63
|
)
|
79
64
|
end
|
80
65
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
81
66
|
|
82
67
|
private
|
83
68
|
|
84
|
-
# Start search workers.
|
85
|
-
# @param text[String]
|
86
|
-
# @param iec_workers [Isobib::WorkersPool]
|
87
|
-
# @reaturn [Isobib::WorkersPool]
|
88
|
-
# def start_algolia_search(text, iec_workers)
|
89
|
-
# index = Algolia::Index.new 'all_en'
|
90
|
-
# workers = WorkersPool.new
|
91
|
-
# workers.worker do |page|
|
92
|
-
# algolia_worker(index, text, page, workers, iec_workers)
|
93
|
-
# end
|
94
|
-
|
95
|
-
# # Add first page so search worker will start.
|
96
|
-
# workers << 0
|
97
|
-
# end
|
98
|
-
|
99
|
-
# Fetch ISO documents.
|
100
69
|
# @param hit [Hash]
|
101
|
-
# @
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
70
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
71
|
+
def fetch_docid(hit)
|
72
|
+
rest = hit[:code].downcase.sub(%r{
|
73
|
+
(?<head>[^\s]+)\s
|
74
|
+
(?<type>is|ts|tr|pas|srd|guide|tec|wp)?(?(<type>)\s)
|
75
|
+
(?<pnum>[\d-]+)\s?
|
76
|
+
(?<_dd>:)?(?(<_dd>)(?<date>[\d-]+)\s?)
|
77
|
+
}x, "")
|
78
|
+
m = $~
|
79
|
+
deliv = /cmv|csv|exv|prv|rlv|ser/.match(hit[:code].downcase).to_s
|
80
|
+
urn = ["urn", "iec", "std", m[:head].split("/").join("-"), m[:pnum],
|
81
|
+
m[:date], m[:type], deliv, "en"]
|
82
|
+
urn += fetch_ajunct(rest)
|
83
|
+
[
|
84
|
+
RelatonBib::DocumentIdentifier.new(id: hit[:code], type: "IEC"),
|
85
|
+
RelatonBib::DocumentIdentifier.new(id: urn.join(":"), type: "URN"),
|
86
|
+
]
|
87
|
+
end
|
106
88
|
|
107
|
-
#
|
108
|
-
# @
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
# iso_workers.end unless next_page < res['nbPages']
|
122
|
-
# end
|
89
|
+
# @param rest [String]
|
90
|
+
# @return [Array<String, nil>]
|
91
|
+
def fetch_ajunct(rest)
|
92
|
+
r = rest.sub(%r{
|
93
|
+
(?<_pl>\+)(?(<_pl>)(?<adjunct>amd)(?<adjnum>\d+)\s?)
|
94
|
+
(?<_d2>:)?(?(<_d2>)(?<adjdt>[\d-]+)\s?)
|
95
|
+
}x, "")
|
96
|
+
m = $~ || {}
|
97
|
+
return [] unless m[:adjunct]
|
98
|
+
|
99
|
+
plus = m[:adjunct] && "plus"
|
100
|
+
urn = [plus, m[:adjunct], m[:adjnum], m[:adjdt]]
|
101
|
+
urn + fetch_ajunct(r)
|
102
|
+
end
|
123
103
|
|
124
104
|
# Fetch abstracts.
|
125
105
|
# @param doc [Nokigiri::HTML::Document]
|
@@ -134,19 +114,6 @@ module RelatonIec
|
|
134
114
|
}]
|
135
115
|
end
|
136
116
|
|
137
|
-
# Get langs.
|
138
|
-
# @param doc [Nokogiri::HTML::Document]
|
139
|
-
# @return [Array<Hash>]
|
140
|
-
# def langs(doc)
|
141
|
-
# lgs = [{ lang: 'en' }]
|
142
|
-
# doc.css('ul#lang-switcher ul li a').each do |lang_link|
|
143
|
-
# lang_path = lang_link.attr('href')
|
144
|
-
# lang = lang_path.match(%r{^\/(fr)\/})
|
145
|
-
# lgs << { lang: lang[1], path: lang_path } if lang
|
146
|
-
# end
|
147
|
-
# lgs
|
148
|
-
# end
|
149
|
-
|
150
117
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
151
118
|
|
152
119
|
# Get page.
|
@@ -154,25 +121,20 @@ module RelatonIec
|
|
154
121
|
# @return [Array<Nokogiri::HTML::Document, String>]
|
155
122
|
def get_page(url)
|
156
123
|
uri = URI url
|
157
|
-
resp = Net::HTTP.get_response(uri)
|
124
|
+
resp = Net::HTTP.get_response(uri)
|
158
125
|
case resp.code
|
159
126
|
when "301"
|
160
127
|
path = resp["location"]
|
161
128
|
url = DOMAIN + path
|
162
129
|
uri = URI url
|
163
|
-
resp = Net::HTTP.get_response(uri)
|
130
|
+
resp = Net::HTTP.get_response(uri)
|
164
131
|
when "404"
|
165
132
|
raise RelatonBib::RequestError, "Page not found #{url}"
|
166
133
|
end
|
167
|
-
# n = 0
|
168
|
-
# while resp.body !~ /<strong/ && n < 10
|
169
|
-
# resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
|
170
|
-
# n += 1
|
171
|
-
# end
|
172
134
|
Nokogiri::HTML(resp.body)
|
173
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
174
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
175
|
-
OpenSSL::SSL::SSLError
|
135
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
136
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
137
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
176
138
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
177
139
|
end
|
178
140
|
# rubocop:enable Metrics/AbcSize
|
@@ -184,12 +146,12 @@ module RelatonIec
|
|
184
146
|
item_ref = doc.at("//span[@itemprop='productID']")
|
185
147
|
unless item_ref
|
186
148
|
return RelatonIsoBib::StructuredIdentifier.new(
|
187
|
-
project_number: "?", part_number: "", prefix: nil, id: "?"
|
149
|
+
project_number: "?", part_number: "", prefix: nil, id: "?"
|
188
150
|
)
|
189
151
|
end
|
190
152
|
|
191
153
|
m = item_ref.text.match(
|
192
|
-
/(?<=\s)(?<project>\d+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)
|
154
|
+
/(?<=\s)(?<project>\d+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)/
|
193
155
|
)
|
194
156
|
RelatonIsoBib::StructuredIdentifier.new(
|
195
157
|
project_number: m[:project],
|
@@ -197,7 +159,7 @@ module RelatonIec
|
|
197
159
|
subpart_number: m[:subpart],
|
198
160
|
prefix: nil,
|
199
161
|
type: "IEC",
|
200
|
-
id: item_ref.text
|
162
|
+
id: item_ref.text
|
201
163
|
)
|
202
164
|
end
|
203
165
|
|
@@ -211,15 +173,12 @@ module RelatonIec
|
|
211
173
|
statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
|
212
174
|
s = wip.at("STAGE").text
|
213
175
|
stage, substage = statuses[s]["stage"].split "."
|
214
|
-
# status = statuses[s]["status"]
|
215
176
|
else
|
216
|
-
# status = "Published"
|
217
177
|
stage = "60"
|
218
178
|
substage = "60"
|
219
179
|
end
|
220
180
|
RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
|
221
181
|
end
|
222
|
-
# rubocop:enable Metrics/MethodLength
|
223
182
|
|
224
183
|
# Fetch workgroup.
|
225
184
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -237,13 +196,15 @@ module RelatonIec
|
|
237
196
|
}],
|
238
197
|
}
|
239
198
|
end
|
199
|
+
# rubocop:enable Metrics/MethodLength
|
240
200
|
|
241
201
|
# Fetch relations.
|
242
202
|
# @param doc [Nokogiri::HTML::Document]
|
243
203
|
# @return [Array<Hash>]
|
244
204
|
# rubocop:disable Metrics/MethodLength
|
245
205
|
def fetch_relations(doc)
|
246
|
-
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]')
|
206
|
+
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]')
|
207
|
+
.map do |r|
|
247
208
|
r_type = r.at("STATUS").text.downcase
|
248
209
|
type = case r_type
|
249
210
|
# when 'published' then 'obsoletes' # Valid
|
@@ -251,9 +212,8 @@ module RelatonIec
|
|
251
212
|
when "withdrawn" then "obsoletes"
|
252
213
|
else r_type
|
253
214
|
end
|
254
|
-
# url = DOMAIN + "/publication/" + r.at("PUB_ID").text
|
255
215
|
fref = RelatonBib::FormattedRef.new(
|
256
|
-
content: r.at("FULL_NAME").text, format: "text/plain"
|
216
|
+
content: r.at("FULL_NAME").text, format: "text/plain"
|
257
217
|
)
|
258
218
|
bibitem = IecBibliographicItem.new(formattedref: fref)
|
259
219
|
{ type: type, bibitem: bibitem }
|
@@ -270,22 +230,6 @@ module RelatonIec
|
|
270
230
|
status = fetch_status doc
|
271
231
|
relations = fetch_relations doc
|
272
232
|
[status, relations]
|
273
|
-
# doc.css('ul.steps li').inject([]) do |a, r|
|
274
|
-
# r_type = r.css('strong').text
|
275
|
-
# type = case r_type
|
276
|
-
# when 'Previously', 'Will be replaced by' then 'obsoletes'
|
277
|
-
# when 'Corrigenda/Amendments', 'Revised by', 'Now confirmed'
|
278
|
-
# 'updates'
|
279
|
-
# else r_type
|
280
|
-
# end
|
281
|
-
# if ['Now', 'Now under review'].include? type
|
282
|
-
# a
|
283
|
-
# else
|
284
|
-
# a + r.css('a').map do |id|
|
285
|
-
# { type: type, identifier: id.text, url: id['href'] }
|
286
|
-
# end
|
287
|
-
# end
|
288
|
-
# end
|
289
233
|
end
|
290
234
|
# rubocop:enable Metrics/MethodLength
|
291
235
|
|
@@ -293,66 +237,18 @@ module RelatonIec
|
|
293
237
|
# @param doc [Nokogiri::HTML::Document]
|
294
238
|
# @return [String]
|
295
239
|
def fetch_type(doc)
|
296
|
-
doc.at(
|
297
|
-
|
298
|
-
|
299
|
-
# /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
|
300
|
-
# #return "international-standard" if type_match.nil?
|
301
|
-
# if TYPES[type_match[2]]
|
302
|
-
# TYPES[type_match[2]]
|
303
|
-
# elsif type_match[1]
|
304
|
-
# elsif type_match[1] == 'ISO'
|
305
|
-
# 'international-standard'
|
306
|
-
# elsif type_match[1] == 'IWA'
|
307
|
-
# 'international-workshop-agreement'
|
308
|
-
# end
|
309
|
-
# # rescue => _e
|
310
|
-
# # puts 'Unknown document type: ' + title
|
240
|
+
doc.at(
|
241
|
+
'//th[contains(., "Publication type")]/following-sibling::td/span'
|
242
|
+
).text.downcase.tr " ", "-"
|
311
243
|
end
|
312
244
|
|
313
245
|
# Fetch titles.
|
314
246
|
# @param hit_data [Hash]
|
315
247
|
# @return [Array<Hash>]
|
316
248
|
def fetch_titles(hit_data)
|
317
|
-
|
318
|
-
case titles.size
|
319
|
-
when 0
|
320
|
-
intro, main, part = nil, "", nil
|
321
|
-
when 1
|
322
|
-
intro, main, part = nil, titles[0], nil
|
323
|
-
when 2
|
324
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
325
|
-
intro, main, part = nil, titles[0], titles[1]
|
326
|
-
else
|
327
|
-
intro, main, part = titles[0], titles[1], nil
|
328
|
-
end
|
329
|
-
when 3
|
330
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
331
|
-
intro, main, part = nil, titles[0], titles[1..2].join(" - ")
|
332
|
-
else
|
333
|
-
intro, main, part = titles[0], titles[1], titles[2]
|
334
|
-
end
|
335
|
-
else
|
336
|
-
intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
|
337
|
-
end
|
338
|
-
[{
|
339
|
-
title_intro: intro,
|
340
|
-
title_main: main,
|
341
|
-
title_part: part,
|
342
|
-
language: "en",
|
343
|
-
script: "Latn"
|
344
|
-
}]
|
249
|
+
RelatonBib::TypedTitleString.from_string hit_data[:title], "en", "Latn"
|
345
250
|
end
|
346
251
|
|
347
|
-
# Return ISO script code.
|
348
|
-
# @param lang [String]
|
349
|
-
# @return [String]
|
350
|
-
# def script(lang)
|
351
|
-
# case lang
|
352
|
-
# when 'en', 'fr' then 'Latn'
|
353
|
-
# end
|
354
|
-
# end
|
355
|
-
|
356
252
|
# Fetch dates
|
357
253
|
# @param doc [Nokogiri::HTML::Document]
|
358
254
|
# @return [Array<Hash>]
|
@@ -365,6 +261,8 @@ module RelatonIec
|
|
365
261
|
dates
|
366
262
|
end
|
367
263
|
|
264
|
+
# rubocop:disable Metrics/MethodLength
|
265
|
+
|
368
266
|
def fetch_contributors(code)
|
369
267
|
code.sub(/\s.*/, "").split("/").map do |abbrev|
|
370
268
|
case abbrev
|
@@ -379,12 +277,15 @@ module RelatonIec
|
|
379
277
|
role: [type: "publisher"] }
|
380
278
|
end
|
381
279
|
end
|
280
|
+
# rubocop:enable Metrics/MethodLength
|
382
281
|
|
383
282
|
# Fetch ICS.
|
384
283
|
# @param doc [Nokogiri::HTML::Document]
|
385
284
|
# @return [Array<Hash>]
|
386
285
|
def fetch_ics(doc)
|
387
|
-
doc.xpath(
|
286
|
+
doc.xpath(
|
287
|
+
'//th[contains(text(), "ICS")]/following-sibling::td/a'
|
288
|
+
).map do |i|
|
388
289
|
code = i.text.match(/[\d\.]+/).to_s.split "."
|
389
290
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
390
291
|
end
|
@@ -401,9 +302,11 @@ module RelatonIec
|
|
401
302
|
links
|
402
303
|
end
|
403
304
|
|
305
|
+
# rubocop:disable Metrics/MethodLength
|
306
|
+
|
404
307
|
# Fetch copyright.
|
405
308
|
# @param title [String]
|
406
|
-
# @return [Hash]
|
309
|
+
# @return [Array<Hash>]
|
407
310
|
def fetch_copyright(code, doc)
|
408
311
|
abbreviation = code.match(/.*?(?=\s)/).to_s
|
409
312
|
case abbreviation
|
@@ -416,9 +319,12 @@ module RelatonIec
|
|
416
319
|
from = doc.xpath("//span[@itemprop='releaseDate']").text
|
417
320
|
.match(/\d{4}/).to_s
|
418
321
|
end
|
419
|
-
{
|
322
|
+
[{
|
323
|
+
owner: [{ name: name, abbreviation: abbreviation, url: url }],
|
324
|
+
from: from,
|
325
|
+
}]
|
420
326
|
end
|
327
|
+
# rubocop:enable Metrics/MethodLength
|
421
328
|
end
|
422
329
|
end
|
423
|
-
# rubocop:enable Metrics/ModuleLength
|
424
330
|
end
|