relaton-iec 1.0.1 → 1.5.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ubuntu.yml +1 -0
- data/.rubocop.yml +2 -2
- data/README.adoc +1 -1
- data/grammars/biblio.rng +36 -6
- data/grammars/isodoc.rng +574 -22
- data/grammars/isostandard.rng +13 -2
- data/lib/relaton_iec.rb +4 -0
- data/lib/relaton_iec/iec_bibliography.rb +56 -47
- data/lib/relaton_iec/scrapper.rb +69 -163
- data/lib/relaton_iec/version.rb +1 -1
- data/relaton_iec.gemspec +1 -1
- metadata +11 -11
data/grammars/isostandard.rng
CHANGED
@@ -91,6 +91,12 @@
|
|
91
91
|
</define>
|
92
92
|
<define name="sections">
|
93
93
|
<element name="sections">
|
94
|
+
<zeroOrMore>
|
95
|
+
<choice>
|
96
|
+
<ref name="note"/>
|
97
|
+
<ref name="admonition"/>
|
98
|
+
</choice>
|
99
|
+
</zeroOrMore>
|
94
100
|
<ref name="clause"/>
|
95
101
|
<optional>
|
96
102
|
<choice>
|
@@ -247,7 +253,7 @@
|
|
247
253
|
<define name="preface">
|
248
254
|
<element name="preface">
|
249
255
|
<optional>
|
250
|
-
<ref name="
|
256
|
+
<ref name="abstract"/>
|
251
257
|
</optional>
|
252
258
|
<ref name="foreword"/>
|
253
259
|
<optional>
|
@@ -263,6 +269,8 @@
|
|
263
269
|
<value>publicly-available-specification</value>
|
264
270
|
<value>international-workshop-agreement</value>
|
265
271
|
<value>guide</value>
|
272
|
+
<value>amendment</value>
|
273
|
+
<value>technical-corrigendum</value>
|
266
274
|
</choice>
|
267
275
|
</define>
|
268
276
|
<define name="structuredidentifier">
|
@@ -354,6 +362,9 @@
|
|
354
362
|
<data type="boolean"/>
|
355
363
|
</attribute>
|
356
364
|
</optional>
|
365
|
+
<optional>
|
366
|
+
<attribute name="number"/>
|
367
|
+
</optional>
|
357
368
|
<optional>
|
358
369
|
<attribute name="subsequence"/>
|
359
370
|
</optional>
|
@@ -510,7 +521,7 @@
|
|
510
521
|
</attribute>
|
511
522
|
</optional>
|
512
523
|
<oneOrMore>
|
513
|
-
<ref name="
|
524
|
+
<ref name="BasicBlock"/>
|
514
525
|
</oneOrMore>
|
515
526
|
</element>
|
516
527
|
</define>
|
data/lib/relaton_iec.rb
CHANGED
@@ -22,7 +22,7 @@ module RelatonIec
|
|
22
22
|
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
23
23
|
# reference is required
|
24
24
|
# @return [String] Relaton XML serialisation of reference
|
25
|
-
def get(code, year = nil, opts = {})
|
25
|
+
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
26
26
|
if year.nil?
|
27
27
|
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ code
|
28
28
|
unless code1.nil?
|
@@ -45,27 +45,33 @@ module RelatonIec
|
|
45
45
|
|
46
46
|
private
|
47
47
|
|
48
|
-
def fetch_ref_err(code, year, missed_years)
|
48
|
+
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
49
49
|
id = year ? "#{code}:#{year}" : code
|
50
50
|
warn "[relaton-iec] WARNING: no match found online for #{id}. "\
|
51
51
|
"The code must be exactly like it is on the standards website."
|
52
|
-
|
53
|
-
"
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
unless missed_years.empty?
|
53
|
+
warn "[relaton-iec] (There was no match for #{year}, though there "\
|
54
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
55
|
+
end
|
56
|
+
if /\d-\d/.match? code
|
57
|
+
warn "[relaton-iec] The provided document part may not exist, or "\
|
58
|
+
"the document may no longer be published in parts."
|
57
59
|
else
|
58
|
-
warn "[relaton-iec] If you wanted to cite all document parts for
|
59
|
-
"use \"#{code} (all parts)\".\nIf the document is
|
60
|
-
"use its document type abbreviation (TS, TR, PAS,
|
60
|
+
warn "[relaton-iec] If you wanted to cite all document parts for "\
|
61
|
+
"the reference, use \"#{code} (all parts)\".\nIf the document is "\
|
62
|
+
"not a standard, use its document type abbreviation (TS, TR, PAS, "\
|
63
|
+
"Guide)."
|
61
64
|
end
|
62
65
|
nil
|
63
66
|
end
|
64
67
|
|
65
|
-
|
66
|
-
|
68
|
+
# @param hits [Array<RelatonIec::Hit>]
|
69
|
+
# @param threads [Integer]
|
70
|
+
# @return [Array<RelatonIec::Hit>]
|
71
|
+
def fetch_pages(hits, threads)
|
72
|
+
workers = RelatonBib::WorkersPool.new threads
|
67
73
|
workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
68
|
-
|
74
|
+
hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
69
75
|
workers.end
|
70
76
|
workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
71
77
|
end
|
@@ -83,45 +89,47 @@ module RelatonIec
|
|
83
89
|
end
|
84
90
|
|
85
91
|
def iev(code = "IEC 60050")
|
86
|
-
RelatonIsoBib::XMLParser.from_xml(<<~"
|
87
|
-
<bibitem>
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
</bibitem>
|
115
|
-
|
92
|
+
RelatonIsoBib::XMLParser.from_xml(<<~"XML")
|
93
|
+
<bibitem>
|
94
|
+
<fetched>#{Date.today}</fetched>
|
95
|
+
<title format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary</title>
|
96
|
+
<link type="src">http://www.electropedia.org</link>
|
97
|
+
<docidentifier>#{code}:2011</docidentifier>
|
98
|
+
<date type="published"><on>2011</on></date>
|
99
|
+
<contributor>
|
100
|
+
<role type="publisher"/>
|
101
|
+
<organization>
|
102
|
+
<name>International Electrotechnical Commission</name>
|
103
|
+
<abbreviation>IEC</abbreviation>
|
104
|
+
<uri>www.iec.ch</uri>
|
105
|
+
</organization>
|
106
|
+
</contributor>
|
107
|
+
<language>en</language> <language>fr</language>
|
108
|
+
<script>Latn</script>
|
109
|
+
<status> <stage>60</stage> </status>
|
110
|
+
<copyright>
|
111
|
+
<from>2018</from>
|
112
|
+
<owner>
|
113
|
+
<organization>
|
114
|
+
<name>International Electrotechnical Commission</name>
|
115
|
+
<abbreviation>IEC</abbreviation>
|
116
|
+
<uri>www.iec.ch</uri>
|
117
|
+
</organization>
|
118
|
+
</owner>
|
119
|
+
</copyright>
|
120
|
+
</bibitem>
|
121
|
+
XML
|
116
122
|
end
|
117
123
|
|
118
124
|
# Sort through the results from Isobib, fetching them three at a time,
|
119
125
|
# and return the first result that matches the code,
|
120
|
-
# matches the year (if provided), and which
|
126
|
+
# matches the year (if provided), and which
|
127
|
+
# has a title (amendments do not).
|
121
128
|
# Only expects the first page of results to be populated.
|
122
129
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
123
|
-
# If no match, returns any years which caused mismatch, for error
|
124
|
-
|
130
|
+
# If no match, returns any years which caused mismatch, for error
|
131
|
+
# reporting
|
132
|
+
def isobib_results_filter(result, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
|
125
133
|
missed_years = []
|
126
134
|
result.each_slice(3) do |s| # ISO website only allows 3 connections
|
127
135
|
fetch_pages(s, 3).each_with_index do |r, _i|
|
@@ -143,7 +151,8 @@ module RelatonIec
|
|
143
151
|
result = isobib_search_filter(code) || return
|
144
152
|
ret = isobib_results_filter(result, year)
|
145
153
|
if ret[:ret]
|
146
|
-
warn "[relaton-iec] (\"#{code}\") found
|
154
|
+
warn "[relaton-iec] (\"#{code}\") found "\
|
155
|
+
"#{ret[:ret].docidentifier.first.id}"
|
147
156
|
ret[:ret]
|
148
157
|
else
|
149
158
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -1,10 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "relaton_iso_bib"
|
4
|
-
require "relaton_iec/hit"
|
5
|
-
require "nokogiri"
|
6
|
-
require "net/http"
|
7
|
-
|
8
3
|
# Capybara.request_driver :poltergeist do |app|
|
9
4
|
# Capybara::Poltergeist::Driver.new app, js_errors: false
|
10
5
|
# end
|
@@ -12,7 +7,6 @@ require "net/http"
|
|
12
7
|
|
13
8
|
module RelatonIec
|
14
9
|
# Scrapper.
|
15
|
-
# rubocop:disable Metrics/ModuleLength
|
16
10
|
module Scrapper
|
17
11
|
DOMAIN = "https://webstore.iec.ch"
|
18
12
|
|
@@ -32,33 +26,24 @@ module RelatonIec
|
|
32
26
|
}.freeze
|
33
27
|
|
34
28
|
class << self
|
35
|
-
#
|
36
|
-
# @return [Array<Hash>]
|
37
|
-
# def get(text)
|
38
|
-
# iso_workers = WorkersPool.new 4
|
39
|
-
# iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
|
40
|
-
# algolia_workers = start_algolia_search(text, iso_workers)
|
41
|
-
# iso_docs = iso_workers.result
|
42
|
-
# algolia_workers.end
|
43
|
-
# algolia_workers.result
|
44
|
-
# iso_docs
|
45
|
-
# end
|
29
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
46
30
|
|
47
31
|
# Parse page.
|
48
|
-
# @param
|
32
|
+
# @param hit_data [Hash]
|
49
33
|
# @return [Hash]
|
50
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
51
34
|
def parse_page(hit_data)
|
52
35
|
doc = get_page hit_data[:url]
|
53
36
|
|
54
37
|
# Fetch edition.
|
55
|
-
edition = doc.at(
|
38
|
+
edition = doc.at(
|
39
|
+
"//th[contains(., 'Edition')]/following-sibling::td/span"
|
40
|
+
).text
|
56
41
|
|
57
42
|
status, relations = fetch_status_relations hit_data[:url]
|
58
43
|
|
59
44
|
IecBibliographicItem.new(
|
60
45
|
fetched: Date.today.to_s,
|
61
|
-
docid:
|
46
|
+
docid: fetch_docid(hit_data),
|
62
47
|
structuredidentifier: fetch_structuredidentifier(doc),
|
63
48
|
edition: edition,
|
64
49
|
language: ["en"],
|
@@ -74,52 +59,47 @@ module RelatonIec
|
|
74
59
|
copyright: fetch_copyright(hit_data[:code], doc),
|
75
60
|
link: fetch_link(doc, hit_data[:url]),
|
76
61
|
relation: relations,
|
77
|
-
place: ["Geneva"]
|
62
|
+
place: ["Geneva"]
|
78
63
|
)
|
79
64
|
end
|
80
65
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
81
66
|
|
82
67
|
private
|
83
68
|
|
84
|
-
# Start search workers.
|
85
|
-
# @param text[String]
|
86
|
-
# @param iec_workers [Isobib::WorkersPool]
|
87
|
-
# @reaturn [Isobib::WorkersPool]
|
88
|
-
# def start_algolia_search(text, iec_workers)
|
89
|
-
# index = Algolia::Index.new 'all_en'
|
90
|
-
# workers = WorkersPool.new
|
91
|
-
# workers.worker do |page|
|
92
|
-
# algolia_worker(index, text, page, workers, iec_workers)
|
93
|
-
# end
|
94
|
-
|
95
|
-
# # Add first page so search worker will start.
|
96
|
-
# workers << 0
|
97
|
-
# end
|
98
|
-
|
99
|
-
# Fetch ISO documents.
|
100
69
|
# @param hit [Hash]
|
101
|
-
# @
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
70
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
71
|
+
def fetch_docid(hit)
|
72
|
+
rest = hit[:code].downcase.sub(%r{
|
73
|
+
(?<head>[^\s]+)\s
|
74
|
+
(?<type>is|ts|tr|pas|srd|guide|tec|wp)?(?(<type>)\s)
|
75
|
+
(?<pnum>[\d-]+)\s?
|
76
|
+
(?<_dd>:)?(?(<_dd>)(?<date>[\d-]+)\s?)
|
77
|
+
}x, "")
|
78
|
+
m = $~
|
79
|
+
deliv = /cmv|csv|exv|prv|rlv|ser/.match(hit[:code].downcase).to_s
|
80
|
+
urn = ["urn", "iec", "std", m[:head].split("/").join("-"), m[:pnum],
|
81
|
+
m[:date], m[:type], deliv, "en"]
|
82
|
+
urn += fetch_ajunct(rest)
|
83
|
+
[
|
84
|
+
RelatonBib::DocumentIdentifier.new(id: hit[:code], type: "IEC"),
|
85
|
+
RelatonBib::DocumentIdentifier.new(id: urn.join(":"), type: "URN"),
|
86
|
+
]
|
87
|
+
end
|
106
88
|
|
107
|
-
#
|
108
|
-
# @
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
# iso_workers.end unless next_page < res['nbPages']
|
122
|
-
# end
|
89
|
+
# @param rest [String]
|
90
|
+
# @return [Array<String, nil>]
|
91
|
+
def fetch_ajunct(rest)
|
92
|
+
r = rest.sub(%r{
|
93
|
+
(?<_pl>\+)(?(<_pl>)(?<adjunct>amd)(?<adjnum>\d+)\s?)
|
94
|
+
(?<_d2>:)?(?(<_d2>)(?<adjdt>[\d-]+)\s?)
|
95
|
+
}x, "")
|
96
|
+
m = $~ || {}
|
97
|
+
return [] unless m[:adjunct]
|
98
|
+
|
99
|
+
plus = m[:adjunct] && "plus"
|
100
|
+
urn = [plus, m[:adjunct], m[:adjnum], m[:adjdt]]
|
101
|
+
urn + fetch_ajunct(r)
|
102
|
+
end
|
123
103
|
|
124
104
|
# Fetch abstracts.
|
125
105
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -134,19 +114,6 @@ module RelatonIec
|
|
134
114
|
}]
|
135
115
|
end
|
136
116
|
|
137
|
-
# Get langs.
|
138
|
-
# @param doc [Nokogiri::HTML::Document]
|
139
|
-
# @return [Array<Hash>]
|
140
|
-
# def langs(doc)
|
141
|
-
# lgs = [{ lang: 'en' }]
|
142
|
-
# doc.css('ul#lang-switcher ul li a').each do |lang_link|
|
143
|
-
# lang_path = lang_link.attr('href')
|
144
|
-
# lang = lang_path.match(%r{^\/(fr)\/})
|
145
|
-
# lgs << { lang: lang[1], path: lang_path } if lang
|
146
|
-
# end
|
147
|
-
# lgs
|
148
|
-
# end
|
149
|
-
|
150
117
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
151
118
|
|
152
119
|
# Get page.
|
@@ -154,25 +121,20 @@ module RelatonIec
|
|
154
121
|
# @return [Nokogiri::HTML::Document]
|
155
122
|
def get_page(url)
|
156
123
|
uri = URI url
|
157
|
-
resp = Net::HTTP.get_response(uri)
|
124
|
+
resp = Net::HTTP.get_response(uri)
|
158
125
|
case resp.code
|
159
126
|
when "301"
|
160
127
|
path = resp["location"]
|
161
128
|
url = DOMAIN + path
|
162
129
|
uri = URI url
|
163
|
-
resp = Net::HTTP.get_response(uri)
|
130
|
+
resp = Net::HTTP.get_response(uri)
|
164
131
|
when "404"
|
165
132
|
raise RelatonBib::RequestError, "Page not found #{url}"
|
166
133
|
end
|
167
|
-
# n = 0
|
168
|
-
# while resp.body !~ /<strong/ && n < 10
|
169
|
-
# resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
|
170
|
-
# n += 1
|
171
|
-
# end
|
172
134
|
Nokogiri::HTML(resp.body)
|
173
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
174
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
175
|
-
OpenSSL::SSL::SSLError
|
135
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
136
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
137
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
176
138
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
177
139
|
end
|
178
140
|
# rubocop:enable Metrics/AbcSize
|
@@ -184,12 +146,12 @@ module RelatonIec
|
|
184
146
|
item_ref = doc.at("//span[@itemprop='productID']")
|
185
147
|
unless item_ref
|
186
148
|
return RelatonIsoBib::StructuredIdentifier.new(
|
187
|
-
project_number: "?", part_number: "", prefix: nil, id: "?"
|
149
|
+
project_number: "?", part_number: "", prefix: nil, id: "?"
|
188
150
|
)
|
189
151
|
end
|
190
152
|
|
191
153
|
m = item_ref.text.match(
|
192
|
-
/(?<=\s)(?<project>\d+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)
|
154
|
+
/(?<=\s)(?<project>\d+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)/
|
193
155
|
)
|
194
156
|
RelatonIsoBib::StructuredIdentifier.new(
|
195
157
|
project_number: m[:project],
|
@@ -197,7 +159,7 @@ module RelatonIec
|
|
197
159
|
subpart_number: m[:subpart],
|
198
160
|
prefix: nil,
|
199
161
|
type: "IEC",
|
200
|
-
id: item_ref.text
|
162
|
+
id: item_ref.text
|
201
163
|
)
|
202
164
|
end
|
203
165
|
|
@@ -211,15 +173,12 @@ module RelatonIec
|
|
211
173
|
statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
|
212
174
|
s = wip.at("STAGE").text
|
213
175
|
stage, substage = statuses[s]["stage"].split "."
|
214
|
-
# status = statuses[s]["status"]
|
215
176
|
else
|
216
|
-
# status = "Published"
|
217
177
|
stage = "60"
|
218
178
|
substage = "60"
|
219
179
|
end
|
220
180
|
RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
|
221
181
|
end
|
222
|
-
# rubocop:enable Metrics/MethodLength
|
223
182
|
|
224
183
|
# Fetch workgroup.
|
225
184
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -237,13 +196,15 @@ module RelatonIec
|
|
237
196
|
}],
|
238
197
|
}
|
239
198
|
end
|
199
|
+
# rubocop:enable Metrics/MethodLength
|
240
200
|
|
241
201
|
# Fetch relations.
|
242
202
|
# @param doc [Nokogiri::HTML::Document]
|
243
203
|
# @return [Array<Hash>]
|
244
204
|
# rubocop:disable Metrics/MethodLength
|
245
205
|
def fetch_relations(doc)
|
246
|
-
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]')
|
206
|
+
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]')
|
207
|
+
.map do |r|
|
247
208
|
r_type = r.at("STATUS").text.downcase
|
248
209
|
type = case r_type
|
249
210
|
# when 'published' then 'obsoletes' # Valid
|
@@ -251,9 +212,8 @@ module RelatonIec
|
|
251
212
|
when "withdrawn" then "obsoletes"
|
252
213
|
else r_type
|
253
214
|
end
|
254
|
-
# url = DOMAIN + "/publication/" + r.at("PUB_ID").text
|
255
215
|
fref = RelatonBib::FormattedRef.new(
|
256
|
-
content: r.at("FULL_NAME").text, format: "text/plain"
|
216
|
+
content: r.at("FULL_NAME").text, format: "text/plain"
|
257
217
|
)
|
258
218
|
bibitem = IecBibliographicItem.new(formattedref: fref)
|
259
219
|
{ type: type, bibitem: bibitem }
|
@@ -270,22 +230,6 @@ module RelatonIec
|
|
270
230
|
status = fetch_status doc
|
271
231
|
relations = fetch_relations doc
|
272
232
|
[status, relations]
|
273
|
-
# doc.css('ul.steps li').inject([]) do |a, r|
|
274
|
-
# r_type = r.css('strong').text
|
275
|
-
# type = case r_type
|
276
|
-
# when 'Previously', 'Will be replaced by' then 'obsoletes'
|
277
|
-
# when 'Corrigenda/Amendments', 'Revised by', 'Now confirmed'
|
278
|
-
# 'updates'
|
279
|
-
# else r_type
|
280
|
-
# end
|
281
|
-
# if ['Now', 'Now under review'].include? type
|
282
|
-
# a
|
283
|
-
# else
|
284
|
-
# a + r.css('a').map do |id|
|
285
|
-
# { type: type, identifier: id.text, url: id['href'] }
|
286
|
-
# end
|
287
|
-
# end
|
288
|
-
# end
|
289
233
|
end
|
290
234
|
# rubocop:enable Metrics/MethodLength
|
291
235
|
|
@@ -293,66 +237,18 @@ module RelatonIec
|
|
293
237
|
# @param doc [Nokogiri::HTML::Document]
|
294
238
|
# @return [String]
|
295
239
|
def fetch_type(doc)
|
296
|
-
doc.at(
|
297
|
-
|
298
|
-
|
299
|
-
# /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
|
300
|
-
# #return "international-standard" if type_match.nil?
|
301
|
-
# if TYPES[type_match[2]]
|
302
|
-
# TYPES[type_match[2]]
|
303
|
-
# elsif type_match[1]
|
304
|
-
# elsif type_match[1] == 'ISO'
|
305
|
-
# 'international-standard'
|
306
|
-
# elsif type_match[1] == 'IWA'
|
307
|
-
# 'international-workshop-agreement'
|
308
|
-
# end
|
309
|
-
# # rescue => _e
|
310
|
-
# # puts 'Unknown document type: ' + title
|
240
|
+
doc.at(
|
241
|
+
'//th[contains(., "Publication type")]/following-sibling::td/span'
|
242
|
+
).text.downcase.tr " ", "-"
|
311
243
|
end
|
312
244
|
|
313
245
|
# Fetch titles.
|
314
246
|
# @param hit_data [Hash]
|
315
247
|
# @return [Array<Hash>]
|
316
248
|
def fetch_titles(hit_data)
|
317
|
-
|
318
|
-
case titles.size
|
319
|
-
when 0
|
320
|
-
intro, main, part = nil, "", nil
|
321
|
-
when 1
|
322
|
-
intro, main, part = nil, titles[0], nil
|
323
|
-
when 2
|
324
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
325
|
-
intro, main, part = nil, titles[0], titles[1]
|
326
|
-
else
|
327
|
-
intro, main, part = titles[0], titles[1], nil
|
328
|
-
end
|
329
|
-
when 3
|
330
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
331
|
-
intro, main, part = nil, titles[0], titles[1..2].join(" - ")
|
332
|
-
else
|
333
|
-
intro, main, part = titles[0], titles[1], titles[2]
|
334
|
-
end
|
335
|
-
else
|
336
|
-
intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
|
337
|
-
end
|
338
|
-
[{
|
339
|
-
title_intro: intro,
|
340
|
-
title_main: main,
|
341
|
-
title_part: part,
|
342
|
-
language: "en",
|
343
|
-
script: "Latn"
|
344
|
-
}]
|
249
|
+
RelatonBib::TypedTitleString.from_string hit_data[:title], "en", "Latn"
|
345
250
|
end
|
346
251
|
|
347
|
-
# Return ISO script code.
|
348
|
-
# @param lang [String]
|
349
|
-
# @return [String]
|
350
|
-
# def script(lang)
|
351
|
-
# case lang
|
352
|
-
# when 'en', 'fr' then 'Latn'
|
353
|
-
# end
|
354
|
-
# end
|
355
|
-
|
356
252
|
# Fetch dates
|
357
253
|
# @param doc [Nokogiri::HTML::Document]
|
358
254
|
# @return [Array<Hash>]
|
@@ -365,6 +261,8 @@ module RelatonIec
|
|
365
261
|
dates
|
366
262
|
end
|
367
263
|
|
264
|
+
# rubocop:disable Metrics/MethodLength
|
265
|
+
|
368
266
|
def fetch_contributors(code)
|
369
267
|
code.sub(/\s.*/, "").split("/").map do |abbrev|
|
370
268
|
case abbrev
|
@@ -379,12 +277,15 @@ module RelatonIec
|
|
379
277
|
role: [type: "publisher"] }
|
380
278
|
end
|
381
279
|
end
|
280
|
+
# rubocop:enable Metrics/MethodLength
|
382
281
|
|
383
282
|
# Fetch ICS.
|
384
283
|
# @param doc [Nokogiri::HTML::Document]
|
385
284
|
# @return [Array<Hash>]
|
386
285
|
def fetch_ics(doc)
|
387
|
-
doc.xpath(
|
286
|
+
doc.xpath(
|
287
|
+
'//th[contains(text(), "ICS")]/following-sibling::td/a'
|
288
|
+
).map do |i|
|
388
289
|
code = i.text.match(/[\d\.]+/).to_s.split "."
|
389
290
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
390
291
|
end
|
@@ -401,9 +302,11 @@ module RelatonIec
|
|
401
302
|
links
|
402
303
|
end
|
403
304
|
|
305
|
+
# rubocop:disable Metrics/MethodLength
|
306
|
+
|
404
307
|
# Fetch copyright.
|
405
308
|
# @param code [String]
|
406
|
-
# @return [Hash]
|
309
|
+
# @return [Array<Hash>]
|
407
310
|
def fetch_copyright(code, doc)
|
408
311
|
abbreviation = code.match(/.*?(?=\s)/).to_s
|
409
312
|
case abbreviation
|
@@ -416,9 +319,12 @@ module RelatonIec
|
|
416
319
|
from = doc.xpath("//span[@itemprop='releaseDate']").text
|
417
320
|
.match(/\d{4}/).to_s
|
418
321
|
end
|
419
|
-
{
|
322
|
+
[{
|
323
|
+
owner: [{ name: name, abbreviation: abbreviation, url: url }],
|
324
|
+
from: from,
|
325
|
+
}]
|
420
326
|
end
|
327
|
+
# rubocop:enable Metrics/MethodLength
|
421
328
|
end
|
422
329
|
end
|
423
|
-
# rubocop:enable Metrics/ModuleLength
|
424
330
|
end
|