relaton-iec 0.10.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ubuntu.yml +1 -0
- data/.rubocop.yml +2 -2
- data/README.adoc +1 -1
- data/grammars/basicdoc.rng +986 -0
- data/grammars/biblio.rng +1237 -0
- data/grammars/iec.rng +43 -0
- data/grammars/isodoc.rng +1504 -0
- data/{grammars → grammars/isostandard.rng} +148 -472
- data/grammars/reqt.rng +165 -0
- data/lib/relaton_iec.rb +16 -9
- data/lib/relaton_iec/hash_converter.rb +14 -0
- data/lib/relaton_iec/hit.rb +1 -1
- data/lib/relaton_iec/hit_collection.rb +0 -1
- data/lib/relaton_iec/iec_bibliographic_item.rb +9 -0
- data/lib/relaton_iec/iec_bibliography.rb +3 -8
- data/lib/relaton_iec/processor.rb +7 -7
- data/lib/relaton_iec/scrapper.rb +35 -166
- data/lib/relaton_iec/version.rb +1 -1
- data/lib/relaton_iec/xml_parser.rb +14 -0
- data/relaton_iec.gemspec +6 -5
- metadata +29 -7
data/grammars/reqt.rng
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
3
|
+
<!--
|
4
|
+
Presupposes isodoc.rnc, is included in it
|
5
|
+
include "isodoc.rnc" { }
|
6
|
+
-->
|
7
|
+
<define name="requirement">
|
8
|
+
<element name="requirement">
|
9
|
+
<ref name="RequirementType"/>
|
10
|
+
</element>
|
11
|
+
</define>
|
12
|
+
<define name="recommendation">
|
13
|
+
<element name="recommendation">
|
14
|
+
<ref name="RequirementType"/>
|
15
|
+
</element>
|
16
|
+
</define>
|
17
|
+
<define name="permission">
|
18
|
+
<element name="permission">
|
19
|
+
<ref name="RequirementType"/>
|
20
|
+
</element>
|
21
|
+
</define>
|
22
|
+
<define name="RequirementType">
|
23
|
+
<optional>
|
24
|
+
<attribute name="obligation">
|
25
|
+
<ref name="ObligationType"/>
|
26
|
+
</attribute>
|
27
|
+
</optional>
|
28
|
+
<optional>
|
29
|
+
<attribute name="unnumbered">
|
30
|
+
<data type="boolean"/>
|
31
|
+
</attribute>
|
32
|
+
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="subsequence"/>
|
35
|
+
</optional>
|
36
|
+
<attribute name="id">
|
37
|
+
<data type="ID"/>
|
38
|
+
</attribute>
|
39
|
+
<optional>
|
40
|
+
<attribute name="filename"/>
|
41
|
+
</optional>
|
42
|
+
<optional>
|
43
|
+
<ref name="reqtitle"/>
|
44
|
+
</optional>
|
45
|
+
<optional>
|
46
|
+
<ref name="label"/>
|
47
|
+
</optional>
|
48
|
+
<optional>
|
49
|
+
<ref name="subject"/>
|
50
|
+
</optional>
|
51
|
+
<optional>
|
52
|
+
<ref name="reqinherit"/>
|
53
|
+
</optional>
|
54
|
+
<zeroOrMore>
|
55
|
+
<ref name="classification"/>
|
56
|
+
</zeroOrMore>
|
57
|
+
<zeroOrMore>
|
58
|
+
<choice>
|
59
|
+
<ref name="measurementtarget"/>
|
60
|
+
<ref name="specification"/>
|
61
|
+
<ref name="verification"/>
|
62
|
+
<ref name="import"/>
|
63
|
+
<ref name="description"/>
|
64
|
+
</choice>
|
65
|
+
</zeroOrMore>
|
66
|
+
<optional>
|
67
|
+
<ref name="reqt_references"/>
|
68
|
+
</optional>
|
69
|
+
<zeroOrMore>
|
70
|
+
<choice>
|
71
|
+
<ref name="requirement"/>
|
72
|
+
<ref name="recommendation"/>
|
73
|
+
<ref name="permission"/>
|
74
|
+
</choice>
|
75
|
+
</zeroOrMore>
|
76
|
+
</define>
|
77
|
+
<define name="reqtitle">
|
78
|
+
<element name="title">
|
79
|
+
<ref name="FormattedString"/>
|
80
|
+
</element>
|
81
|
+
</define>
|
82
|
+
<define name="label">
|
83
|
+
<element name="label">
|
84
|
+
<text/>
|
85
|
+
</element>
|
86
|
+
</define>
|
87
|
+
<define name="subject">
|
88
|
+
<element name="subject">
|
89
|
+
<text/>
|
90
|
+
</element>
|
91
|
+
</define>
|
92
|
+
<define name="reqinherit">
|
93
|
+
<element name="inherit">
|
94
|
+
<text/>
|
95
|
+
</element>
|
96
|
+
</define>
|
97
|
+
<define name="measurementtarget">
|
98
|
+
<element name="measurement-target">
|
99
|
+
<ref name="RequirementSubpart"/>
|
100
|
+
</element>
|
101
|
+
</define>
|
102
|
+
<define name="specification">
|
103
|
+
<element name="specification">
|
104
|
+
<ref name="RequirementSubpart"/>
|
105
|
+
</element>
|
106
|
+
</define>
|
107
|
+
<define name="verification">
|
108
|
+
<element name="verification">
|
109
|
+
<ref name="RequirementSubpart"/>
|
110
|
+
</element>
|
111
|
+
</define>
|
112
|
+
<define name="import">
|
113
|
+
<element name="import">
|
114
|
+
<ref name="RequirementSubpart"/>
|
115
|
+
</element>
|
116
|
+
</define>
|
117
|
+
<define name="description">
|
118
|
+
<element name="description">
|
119
|
+
<ref name="RequirementSubpart"/>
|
120
|
+
</element>
|
121
|
+
</define>
|
122
|
+
<define name="reqt_references">
|
123
|
+
<element name="references">
|
124
|
+
<oneOrMore>
|
125
|
+
<ref name="bibitem"/>
|
126
|
+
</oneOrMore>
|
127
|
+
</element>
|
128
|
+
</define>
|
129
|
+
<define name="RequirementSubpart">
|
130
|
+
<optional>
|
131
|
+
<attribute name="type"/>
|
132
|
+
</optional>
|
133
|
+
<optional>
|
134
|
+
<attribute name="exclude">
|
135
|
+
<data type="boolean"/>
|
136
|
+
</attribute>
|
137
|
+
</optional>
|
138
|
+
<oneOrMore>
|
139
|
+
<ref name="BasicBlock"/>
|
140
|
+
</oneOrMore>
|
141
|
+
</define>
|
142
|
+
<define name="ObligationType">
|
143
|
+
<choice>
|
144
|
+
<value>requirement</value>
|
145
|
+
<value>recommendation</value>
|
146
|
+
<value>permission</value>
|
147
|
+
</choice>
|
148
|
+
</define>
|
149
|
+
<define name="classification">
|
150
|
+
<element name="classification">
|
151
|
+
<ref name="classification_tag"/>
|
152
|
+
<ref name="classification_value"/>
|
153
|
+
</element>
|
154
|
+
</define>
|
155
|
+
<define name="classification_tag">
|
156
|
+
<element name="tag">
|
157
|
+
<text/>
|
158
|
+
</element>
|
159
|
+
</define>
|
160
|
+
<define name="classification_value">
|
161
|
+
<element name="value">
|
162
|
+
<text/>
|
163
|
+
</element>
|
164
|
+
</define>
|
165
|
+
</grammar>
|
data/lib/relaton_iec.rb
CHANGED
@@ -1,14 +1,21 @@
|
|
1
|
+
require "relaton_iso_bib"
|
2
|
+
require "relaton_iec/hit"
|
3
|
+
require "nokogiri"
|
4
|
+
require "net/http"
|
1
5
|
require "relaton_iec/version"
|
2
6
|
require "relaton_iec/iec_bibliography"
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
# return if caller.detect { |c| c.include? "register_gems" }
|
8
|
-
|
9
|
-
# Relaton::Registry.instance.register(RelatonIec::Processor)
|
10
|
-
# end
|
7
|
+
require "relaton_iec/iec_bibliographic_item"
|
8
|
+
require "relaton_iec/xml_parser"
|
9
|
+
require "relaton_iec/hash_converter"
|
10
|
+
require "digest/md5"
|
11
11
|
|
12
12
|
module RelatonIec
|
13
|
-
#
|
13
|
+
# Returns hash of XML grammar
|
14
|
+
# @return [String]
|
15
|
+
def self.grammar_hash
|
16
|
+
gem_path = File.expand_path "..", __dir__
|
17
|
+
grammars_path = File.join gem_path, "grammars", "*"
|
18
|
+
grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
|
19
|
+
Digest::MD5.hexdigest grammars
|
20
|
+
end
|
14
21
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module RelatonIec
|
2
|
+
class HashConverter < RelatonIsoBib::HashConverter
|
3
|
+
class << self
|
4
|
+
#
|
5
|
+
# Overrides superclass's method
|
6
|
+
#
|
7
|
+
# @param item [Hash]
|
8
|
+
# @return [RelatonIec::IecBibliographicItem]
|
9
|
+
def bib_item(item)
|
10
|
+
IecBibliographicItem.new(item)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/relaton_iec/hit.rb
CHANGED
@@ -17,15 +17,10 @@ module RelatonIec
|
|
17
17
|
raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
|
18
18
|
end
|
19
19
|
|
20
|
-
# @param text [String]
|
21
|
-
# @return [Array<IsoBibliographicItem>]
|
22
|
-
# def search_and_fetch(text, year = nil)
|
23
|
-
# Scrapper.get(text, year)
|
24
|
-
# end
|
25
|
-
|
26
20
|
# @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
|
27
21
|
# @param year [String] the year the standard was published (optional)
|
28
|
-
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
22
|
+
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
23
|
+
# reference is required
|
29
24
|
# @return [String] Relaton XML serialisation of reference
|
30
25
|
def get(code, year = nil, opts = {})
|
31
26
|
if year.nil?
|
@@ -72,7 +67,7 @@ module RelatonIec
|
|
72
67
|
workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
73
68
|
s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
74
69
|
workers.end
|
75
|
-
workers.result.
|
70
|
+
workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
76
71
|
end
|
77
72
|
|
78
73
|
def isobib_search_filter(code)
|
@@ -12,28 +12,28 @@ module RelatonIec
|
|
12
12
|
# @param code [String]
|
13
13
|
# @param date [String, NilClass] year
|
14
14
|
# @param opts [Hash]
|
15
|
-
# @return [RelatonIsoBib::
|
15
|
+
# @return [RelatonIec::IecBibliographicItem]
|
16
16
|
def get(code, date, opts)
|
17
17
|
::RelatonIec::IecBibliography.get(code, date, opts)
|
18
18
|
end
|
19
19
|
|
20
20
|
# @param xml [String]
|
21
|
-
# @return [RelatonIsoBib::
|
21
|
+
# @return [RelatonIec::IecBibliographicItem]
|
22
22
|
def from_xml(xml)
|
23
|
-
|
23
|
+
RelatonIec::XMLParser.from_xml xml
|
24
24
|
end
|
25
25
|
|
26
26
|
# @param hash [Hash]
|
27
|
-
# @return [RelatonIsoBib::
|
27
|
+
# @return [RelatonIec::IecBibliographicItem]
|
28
28
|
def hash_to_bib(hash)
|
29
|
-
item_hash = ::
|
30
|
-
::
|
29
|
+
item_hash = ::RelatonIec::HashConverter.hash_to_bib(hash)
|
30
|
+
::RelatonIec::IecBibliographicItem.new item_hash
|
31
31
|
end
|
32
32
|
|
33
33
|
# Returns hash of XML grammar
|
34
34
|
# @return [String]
|
35
35
|
def grammar_hash
|
36
|
-
@grammar_hash ||= ::
|
36
|
+
@grammar_hash ||= ::RelatonIec.grammar_hash
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -1,10 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "relaton_iso_bib"
|
4
|
-
require "relaton_iec/hit"
|
5
|
-
require "nokogiri"
|
6
|
-
require "net/http"
|
7
|
-
|
8
3
|
# Capybara.request_driver :poltergeist do |app|
|
9
4
|
# Capybara::Poltergeist::Driver.new app, js_errors: false
|
10
5
|
# end
|
@@ -12,7 +7,6 @@ require "net/http"
|
|
12
7
|
|
13
8
|
module RelatonIec
|
14
9
|
# Scrapper.
|
15
|
-
# rubocop:disable Metrics/ModuleLength
|
16
10
|
module Scrapper
|
17
11
|
DOMAIN = "https://webstore.iec.ch"
|
18
12
|
|
@@ -32,31 +26,22 @@ module RelatonIec
|
|
32
26
|
}.freeze
|
33
27
|
|
34
28
|
class << self
|
35
|
-
#
|
36
|
-
# @return [Array<Hash>]
|
37
|
-
# def get(text)
|
38
|
-
# iso_workers = WorkersPool.new 4
|
39
|
-
# iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
|
40
|
-
# algolia_workers = start_algolia_search(text, iso_workers)
|
41
|
-
# iso_docs = iso_workers.result
|
42
|
-
# algolia_workers.end
|
43
|
-
# algolia_workers.result
|
44
|
-
# iso_docs
|
45
|
-
# end
|
29
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
46
30
|
|
47
31
|
# Parse page.
|
48
32
|
# @param hit_data [Hash]
|
49
33
|
# @return [Hash]
|
50
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
51
34
|
def parse_page(hit_data)
|
52
35
|
doc = get_page hit_data[:url]
|
53
36
|
|
54
37
|
# Fetch edition.
|
55
|
-
edition = doc.at(
|
38
|
+
edition = doc.at(
|
39
|
+
"//th[contains(., 'Edition')]/following-sibling::td/span",
|
40
|
+
).text
|
56
41
|
|
57
42
|
status, relations = fetch_status_relations hit_data[:url]
|
58
43
|
|
59
|
-
|
44
|
+
IecBibliographicItem.new(
|
60
45
|
fetched: Date.today.to_s,
|
61
46
|
docid: [RelatonBib::DocumentIdentifier.new(id: hit_data[:code], type: "IEC")],
|
62
47
|
structuredidentifier: fetch_structuredidentifier(doc),
|
@@ -81,46 +66,6 @@ module RelatonIec
|
|
81
66
|
|
82
67
|
private
|
83
68
|
|
84
|
-
# Start search workers.
|
85
|
-
# @param text[String]
|
86
|
-
# @param iec_workers [Isobib::WorkersPool]
|
87
|
-
# @reaturn [Isobib::WorkersPool]
|
88
|
-
# def start_algolia_search(text, iec_workers)
|
89
|
-
# index = Algolia::Index.new 'all_en'
|
90
|
-
# workers = WorkersPool.new
|
91
|
-
# workers.worker do |page|
|
92
|
-
# algolia_worker(index, text, page, workers, iec_workers)
|
93
|
-
# end
|
94
|
-
|
95
|
-
# # Add first page so search worker will start.
|
96
|
-
# workers << 0
|
97
|
-
# end
|
98
|
-
|
99
|
-
# Fetch ISO documents.
|
100
|
-
# @param hit [Hash]
|
101
|
-
# @param isiso_workers [Isobib::WorkersPool]
|
102
|
-
# def iso_worker(hit, iso_workers)
|
103
|
-
# print "Parse #{iso_workers.size} of #{iso_workers.nb_hits} \r"
|
104
|
-
# parse_page hit
|
105
|
-
# end
|
106
|
-
|
107
|
-
# Fetch hits from algolia search service.
|
108
|
-
# @param index[Algolia::Index]
|
109
|
-
# @param text [String]
|
110
|
-
# @param page [Integer]
|
111
|
-
# @param algolia_workers [Isobib::WorkersPool]
|
112
|
-
# @param isiso_workers [Isobib::WorkersPool]
|
113
|
-
# def algolia_worker(index, text, page, algolia_workers, iso_workers)
|
114
|
-
# res = index.search text, facetFilters: ['category:standard'], page: page
|
115
|
-
# next_page = res['page'] + 1
|
116
|
-
# algolia_workers << next_page if next_page < res['nbPages']
|
117
|
-
# res['hits'].each do |hit|
|
118
|
-
# iso_workers.nb_hits = res['nbHits']
|
119
|
-
# iso_workers << hit
|
120
|
-
# end
|
121
|
-
# iso_workers.end unless next_page < res['nbPages']
|
122
|
-
# end
|
123
|
-
|
124
69
|
# Fetch abstracts.
|
125
70
|
# @param doc [Nokogiri::HTML::Document]
|
126
71
|
# @return [Array<Array>]
|
@@ -134,19 +79,6 @@ module RelatonIec
|
|
134
79
|
}]
|
135
80
|
end
|
136
81
|
|
137
|
-
# Get langs.
|
138
|
-
# @param doc [Nokogiri::HTML::Document]
|
139
|
-
# @return [Array<Hash>]
|
140
|
-
# def langs(doc)
|
141
|
-
# lgs = [{ lang: 'en' }]
|
142
|
-
# doc.css('ul#lang-switcher ul li a').each do |lang_link|
|
143
|
-
# lang_path = lang_link.attr('href')
|
144
|
-
# lang = lang_path.match(%r{^\/(fr)\/})
|
145
|
-
# lgs << { lang: lang[1], path: lang_path } if lang
|
146
|
-
# end
|
147
|
-
# lgs
|
148
|
-
# end
|
149
|
-
|
150
82
|
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
151
83
|
|
152
84
|
# Get page.
|
@@ -154,25 +86,20 @@ module RelatonIec
|
|
154
86
|
# @return [Array<Nokogiri::HTML::Document, String>]
|
155
87
|
def get_page(url)
|
156
88
|
uri = URI url
|
157
|
-
resp = Net::HTTP.get_response(uri)
|
89
|
+
resp = Net::HTTP.get_response(uri)
|
158
90
|
case resp.code
|
159
91
|
when "301"
|
160
92
|
path = resp["location"]
|
161
93
|
url = DOMAIN + path
|
162
94
|
uri = URI url
|
163
|
-
resp = Net::HTTP.get_response(uri)
|
95
|
+
resp = Net::HTTP.get_response(uri)
|
164
96
|
when "404"
|
165
97
|
raise RelatonBib::RequestError, "Page not found #{url}"
|
166
98
|
end
|
167
|
-
# n = 0
|
168
|
-
# while resp.body !~ /<strong/ && n < 10
|
169
|
-
# resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
|
170
|
-
# n += 1
|
171
|
-
# end
|
172
99
|
Nokogiri::HTML(resp.body)
|
173
|
-
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
174
|
-
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
175
|
-
OpenSSL::SSL::SSLError
|
100
|
+
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
101
|
+
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
102
|
+
Net::ProtocolError, OpenSSL::SSL::SSLError
|
176
103
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
177
104
|
end
|
178
105
|
# rubocop:enable Metrics/AbcSize
|
@@ -211,15 +138,12 @@ module RelatonIec
|
|
211
138
|
statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
|
212
139
|
s = wip.at("STAGE").text
|
213
140
|
stage, substage = statuses[s]["stage"].split "."
|
214
|
-
# status = statuses[s]["status"]
|
215
141
|
else
|
216
|
-
# status = "Published"
|
217
142
|
stage = "60"
|
218
143
|
substage = "60"
|
219
144
|
end
|
220
145
|
RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
|
221
146
|
end
|
222
|
-
# rubocop:enable Metrics/MethodLength
|
223
147
|
|
224
148
|
# Fetch workgroup.
|
225
149
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -237,27 +161,26 @@ module RelatonIec
|
|
237
161
|
}],
|
238
162
|
}
|
239
163
|
end
|
164
|
+
# rubocop:enable Metrics/MethodLength
|
240
165
|
|
241
166
|
# Fetch relations.
|
242
167
|
# @param doc [Nokogiri::HTML::Document]
|
243
168
|
# @return [Array<Hash>]
|
244
169
|
# rubocop:disable Metrics/MethodLength
|
245
170
|
def fetch_relations(doc)
|
246
|
-
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').
|
171
|
+
doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').
|
172
|
+
map do |r|
|
247
173
|
r_type = r.at("STATUS").text.downcase
|
248
174
|
type = case r_type
|
249
|
-
|
175
|
+
# when 'published' then 'obsoletes' # Valid
|
250
176
|
when "revised", "replaced" then "updates"
|
251
177
|
when "withdrawn" then "obsoletes"
|
252
178
|
else r_type
|
253
179
|
end
|
254
|
-
# url = DOMAIN + "/publication/" + r.at("PUB_ID").text
|
255
180
|
fref = RelatonBib::FormattedRef.new(
|
256
181
|
content: r.at("FULL_NAME").text, format: "text/plain",
|
257
182
|
)
|
258
|
-
bibitem =
|
259
|
-
formattedref: fref,
|
260
|
-
)
|
183
|
+
bibitem = IecBibliographicItem.new(formattedref: fref)
|
261
184
|
{ type: type, bibitem: bibitem }
|
262
185
|
end
|
263
186
|
end
|
@@ -272,22 +195,6 @@ module RelatonIec
|
|
272
195
|
status = fetch_status doc
|
273
196
|
relations = fetch_relations doc
|
274
197
|
[status, relations]
|
275
|
-
# doc.css('ul.steps li').inject([]) do |a, r|
|
276
|
-
# r_type = r.css('strong').text
|
277
|
-
# type = case r_type
|
278
|
-
# when 'Previously', 'Will be replaced by' then 'obsoletes'
|
279
|
-
# when 'Corrigenda/Amendments', 'Revised by', 'Now confirmed'
|
280
|
-
# 'updates'
|
281
|
-
# else r_type
|
282
|
-
# end
|
283
|
-
# if ['Now', 'Now under review'].include? type
|
284
|
-
# a
|
285
|
-
# else
|
286
|
-
# a + r.css('a').map do |id|
|
287
|
-
# { type: type, identifier: id.text, url: id['href'] }
|
288
|
-
# end
|
289
|
-
# end
|
290
|
-
# end
|
291
198
|
end
|
292
199
|
# rubocop:enable Metrics/MethodLength
|
293
200
|
|
@@ -295,66 +202,18 @@ module RelatonIec
|
|
295
202
|
# @param doc [Nokogiri::HTML::Document]
|
296
203
|
# @return [String]
|
297
204
|
def fetch_type(doc)
|
298
|
-
doc.at(
|
299
|
-
|
300
|
-
|
301
|
-
# /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
|
302
|
-
# #return "international-standard" if type_match.nil?
|
303
|
-
# if TYPES[type_match[2]]
|
304
|
-
# TYPES[type_match[2]]
|
305
|
-
# elsif type_match[1]
|
306
|
-
# elsif type_match[1] == 'ISO'
|
307
|
-
# 'international-standard'
|
308
|
-
# elsif type_match[1] == 'IWA'
|
309
|
-
# 'international-workshop-agreement'
|
310
|
-
# end
|
311
|
-
# # rescue => _e
|
312
|
-
# # puts 'Unknown document type: ' + title
|
205
|
+
doc.at(
|
206
|
+
'//th[contains(., "Publication type")]/following-sibling::td/span',
|
207
|
+
).text.downcase.tr " ", "-"
|
313
208
|
end
|
314
209
|
|
315
210
|
# Fetch titles.
|
316
211
|
# @param hit_data [Hash]
|
317
212
|
# @return [Array<Hash>]
|
318
213
|
def fetch_titles(hit_data)
|
319
|
-
|
320
|
-
case titles.size
|
321
|
-
when 0
|
322
|
-
intro, main, part = nil, "", nil
|
323
|
-
when 1
|
324
|
-
intro, main, part = nil, titles[0], nil
|
325
|
-
when 2
|
326
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
327
|
-
intro, main, part = nil, titles[0], titles[1]
|
328
|
-
else
|
329
|
-
intro, main, part = titles[0], titles[1], nil
|
330
|
-
end
|
331
|
-
when 3
|
332
|
-
if /^(Part|Partie) \d+:/ =~ titles[1]
|
333
|
-
intro, main, part = nil, titles[0], titles[1..2].join(" - ")
|
334
|
-
else
|
335
|
-
intro, main, part = titles[0], titles[1], titles[2]
|
336
|
-
end
|
337
|
-
else
|
338
|
-
intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
|
339
|
-
end
|
340
|
-
[{
|
341
|
-
title_intro: intro,
|
342
|
-
title_main: main,
|
343
|
-
title_part: part,
|
344
|
-
language: "en",
|
345
|
-
script: "Latn"
|
346
|
-
}]
|
214
|
+
RelatonBib::TypedTitleString.from_string hit_data[:title], "en", "Latn"
|
347
215
|
end
|
348
216
|
|
349
|
-
# Return ISO script code.
|
350
|
-
# @param lang [String]
|
351
|
-
# @return [String]
|
352
|
-
# def script(lang)
|
353
|
-
# case lang
|
354
|
-
# when 'en', 'fr' then 'Latn'
|
355
|
-
# end
|
356
|
-
# end
|
357
|
-
|
358
217
|
# Fetch dates
|
359
218
|
# @param doc [Nokogiri::HTML::Document]
|
360
219
|
# @return [Array<Hash>]
|
@@ -367,6 +226,8 @@ module RelatonIec
|
|
367
226
|
dates
|
368
227
|
end
|
369
228
|
|
229
|
+
# rubocop:disable Metrics/MethodLength
|
230
|
+
|
370
231
|
def fetch_contributors(code)
|
371
232
|
code.sub(/\s.*/, "").split("/").map do |abbrev|
|
372
233
|
case abbrev
|
@@ -381,12 +242,15 @@ module RelatonIec
|
|
381
242
|
role: [type: "publisher"] }
|
382
243
|
end
|
383
244
|
end
|
245
|
+
# rubocop:enable Metrics/MethodLength
|
384
246
|
|
385
247
|
# Fetch ICS.
|
386
248
|
# @param doc [Nokogiri::HTML::Document]
|
387
249
|
# @return [Array<Hash>]
|
388
250
|
def fetch_ics(doc)
|
389
|
-
doc.xpath(
|
251
|
+
doc.xpath(
|
252
|
+
'//th[contains(text(), "ICS")]/following-sibling::td/a',
|
253
|
+
).map do |i|
|
390
254
|
code = i.text.match(/[\d\.]+/).to_s.split "."
|
391
255
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
392
256
|
end
|
@@ -403,9 +267,11 @@ module RelatonIec
|
|
403
267
|
links
|
404
268
|
end
|
405
269
|
|
270
|
+
# rubocop:disable Metrics/MethodLength
|
271
|
+
|
406
272
|
# Fetch copyright.
|
407
273
|
# @param code [String]
|
408
|
-
# @return [Hash]
|
274
|
+
# @return [Array<Hash>]
|
409
275
|
def fetch_copyright(code, doc)
|
410
276
|
abbreviation = code.match(/.*?(?=\s)/).to_s
|
411
277
|
case abbreviation
|
@@ -415,12 +281,15 @@ module RelatonIec
|
|
415
281
|
end
|
416
282
|
from = code.match(/(?<=:)\d{4}/).to_s
|
417
283
|
if from.empty?
|
418
|
-
from = doc.xpath("//span[@itemprop='releaseDate']").text
|
419
|
-
|
284
|
+
from = doc.xpath("//span[@itemprop='releaseDate']").text.
|
285
|
+
match(/\d{4}/).to_s
|
420
286
|
end
|
421
|
-
{
|
287
|
+
[{
|
288
|
+
owner: [{ name: name, abbreviation: abbreviation, url: url }],
|
289
|
+
from: from,
|
290
|
+
}]
|
422
291
|
end
|
292
|
+
# rubocop:enable Metrics/MethodLength
|
423
293
|
end
|
424
294
|
end
|
425
|
-
# rubocop:enable Metrics/ModuleLength
|
426
295
|
end
|