relaton-iec 1.14.1 → 1.14.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iec
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.1
4
+ version: 1.14.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-12-21 00:00:00.000000000 Z
11
+ date: 2023-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: equivalent-xml
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.6'
27
- - !ruby/object:Gem::Dependency
28
- name: pry-byebug
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: rake
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,27 +53,13 @@ dependencies:
67
53
  - !ruby/object:Gem::Version
68
54
  version: '3.0'
69
55
  - !ruby/object:Gem::Dependency
70
- name: ruby-jing
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: simplecov
56
+ name: addressable
85
57
  requirement: !ruby/object:Gem::Requirement
86
58
  requirements:
87
59
  - - ">="
88
60
  - !ruby/object:Gem::Version
89
61
  version: '0'
90
- type: :development
62
+ type: :runtime
91
63
  prerelease: false
92
64
  version_requirements: !ruby/object:Gem::Requirement
93
65
  requirements:
@@ -95,35 +67,35 @@ dependencies:
95
67
  - !ruby/object:Gem::Version
96
68
  version: '0'
97
69
  - !ruby/object:Gem::Dependency
98
- name: vcr
70
+ name: relaton-index
99
71
  requirement: !ruby/object:Gem::Requirement
100
72
  requirements:
101
- - - ">="
73
+ - - "~>"
102
74
  - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
75
+ version: 0.1.6
76
+ type: :runtime
105
77
  prerelease: false
106
78
  version_requirements: !ruby/object:Gem::Requirement
107
79
  requirements:
108
- - - ">="
80
+ - - "~>"
109
81
  - !ruby/object:Gem::Version
110
- version: '0'
82
+ version: 0.1.6
111
83
  - !ruby/object:Gem::Dependency
112
- name: webmock
84
+ name: relaton-iso-bib
113
85
  requirement: !ruby/object:Gem::Requirement
114
86
  requirements:
115
- - - ">="
87
+ - - "~>"
116
88
  - !ruby/object:Gem::Version
117
- version: '0'
118
- type: :development
89
+ version: 1.14.0
90
+ type: :runtime
119
91
  prerelease: false
120
92
  version_requirements: !ruby/object:Gem::Requirement
121
93
  requirements:
122
- - - ">="
94
+ - - "~>"
123
95
  - !ruby/object:Gem::Version
124
- version: '0'
96
+ version: 1.14.0
125
97
  - !ruby/object:Gem::Dependency
126
- name: addressable
98
+ name: rubyzip
127
99
  requirement: !ruby/object:Gem::Requirement
128
100
  requirements:
129
101
  - - ">="
@@ -136,20 +108,6 @@ dependencies:
136
108
  - - ">="
137
109
  - !ruby/object:Gem::Version
138
110
  version: '0'
139
- - !ruby/object:Gem::Dependency
140
- name: relaton-iso-bib
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - "~>"
144
- - !ruby/object:Gem::Version
145
- version: 1.14.0
146
- type: :runtime
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - "~>"
151
- - !ruby/object:Gem::Version
152
- version: 1.14.0
153
111
  description: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IecBibliographicItem
154
112
  model'
155
113
  email:
@@ -193,13 +151,15 @@ files:
193
151
  - lib/relaton_iec/basic_block/stem.rb
194
152
  - lib/relaton_iec/basic_block/table.rb
195
153
  - lib/relaton_iec/basic_block/text_element.rb
154
+ - lib/relaton_iec/data_fetcher.rb
155
+ - lib/relaton_iec/data_parser.rb
196
156
  - lib/relaton_iec/hash_converter.rb
197
157
  - lib/relaton_iec/hit.rb
198
158
  - lib/relaton_iec/hit_collection.rb
199
159
  - lib/relaton_iec/iec_bibliographic_item.rb
200
160
  - lib/relaton_iec/iec_bibliography.rb
161
+ - lib/relaton_iec/index.rb
201
162
  - lib/relaton_iec/processor.rb
202
- - lib/relaton_iec/scrapper.rb
203
163
  - lib/relaton_iec/statuses.yml
204
164
  - lib/relaton_iec/tc_sc_officers_note.rb
205
165
  - lib/relaton_iec/version.rb
@@ -209,7 +169,7 @@ homepage: https://github.com/metanorma/relaton-iec
209
169
  licenses:
210
170
  - MIT
211
171
  metadata: {}
212
- post_install_message:
172
+ post_install_message:
213
173
  rdoc_options: []
214
174
  require_paths:
215
175
  - lib
@@ -224,8 +184,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
224
184
  - !ruby/object:Gem::Version
225
185
  version: '0'
226
186
  requirements: []
227
- rubygems_version: 3.1.6
228
- signing_key:
187
+ rubygems_version: 3.4.9
188
+ signing_key:
229
189
  specification_version: 4
230
190
  summary: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IecBibliographicItem
231
191
  model'
@@ -1,308 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Capybara.request_driver :poltergeist do |app|
4
- # Capybara::Poltergeist::Driver.new app, js_errors: false
5
- # end
6
- # Capybara.default_driver = :poltergeist
7
-
8
- module RelatonIec
9
- # Scrapper.
10
- module Scrapper
11
- DOMAIN = "https://webstore.iec.ch"
12
- ABBREVS = {
13
- "ISO" => ["International Organization for Standardization", "www.iso.org"],
14
- "IEC" => ["International Electrotechnical Commission", "www.iec.ch"],
15
- "CISPR" => ["International special committee on radio interference", "www.iec.ch"],
16
- }.freeze
17
-
18
- TYPES = {
19
- "ISO" => "international-standard",
20
- "TS" => "technical-specification",
21
- "TR" => "technical-report",
22
- "PAS" => "publicly-available-specification",
23
- "AWI" => "appruved-work-item",
24
- "CD" => "committee-draft",
25
- "FDIS" => "final-draft-international-standard",
26
- "NP" => "new-proposal",
27
- "DIS" => "draft-international-standard",
28
- "WD" => "working-draft",
29
- "R" => "recommendation",
30
- "Guide" => "guide",
31
- "SRD" => "system-reference-deliverable",
32
- }.freeze
33
-
34
- class << self
35
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
36
-
37
- # Parse page.
38
- # @param hit_data [Hash]
39
- # @return [Hash]
40
- def parse_page(hit_data)
41
- doc = get_page hit_data[:url]
42
-
43
- # Fetch edition.
44
- edition = doc.at(
45
- "//th[contains(., 'Edition')]/following-sibling::td/span",
46
- ).text
47
-
48
- status, relations = fetch_status_relations hit_data[:url]
49
-
50
- IecBibliographicItem.new(
51
- fetched: Date.today.to_s,
52
- docid: fetch_docid(hit_data),
53
- structuredidentifier: fetch_structuredidentifier(doc),
54
- edition: edition,
55
- language: ["en"],
56
- script: ["Latn"],
57
- title: fetch_titles(hit_data),
58
- doctype: fetch_type(doc),
59
- docstatus: status,
60
- ics: fetch_ics(doc),
61
- date: fetch_dates(doc),
62
- contributor: fetch_contributors(hit_data[:code]),
63
- editorialgroup: fetch_workgroup(doc),
64
- abstract: fetch_abstract(doc),
65
- copyright: fetch_copyright(hit_data[:code], doc),
66
- link: fetch_link(doc, hit_data[:url]),
67
- relation: relations,
68
- place: ["Geneva"],
69
- )
70
- end
71
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
72
-
73
- private
74
-
75
- # @param hit [Hash]
76
- # @return [Array<RelatonBib::DocumentIdentifier>]
77
- def fetch_docid(hit)
78
- urn = RelatonIec.code_to_urn hit[:code], "en"
79
- [
80
- RelatonBib::DocumentIdentifier.new(id: hit[:code], type: "IEC", primary: true),
81
- RelatonBib::DocumentIdentifier.new(id: urn, type: "URN"),
82
- ]
83
- end
84
-
85
- # Fetch abstracts.
86
- # @param doc [Nokigiri::HTML::Document]
87
- # @return [Array<Array>]
88
- def fetch_abstract(doc)
89
- abstract_content = doc.at('//div[@itemprop="description"]').text
90
- [{
91
- content: abstract_content,
92
- language: "en",
93
- script: "Latn",
94
- format: "text/plain",
95
- }]
96
- end
97
-
98
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
99
-
100
- # Get page.
101
- # @param path [String] page's path
102
- # @return [Array<Nokogiri::HTML::Document, String>]
103
- def get_page(url)
104
- uri = URI url
105
- resp = Net::HTTP.get_response(uri)
106
- case resp.code
107
- when "301"
108
- path = resp["location"]
109
- url = DOMAIN + path
110
- uri = URI url
111
- resp = Net::HTTP.get_response(uri)
112
- when "404"
113
- raise RelatonBib::RequestError, "Page not found #{url}"
114
- end
115
- Nokogiri::HTML(resp.body)
116
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
117
- EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
118
- Net::ProtocolError, OpenSSL::SSL::SSLError
119
- raise RelatonBib::RequestError, "Could not access #{url}"
120
- end
121
- # rubocop:enable Metrics/AbcSize
122
-
123
- # Fetch structuredidentifier.
124
- # @param doc [Nokogiri::HTML::Document]
125
- # @return [RelatonIsoBib::StructuredIdentifier]
126
- def fetch_structuredidentifier(doc)
127
- item_ref = doc.at("//span[@itemprop='productID']")
128
- unless item_ref
129
- return RelatonIsoBib::StructuredIdentifier.new(
130
- project_number: "?", part_number: "", prefix: nil, id: "?",
131
- )
132
- end
133
-
134
- m = item_ref.text.match(
135
- /(?<=\s)(?<project>\d+)-?(?<part>(?<=-)\d+|)-?(?<subpart>(?<=-)\d+|)/,
136
- )
137
- RelatonIsoBib::StructuredIdentifier.new(
138
- project_number: m[:project],
139
- part_number: m[:part],
140
- subpart_number: m[:subpart],
141
- prefix: nil,
142
- type: "IEC",
143
- id: item_ref.text,
144
- )
145
- end
146
-
147
- # Fetch status.
148
- # @param doc [Nokogiri::HTML::Document]
149
- # @param status [String]
150
- # @return [Hash]
151
- def fetch_status(doc)
152
- wip = doc.at('//ROW[STATUS[.="PREPARING"]]')
153
- if wip
154
- statuses = YAML.load_file File.join __dir__, "statuses.yml"
155
- s = wip.at("STAGE").text
156
- return unless statuses[s]
157
-
158
- stage, substage = statuses[s]["stage"].split "."
159
- else
160
- stage = "60"
161
- substage = "60"
162
- end
163
- RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
164
- end
165
-
166
- # Fetch workgroup.
167
- # @param doc [Nokogiri::HTML::Document]
168
- # @return [Hash]
169
- def fetch_workgroup(doc)
170
- wg = doc.at('//th/abbr[.="TC"]/../following-sibling::td/a').text
171
- {
172
- name: "International Electrotechnical Commission",
173
- abbreviation: "IEC",
174
- url: "webstore.iec.ch",
175
- technical_committee: [{
176
- name: wg,
177
- type: "technicalCommittee",
178
- number: wg.match(/\d+/)&.to_s&.to_i,
179
- }],
180
- }
181
- end
182
- # rubocop:enable Metrics/MethodLength
183
-
184
- # Fetch relations.
185
- # @param doc [Nokogiri::HTML::Document]
186
- # @return [Array<Hash>]
187
- # rubocop:disable Metrics/MethodLength
188
- def fetch_relations(doc)
189
- doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]')
190
- .map do |r|
191
- r_type = r.at("STATUS").text.downcase
192
- type = case r_type
193
- # when 'published' then 'obsoletes' # Valid
194
- when "revised", "replaced" then "updates"
195
- when "withdrawn" then "obsoletes"
196
- else r_type
197
- end
198
- ref = r.at("FULL_NAME").text
199
- fref = RelatonBib::FormattedRef.new content: ref, format: "text/plain"
200
- bibitem = IecBibliographicItem.new(
201
- formattedref: fref,
202
- docid: [RelatonBib::DocumentIdentifier.new(id: ref, type: "IEC", primary: true)],
203
- )
204
- { type: type, bibitem: bibitem }
205
- end
206
- end
207
-
208
- def fetch_status_relations(url)
209
- pubid = url.match(/\d+$/).to_s
210
- uri = URI "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?"\
211
- "Openagent&url=#{pubid}"
212
- resp = Net::HTTP.get_response uri
213
- doc = Nokogiri::XML resp.body
214
- status = fetch_status doc
215
- relations = fetch_relations doc
216
- [status, relations]
217
- end
218
- # rubocop:enable Metrics/MethodLength
219
-
220
- # Fetch type.
221
- # @param doc [Nokogiri::HTML::Document]
222
- # @return [String]
223
- def fetch_type(doc)
224
- type = doc.at(
225
- '//th[contains(., "Publication type")]/following-sibling::td/span',
226
- ).text
227
- TYPES[type] || type.downcase.tr(" ", "-")
228
- end
229
-
230
- # Fetch titles.
231
- # @param hit_data [Hash]
232
- # @return [Array<Hash>]
233
- def fetch_titles(hit_data)
234
- RelatonBib::TypedTitleString.from_string hit_data[:title], "en", "Latn"
235
- end
236
-
237
- # Fetch dates
238
- # @param doc [Nokogiri::HTML::Document]
239
- # @return [Array<Hash>]
240
- def fetch_dates(doc)
241
- dates = []
242
- publish_date = doc.at("//span[@itemprop='releaseDate']").text
243
- unless publish_date.empty?
244
- dates << { type: "published", on: publish_date }
245
- end
246
- dates
247
- end
248
-
249
- # rubocop:disable Metrics/MethodLength
250
-
251
- def fetch_contributors(code)
252
- code.sub(/\s.*/, "").split("/").map do |abbrev|
253
- name, url = name_url abbrev
254
- { entity: { name: name, url: url, abbreviation: abbrev },
255
- role: [type: "publisher"] }
256
- end
257
- end
258
- # rubocop:enable Metrics/MethodLength
259
-
260
- # Fetch ICS.
261
- # @param doc [Nokogiri::HTML::Document]
262
- # @return [Array<Hash>]
263
- def fetch_ics(doc)
264
- doc.xpath(
265
- '//th[contains(text(), "ICS")]/following-sibling::td/a',
266
- ).map do |i|
267
- code = i.text.match(/[\d.]+/).to_s.split "."
268
- { field: code[0], group: code[1], subgroup: code[2] }
269
- end
270
- end
271
-
272
- # Fetch links.
273
- # @param doc [Nokogiri::HTML::Document]
274
- # @param url [String]
275
- # @return [Array<Hash>]
276
- def fetch_link(doc, url)
277
- links = [{ type: "src", content: url }]
278
- obp_elms = doc.at_css("p.btn-preview a")
279
- links << { type: "obp", content: obp_elms[:href] } if obp_elms
280
- links
281
- end
282
-
283
- # rubocop:disable Metrics/MethodLength
284
-
285
- # Fetch copyright.
286
- # @param title [String]
287
- # @return [Array<Hash>]
288
- def fetch_copyright(code, doc)
289
- abbreviation = code.match(/.*?(?=\s)/).to_s
290
- name, url = name_url abbreviation
291
- from = code.match(/(?<=:)\d{4}/).to_s
292
- if from.empty?
293
- from = doc.xpath("//span[@itemprop='releaseDate']").text
294
- .match(/\d{4}/).to_s
295
- end
296
- [{
297
- owner: [{ name: name, abbreviation: abbreviation, url: url }],
298
- from: from,
299
- }]
300
- end
301
- # rubocop:enable Metrics/MethodLength
302
-
303
- def name_url(abbrev)
304
- ABBREVS[abbrev]
305
- end
306
- end
307
- end
308
- end