relaton-iec 0.8.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,31 +32,22 @@ module RelatonIec
32
32
  }.freeze
33
33
 
34
34
  class << self
35
- # @param text [String]
36
- # @return [Array<Hash>]
37
- # def get(text)
38
- # iso_workers = WorkersPool.new 4
39
- # iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
40
- # algolia_workers = start_algolia_search(text, iso_workers)
41
- # iso_docs = iso_workers.result
42
- # algolia_workers.end
43
- # algolia_workers.result
44
- # iso_docs
45
- # end
35
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
46
36
 
47
37
  # Parse page.
48
38
  # @param hit_data [Hash]
49
39
  # @return [Hash]
50
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
51
40
  def parse_page(hit_data)
52
41
  doc = get_page hit_data[:url]
53
42
 
54
43
  # Fetch edition.
55
- edition = doc.at("//th[contains(., 'Edition')]/following-sibling::td/span").text
44
+ edition = doc.at(
45
+ "//th[contains(., 'Edition')]/following-sibling::td/span",
46
+ ).text
56
47
 
57
48
  status, relations = fetch_status_relations hit_data[:url]
58
49
 
59
- RelatonIsoBib::IsoBibliographicItem.new(
50
+ IecBibliographicItem.new(
60
51
  fetched: Date.today.to_s,
61
52
  docid: [RelatonBib::DocumentIdentifier.new(id: hit_data[:code], type: "IEC")],
62
53
  structuredidentifier: fetch_structuredidentifier(doc),
@@ -81,46 +72,6 @@ module RelatonIec
81
72
 
82
73
  private
83
74
 
84
- # Start search workers.
85
- # @param text[String]
86
- # @param iec_workers [Isobib::WorkersPool]
87
- # @reaturn [Isobib::WorkersPool]
88
- # def start_algolia_search(text, iec_workers)
89
- # index = Algolia::Index.new 'all_en'
90
- # workers = WorkersPool.new
91
- # workers.worker do |page|
92
- # algolia_worker(index, text, page, workers, iec_workers)
93
- # end
94
-
95
- # # Add first page so search worker will start.
96
- # workers << 0
97
- # end
98
-
99
- # Fetch ISO documents.
100
- # @param hit [Hash]
101
- # @param isiso_workers [Isobib::WorkersPool]
102
- # def iso_worker(hit, iso_workers)
103
- # print "Parse #{iso_workers.size} of #{iso_workers.nb_hits} \r"
104
- # parse_page hit
105
- # end
106
-
107
- # Fetch hits from algolia search service.
108
- # @param index[Algolia::Index]
109
- # @param text [String]
110
- # @param page [Integer]
111
- # @param algolia_workers [Isobib::WorkersPool]
112
- # @param isiso_workers [Isobib::WorkersPool]
113
- # def algolia_worker(index, text, page, algolia_workers, iso_workers)
114
- # res = index.search text, facetFilters: ['category:standard'], page: page
115
- # next_page = res['page'] + 1
116
- # algolia_workers << next_page if next_page < res['nbPages']
117
- # res['hits'].each do |hit|
118
- # iso_workers.nb_hits = res['nbHits']
119
- # iso_workers << hit
120
- # end
121
- # iso_workers.end unless next_page < res['nbPages']
122
- # end
123
-
124
75
  # Fetch abstracts.
125
76
  # @param doc [Nokogiri::HTML::Document]
126
77
  # @return [Array<Array>]
@@ -134,19 +85,6 @@ module RelatonIec
134
85
  }]
135
86
  end
136
87
 
137
- # Get langs.
138
- # @param doc [Nokogiri::HTML::Document]
139
- # @return [Array<Hash>]
140
- # def langs(doc)
141
- # lgs = [{ lang: 'en' }]
142
- # doc.css('ul#lang-switcher ul li a').each do |lang_link|
143
- # lang_path = lang_link.attr('href')
144
- # lang = lang_path.match(%r{^\/(fr)\/})
145
- # lgs << { lang: lang[1], path: lang_path } if lang
146
- # end
147
- # lgs
148
- # end
149
-
150
88
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
151
89
 
152
90
  # Get page.
@@ -154,25 +92,20 @@ module RelatonIec
154
92
  # @return [Nokogiri::HTML::Document]
155
93
  def get_page(url)
156
94
  uri = URI url
157
- resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
95
+ resp = Net::HTTP.get_response(uri)
158
96
  case resp.code
159
97
  when "301"
160
98
  path = resp["location"]
161
99
  url = DOMAIN + path
162
100
  uri = URI url
163
- resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
101
+ resp = Net::HTTP.get_response(uri)
164
102
  when "404"
165
103
  raise RelatonBib::RequestError, "Page not found #{url}"
166
104
  end
167
- # n = 0
168
- # while resp.body !~ /<strong/ && n < 10
169
- # resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
170
- # n += 1
171
- # end
172
105
  Nokogiri::HTML(resp.body)
173
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
174
- Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
175
- OpenSSL::SSL::SSLError
106
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
107
+ EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
108
+ Net::ProtocolError, OpenSSL::SSL::SSLError
176
109
  raise RelatonBib::RequestError, "Could not access #{url}"
177
110
  end
178
111
  # rubocop:enable Metrics/AbcSize
@@ -211,15 +144,12 @@ module RelatonIec
211
144
  statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
212
145
  s = wip.at("STAGE").text
213
146
  stage, substage = statuses[s]["stage"].split "."
214
- # status = statuses[s]["status"]
215
147
  else
216
- # status = "Published"
217
148
  stage = "60"
218
149
  substage = "60"
219
150
  end
220
151
  RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
221
152
  end
222
- # rubocop:enable Metrics/MethodLength
223
153
 
224
154
  # Fetch workgroup.
225
155
  # @param doc [Nokogiri::HTML::Document]
@@ -237,27 +167,26 @@ module RelatonIec
237
167
  }],
238
168
  }
239
169
  end
170
+ # rubocop:enable Metrics/MethodLength
240
171
 
241
172
  # Fetch relations.
242
173
  # @param doc [Nokogiri::HTML::Document]
243
174
  # @return [Array<Hash>]
244
175
  # rubocop:disable Metrics/MethodLength
245
176
  def fetch_relations(doc)
246
- doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').map do |r|
177
+ doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').
178
+ map do |r|
247
179
  r_type = r.at("STATUS").text.downcase
248
180
  type = case r_type
249
- # when 'published' then 'obsoletes' # Valid
181
+ # when 'published' then 'obsoletes' # Valid
250
182
  when "revised", "replaced" then "updates"
251
183
  when "withdrawn" then "obsoletes"
252
184
  else r_type
253
185
  end
254
- # url = DOMAIN + "/publication/" + r.at("PUB_ID").text
255
186
  fref = RelatonBib::FormattedRef.new(
256
187
  content: r.at("FULL_NAME").text, format: "text/plain",
257
188
  )
258
- bibitem = RelatonIsoBib::IsoBibliographicItem.new(
259
- formattedref: fref,
260
- )
189
+ bibitem = IecBibliographicItem.new(formattedref: fref)
261
190
  { type: type, bibitem: bibitem }
262
191
  end
263
192
  end
@@ -272,22 +201,6 @@ module RelatonIec
272
201
  status = fetch_status doc
273
202
  relations = fetch_relations doc
274
203
  [status, relations]
275
- # doc.css('ul.steps li').inject([]) do |a, r|
276
- # r_type = r.css('strong').text
277
- # type = case r_type
278
- # when 'Previously', 'Will be replaced by' then 'obsoletes'
279
- # when 'Corrigenda/Amendments', 'Revised by', 'Now confirmed'
280
- # 'updates'
281
- # else r_type
282
- # end
283
- # if ['Now', 'Now under review'].include? type
284
- # a
285
- # else
286
- # a + r.css('a').map do |id|
287
- # { type: type, identifier: id.text, url: id['href'] }
288
- # end
289
- # end
290
- # end
291
204
  end
292
205
  # rubocop:enable Metrics/MethodLength
293
206
 
@@ -295,23 +208,13 @@ module RelatonIec
295
208
  # @param doc [Nokogiri::HTML::Document]
296
209
  # @return [String]
297
210
  def fetch_type(doc)
298
- doc.at('//th[contains(., "Publication type")]/following-sibling::td/span').
299
- text.downcase.tr " ", "-"
300
- # type_match = title.match(%r{^(ISO|IWA|IEC)(?:(/IEC|/IEEE|/PRF|
301
- # /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
302
- # #return "international-standard" if type_match.nil?
303
- # if TYPES[type_match[2]]
304
- # TYPES[type_match[2]]
305
- # elsif type_match[1]
306
- # elsif type_match[1] == 'ISO'
307
- # 'international-standard'
308
- # elsif type_match[1] == 'IWA'
309
- # 'international-workshop-agreement'
310
- # end
311
- # # rescue => _e
312
- # # puts 'Unknown document type: ' + title
211
+ doc.at(
212
+ '//th[contains(., "Publication type")]/following-sibling::td/span',
213
+ ).text.downcase.tr " ", "-"
313
214
  end
314
215
 
216
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
217
+
315
218
  # Fetch titles.
316
219
  # @param hit_data [Hash]
317
220
  # @return [Array<Hash>]
@@ -339,21 +242,13 @@ module RelatonIec
339
242
  end
340
243
  [{
341
244
  title_intro: intro,
342
- title_main: main,
343
- title_part: part,
344
- language: "en",
345
- script: "Latn"
245
+ title_main: main,
246
+ title_part: part,
247
+ language: "en",
248
+ script: "Latn",
346
249
  }]
347
250
  end
348
-
349
- # Return ISO script code.
350
- # @param lang [String]
351
- # @return [String]
352
- # def script(lang)
353
- # case lang
354
- # when 'en', 'fr' then 'Latn'
355
- # end
356
- # end
251
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
357
252
 
358
253
  # Fetch dates
359
254
  # @param doc [Nokogiri::HTML::Document]
@@ -367,6 +262,8 @@ module RelatonIec
367
262
  dates
368
263
  end
369
264
 
265
+ # rubocop:disable Metrics/MethodLength
266
+
370
267
  def fetch_contributors(code)
371
268
  code.sub(/\s.*/, "").split("/").map do |abbrev|
372
269
  case abbrev
@@ -381,12 +278,15 @@ module RelatonIec
381
278
  role: [type: "publisher"] }
382
279
  end
383
280
  end
281
+ # rubocop:enable Metrics/MethodLength
384
282
 
385
283
  # Fetch ICS.
386
284
  # @param doc [Nokogiri::HTML::Document]
387
285
  # @return [Array<Hash>]
388
286
  def fetch_ics(doc)
389
- doc.xpath('//th[contains(text(), "ICS")]/following-sibling::td/a').map do |i|
287
+ doc.xpath(
288
+ '//th[contains(text(), "ICS")]/following-sibling::td/a',
289
+ ).map do |i|
390
290
  code = i.text.match(/[\d\.]+/).to_s.split "."
391
291
  { field: code[0], group: code[1], subgroup: code[2] }
392
292
  end
@@ -403,9 +303,11 @@ module RelatonIec
403
303
  links
404
304
  end
405
305
 
306
+ # rubocop:disable Metrics/MethodLength
307
+
406
308
  # Fetch copyright.
407
309
  # @param code [String]
408
- # @return [Hash]
310
+ # @return [Array<Hash>]
409
311
  def fetch_copyright(code, doc)
410
312
  abbreviation = code.match(/.*?(?=\s)/).to_s
411
313
  case abbreviation
@@ -415,11 +317,15 @@ module RelatonIec
415
317
  end
416
318
  from = code.match(/(?<=:)\d{4}/).to_s
417
319
  if from.empty?
418
- from = doc.xpath("//span[@itemprop='releaseDate']").text
419
- .match(/\d{4}/).to_s
320
+ from = doc.xpath("//span[@itemprop='releaseDate']").text.
321
+ match(/\d{4}/).to_s
420
322
  end
421
- { owner: { name: name, abbreviation: abbreviation, url: url }, from: from }
323
+ [{
324
+ owner: [{ name: name, abbreviation: abbreviation, url: url }],
325
+ from: from,
326
+ }]
422
327
  end
328
+ # rubocop:enable Metrics/MethodLength
423
329
  end
424
330
  end
425
331
  # rubocop:enable Metrics/ModuleLength
@@ -1,3 +1,3 @@
1
1
  module RelatonIec
2
- VERSION = "0.8.0".freeze
2
+ VERSION = "1.1.0".freeze
3
3
  end
@@ -0,0 +1,14 @@
1
+ module RelatonIec
2
+ class XMLParser < RelatonIsoBib::XMLParser
3
+ class << self
4
+ private
5
+
6
+ # override RelatonIsoBib::XMLParser.bib_item method
7
+ # @param item_hash [Hash]
8
+ # @return [RelatonIec::IecBibliographicItem]
9
+ def bib_item(item_hash)
10
+ IecBibliographicItem.new item_hash
11
+ end
12
+ end
13
+ end
14
+ end
@@ -8,10 +8,10 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ["Ribose Inc."]
9
9
  spec.email = ["open.source@ribose.com"]
10
10
 
11
- spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic use "\
12
- "using the IsoBibliographicItem model"
13
- spec.description = "RelatonIec: retrieve IEC Standards for bibliographic use "\
14
- "using the IsoBibliographicItem model"
11
+ spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic "\
12
+ "use using the IecBibliographicItem model"
13
+ spec.description = "RelatonIec: retrieve IEC Standards for bibliographic "\
14
+ "use using the IecBibliographicItem model"
15
15
  spec.homepage = "https://github.com/metanorma/relaton-iec"
16
16
  spec.license = "MIT"
17
17
 
@@ -29,10 +29,11 @@ Gem::Specification.new do |spec|
29
29
  spec.add_development_dependency "rake", "~> 10.0"
30
30
  spec.add_development_dependency "rspec", "~> 3.0"
31
31
  spec.add_development_dependency "ruby-debug-ide"
32
+ spec.add_development_dependency "ruby-jing"
32
33
  spec.add_development_dependency "simplecov"
33
34
  spec.add_development_dependency "vcr"
34
35
  spec.add_development_dependency "webmock"
35
36
 
36
37
  spec.add_dependency "addressable"
37
- spec.add_dependency "relaton-iso-bib", "~> 0.7.0"
38
+ spec.add_dependency "relaton-iso-bib", "~> 1.1.0"
38
39
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-16 00:00:00.000000000 Z
11
+ date: 2020-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: debase
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-jing
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: simplecov
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -156,15 +170,15 @@ dependencies:
156
170
  requirements:
157
171
  - - "~>"
158
172
  - !ruby/object:Gem::Version
159
- version: 0.7.0
173
+ version: 1.1.0
160
174
  type: :runtime
161
175
  prerelease: false
162
176
  version_requirements: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
- version: 0.7.0
167
- description: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IsoBibliographicItem
180
+ version: 1.1.0
181
+ description: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IecBibliographicItem
168
182
  model'
169
183
  email:
170
184
  - open.source@ribose.com
@@ -184,15 +198,23 @@ files:
184
198
  - Rakefile
185
199
  - bin/console
186
200
  - bin/setup
187
- - grammars
201
+ - grammars/basicdoc.rng
202
+ - grammars/biblio.rng
203
+ - grammars/iec.rng
204
+ - grammars/isodoc.rng
205
+ - grammars/isostandard.rng
206
+ - grammars/reqt.rng
188
207
  - lib/relaton_iec.rb
208
+ - lib/relaton_iec/hash_converter.rb
189
209
  - lib/relaton_iec/hit.rb
190
210
  - lib/relaton_iec/hit_collection.rb
211
+ - lib/relaton_iec/iec_bibliographic_item.rb
191
212
  - lib/relaton_iec/iec_bibliography.rb
192
213
  - lib/relaton_iec/processor.rb
193
214
  - lib/relaton_iec/scrapper.rb
194
215
  - lib/relaton_iec/statuses.yml
195
216
  - lib/relaton_iec/version.rb
217
+ - lib/relaton_iec/xml_parser.rb
196
218
  - relaton_iec.gemspec
197
219
  homepage: https://github.com/metanorma/relaton-iec
198
220
  licenses:
@@ -216,6 +238,6 @@ requirements: []
216
238
  rubygems_version: 3.0.6
217
239
  signing_key:
218
240
  specification_version: 4
219
- summary: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IsoBibliographicItem
241
+ summary: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IecBibliographicItem
220
242
  model'
221
243
  test_files: []