relaton-iec 0.8.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -32,31 +32,22 @@ module RelatonIec
32
32
  }.freeze
33
33
 
34
34
  class << self
35
- # @param text [String]
36
- # @return [Array<Hash>]
37
- # def get(text)
38
- # iso_workers = WorkersPool.new 4
39
- # iso_workers.worker { |hit| iso_worker(hit, iso_workers) }
40
- # algolia_workers = start_algolia_search(text, iso_workers)
41
- # iso_docs = iso_workers.result
42
- # algolia_workers.end
43
- # algolia_workers.result
44
- # iso_docs
45
- # end
35
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
46
36
 
47
37
  # Parse page.
48
38
  # @param hit_data [Hash]
49
39
  # @return [Hash]
50
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
51
40
  def parse_page(hit_data)
52
41
  doc = get_page hit_data[:url]
53
42
 
54
43
  # Fetch edition.
55
- edition = doc.at("//th[contains(., 'Edition')]/following-sibling::td/span").text
44
+ edition = doc.at(
45
+ "//th[contains(., 'Edition')]/following-sibling::td/span",
46
+ ).text
56
47
 
57
48
  status, relations = fetch_status_relations hit_data[:url]
58
49
 
59
- RelatonIsoBib::IsoBibliographicItem.new(
50
+ IecBibliographicItem.new(
60
51
  fetched: Date.today.to_s,
61
52
  docid: [RelatonBib::DocumentIdentifier.new(id: hit_data[:code], type: "IEC")],
62
53
  structuredidentifier: fetch_structuredidentifier(doc),
@@ -81,46 +72,6 @@ module RelatonIec
81
72
 
82
73
  private
83
74
 
84
- # Start search workers.
85
- # @param text[String]
86
- # @param iec_workers [Isobib::WorkersPool]
87
- # @reaturn [Isobib::WorkersPool]
88
- # def start_algolia_search(text, iec_workers)
89
- # index = Algolia::Index.new 'all_en'
90
- # workers = WorkersPool.new
91
- # workers.worker do |page|
92
- # algolia_worker(index, text, page, workers, iec_workers)
93
- # end
94
-
95
- # # Add first page so search worker will start.
96
- # workers << 0
97
- # end
98
-
99
- # Fetch ISO documents.
100
- # @param hit [Hash]
101
- # @param isiso_workers [Isobib::WorkersPool]
102
- # def iso_worker(hit, iso_workers)
103
- # print "Parse #{iso_workers.size} of #{iso_workers.nb_hits} \r"
104
- # parse_page hit
105
- # end
106
-
107
- # Fetch hits from algolia search service.
108
- # @param index[Algolia::Index]
109
- # @param text [String]
110
- # @param page [Integer]
111
- # @param algolia_workers [Isobib::WorkersPool]
112
- # @param isiso_workers [Isobib::WorkersPool]
113
- # def algolia_worker(index, text, page, algolia_workers, iso_workers)
114
- # res = index.search text, facetFilters: ['category:standard'], page: page
115
- # next_page = res['page'] + 1
116
- # algolia_workers << next_page if next_page < res['nbPages']
117
- # res['hits'].each do |hit|
118
- # iso_workers.nb_hits = res['nbHits']
119
- # iso_workers << hit
120
- # end
121
- # iso_workers.end unless next_page < res['nbPages']
122
- # end
123
-
124
75
  # Fetch abstracts.
125
76
  # @param doc [Nokogiri::HTML::Document]
126
77
  # @return [Array<Array>]
@@ -134,19 +85,6 @@ module RelatonIec
134
85
  }]
135
86
  end
136
87
 
137
- # Get langs.
138
- # @param doc [Nokogiri::HTML::Document]
139
- # @return [Array<Hash>]
140
- # def langs(doc)
141
- # lgs = [{ lang: 'en' }]
142
- # doc.css('ul#lang-switcher ul li a').each do |lang_link|
143
- # lang_path = lang_link.attr('href')
144
- # lang = lang_path.match(%r{^\/(fr)\/})
145
- # lgs << { lang: lang[1], path: lang_path } if lang
146
- # end
147
- # lgs
148
- # end
149
-
150
88
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
151
89
 
152
90
  # Get page.
@@ -154,25 +92,20 @@ module RelatonIec
154
92
  # @return [Nokogiri::HTML::Document]
155
93
  def get_page(url)
156
94
  uri = URI url
157
- resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
95
+ resp = Net::HTTP.get_response(uri)
158
96
  case resp.code
159
97
  when "301"
160
98
  path = resp["location"]
161
99
  url = DOMAIN + path
162
100
  uri = URI url
163
- resp = Net::HTTP.get_response(uri) # .encode("UTF-8")
101
+ resp = Net::HTTP.get_response(uri)
164
102
  when "404"
165
103
  raise RelatonBib::RequestError, "Page not found #{url}"
166
104
  end
167
- # n = 0
168
- # while resp.body !~ /<strong/ && n < 10
169
- # resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
170
- # n += 1
171
- # end
172
105
  Nokogiri::HTML(resp.body)
173
- rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
174
- Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
175
- OpenSSL::SSL::SSLError
106
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
107
+ EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
108
+ Net::ProtocolError, OpenSSL::SSL::SSLError
176
109
  raise RelatonBib::RequestError, "Could not access #{url}"
177
110
  end
178
111
  # rubocop:enable Metrics/AbcSize
@@ -211,15 +144,12 @@ module RelatonIec
211
144
  statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
212
145
  s = wip.at("STAGE").text
213
146
  stage, substage = statuses[s]["stage"].split "."
214
- # status = statuses[s]["status"]
215
147
  else
216
- # status = "Published"
217
148
  stage = "60"
218
149
  substage = "60"
219
150
  end
220
151
  RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
221
152
  end
222
- # rubocop:enable Metrics/MethodLength
223
153
 
224
154
  # Fetch workgroup.
225
155
  # @param doc [Nokogiri::HTML::Document]
@@ -237,27 +167,26 @@ module RelatonIec
237
167
  }],
238
168
  }
239
169
  end
170
+ # rubocop:enable Metrics/MethodLength
240
171
 
241
172
  # Fetch relations.
242
173
  # @param doc [Nokogiri::HTML::Document]
243
174
  # @return [Array<Hash>]
244
175
  # rubocop:disable Metrics/MethodLength
245
176
  def fetch_relations(doc)
246
- doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').map do |r|
177
+ doc.xpath('//ROW[STATUS[.!="PREPARING"]][STATUS[.!="PUBLISHED"]]').
178
+ map do |r|
247
179
  r_type = r.at("STATUS").text.downcase
248
180
  type = case r_type
249
- # when 'published' then 'obsoletes' # Valid
181
+ # when 'published' then 'obsoletes' # Valid
250
182
  when "revised", "replaced" then "updates"
251
183
  when "withdrawn" then "obsoletes"
252
184
  else r_type
253
185
  end
254
- # url = DOMAIN + "/publication/" + r.at("PUB_ID").text
255
186
  fref = RelatonBib::FormattedRef.new(
256
187
  content: r.at("FULL_NAME").text, format: "text/plain",
257
188
  )
258
- bibitem = RelatonIsoBib::IsoBibliographicItem.new(
259
- formattedref: fref,
260
- )
189
+ bibitem = IecBibliographicItem.new(formattedref: fref)
261
190
  { type: type, bibitem: bibitem }
262
191
  end
263
192
  end
@@ -272,22 +201,6 @@ module RelatonIec
272
201
  status = fetch_status doc
273
202
  relations = fetch_relations doc
274
203
  [status, relations]
275
- # doc.css('ul.steps li').inject([]) do |a, r|
276
- # r_type = r.css('strong').text
277
- # type = case r_type
278
- # when 'Previously', 'Will be replaced by' then 'obsoletes'
279
- # when 'Corrigenda/Amendments', 'Revised by', 'Now confirmed'
280
- # 'updates'
281
- # else r_type
282
- # end
283
- # if ['Now', 'Now under review'].include? type
284
- # a
285
- # else
286
- # a + r.css('a').map do |id|
287
- # { type: type, identifier: id.text, url: id['href'] }
288
- # end
289
- # end
290
- # end
291
204
  end
292
205
  # rubocop:enable Metrics/MethodLength
293
206
 
@@ -295,23 +208,13 @@ module RelatonIec
295
208
  # @param doc [Nokogiri::HTML::Document]
296
209
  # @return [String]
297
210
  def fetch_type(doc)
298
- doc.at('//th[contains(., "Publication type")]/following-sibling::td/span').
299
- text.downcase.tr " ", "-"
300
- # type_match = title.match(%r{^(ISO|IWA|IEC)(?:(/IEC|/IEEE|/PRF|
301
- # /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
302
- # #return "international-standard" if type_match.nil?
303
- # if TYPES[type_match[2]]
304
- # TYPES[type_match[2]]
305
- # elsif type_match[1]
306
- # elsif type_match[1] == 'ISO'
307
- # 'international-standard'
308
- # elsif type_match[1] == 'IWA'
309
- # 'international-workshop-agreement'
310
- # end
311
- # # rescue => _e
312
- # # puts 'Unknown document type: ' + title
211
+ doc.at(
212
+ '//th[contains(., "Publication type")]/following-sibling::td/span',
213
+ ).text.downcase.tr " ", "-"
313
214
  end
314
215
 
216
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
217
+
315
218
  # Fetch titles.
316
219
  # @param hit_data [Hash]
317
220
  # @return [Array<Hash>]
@@ -339,21 +242,13 @@ module RelatonIec
339
242
  end
340
243
  [{
341
244
  title_intro: intro,
342
- title_main: main,
343
- title_part: part,
344
- language: "en",
345
- script: "Latn"
245
+ title_main: main,
246
+ title_part: part,
247
+ language: "en",
248
+ script: "Latn",
346
249
  }]
347
250
  end
348
-
349
- # Return ISO script code.
350
- # @param lang [String]
351
- # @return [String]
352
- # def script(lang)
353
- # case lang
354
- # when 'en', 'fr' then 'Latn'
355
- # end
356
- # end
251
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength
357
252
 
358
253
  # Fetch dates
359
254
  # @param doc [Nokogiri::HTML::Document]
@@ -367,6 +262,8 @@ module RelatonIec
367
262
  dates
368
263
  end
369
264
 
265
+ # rubocop:disable Metrics/MethodLength
266
+
370
267
  def fetch_contributors(code)
371
268
  code.sub(/\s.*/, "").split("/").map do |abbrev|
372
269
  case abbrev
@@ -381,12 +278,15 @@ module RelatonIec
381
278
  role: [type: "publisher"] }
382
279
  end
383
280
  end
281
+ # rubocop:enable Metrics/MethodLength
384
282
 
385
283
  # Fetch ICS.
386
284
  # @param doc [Nokogiri::HTML::Document]
387
285
  # @return [Array<Hash>]
388
286
  def fetch_ics(doc)
389
- doc.xpath('//th[contains(text(), "ICS")]/following-sibling::td/a').map do |i|
287
+ doc.xpath(
288
+ '//th[contains(text(), "ICS")]/following-sibling::td/a',
289
+ ).map do |i|
390
290
  code = i.text.match(/[\d\.]+/).to_s.split "."
391
291
  { field: code[0], group: code[1], subgroup: code[2] }
392
292
  end
@@ -403,9 +303,11 @@ module RelatonIec
403
303
  links
404
304
  end
405
305
 
306
+ # rubocop:disable Metrics/MethodLength
307
+
406
308
  # Fetch copyright.
407
309
  # @param code [String]
408
- # @return [Hash]
310
+ # @return [Array<Hash>]
409
311
  def fetch_copyright(code, doc)
410
312
  abbreviation = code.match(/.*?(?=\s)/).to_s
411
313
  case abbreviation
@@ -415,11 +317,15 @@ module RelatonIec
415
317
  end
416
318
  from = code.match(/(?<=:)\d{4}/).to_s
417
319
  if from.empty?
418
- from = doc.xpath("//span[@itemprop='releaseDate']").text
419
- .match(/\d{4}/).to_s
320
+ from = doc.xpath("//span[@itemprop='releaseDate']").text.
321
+ match(/\d{4}/).to_s
420
322
  end
421
- { owner: { name: name, abbreviation: abbreviation, url: url }, from: from }
323
+ [{
324
+ owner: [{ name: name, abbreviation: abbreviation, url: url }],
325
+ from: from,
326
+ }]
422
327
  end
328
+ # rubocop:enable Metrics/MethodLength
423
329
  end
424
330
  end
425
331
  # rubocop:enable Metrics/ModuleLength
@@ -1,3 +1,3 @@
1
1
  module RelatonIec
2
- VERSION = "0.8.0".freeze
2
+ VERSION = "1.1.0".freeze
3
3
  end
@@ -0,0 +1,14 @@
1
+ module RelatonIec
2
+ class XMLParser < RelatonIsoBib::XMLParser
3
+ class << self
4
+ private
5
+
6
+ # override RelatonIsoBib::XMLParser.bib_item method
7
+ # @param item_hash [Hash]
8
+ # @return [RelatonIec::IecBibliographicItem]
9
+ def bib_item(item_hash)
10
+ IecBibliographicItem.new item_hash
11
+ end
12
+ end
13
+ end
14
+ end
@@ -8,10 +8,10 @@ Gem::Specification.new do |spec|
8
8
  spec.authors = ["Ribose Inc."]
9
9
  spec.email = ["open.source@ribose.com"]
10
10
 
11
- spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic use "\
12
- "using the IsoBibliographicItem model"
13
- spec.description = "RelatonIec: retrieve IEC Standards for bibliographic use "\
14
- "using the IsoBibliographicItem model"
11
+ spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic "\
12
+ "use using the IecBibliographicItem model"
13
+ spec.description = "RelatonIec: retrieve IEC Standards for bibliographic "\
14
+ "use using the IecBibliographicItem model"
15
15
  spec.homepage = "https://github.com/metanorma/relaton-iec"
16
16
  spec.license = "MIT"
17
17
 
@@ -29,10 +29,11 @@ Gem::Specification.new do |spec|
29
29
  spec.add_development_dependency "rake", "~> 10.0"
30
30
  spec.add_development_dependency "rspec", "~> 3.0"
31
31
  spec.add_development_dependency "ruby-debug-ide"
32
+ spec.add_development_dependency "ruby-jing"
32
33
  spec.add_development_dependency "simplecov"
33
34
  spec.add_development_dependency "vcr"
34
35
  spec.add_development_dependency "webmock"
35
36
 
36
37
  spec.add_dependency "addressable"
37
- spec.add_dependency "relaton-iso-bib", "~> 0.7.0"
38
+ spec.add_dependency "relaton-iso-bib", "~> 1.1.0"
38
39
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-16 00:00:00.000000000 Z
11
+ date: 2020-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: debase
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: ruby-jing
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: simplecov
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -156,15 +170,15 @@ dependencies:
156
170
  requirements:
157
171
  - - "~>"
158
172
  - !ruby/object:Gem::Version
159
- version: 0.7.0
173
+ version: 1.1.0
160
174
  type: :runtime
161
175
  prerelease: false
162
176
  version_requirements: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
- version: 0.7.0
167
- description: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IsoBibliographicItem
180
+ version: 1.1.0
181
+ description: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IecBibliographicItem
168
182
  model'
169
183
  email:
170
184
  - open.source@ribose.com
@@ -184,15 +198,23 @@ files:
184
198
  - Rakefile
185
199
  - bin/console
186
200
  - bin/setup
187
- - grammars
201
+ - grammars/basicdoc.rng
202
+ - grammars/biblio.rng
203
+ - grammars/iec.rng
204
+ - grammars/isodoc.rng
205
+ - grammars/isostandard.rng
206
+ - grammars/reqt.rng
188
207
  - lib/relaton_iec.rb
208
+ - lib/relaton_iec/hash_converter.rb
189
209
  - lib/relaton_iec/hit.rb
190
210
  - lib/relaton_iec/hit_collection.rb
211
+ - lib/relaton_iec/iec_bibliographic_item.rb
191
212
  - lib/relaton_iec/iec_bibliography.rb
192
213
  - lib/relaton_iec/processor.rb
193
214
  - lib/relaton_iec/scrapper.rb
194
215
  - lib/relaton_iec/statuses.yml
195
216
  - lib/relaton_iec/version.rb
217
+ - lib/relaton_iec/xml_parser.rb
196
218
  - relaton_iec.gemspec
197
219
  homepage: https://github.com/metanorma/relaton-iec
198
220
  licenses:
@@ -216,6 +238,6 @@ requirements: []
216
238
  rubygems_version: 3.0.6
217
239
  signing_key:
218
240
  specification_version: 4
219
- summary: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IsoBibliographicItem
241
+ summary: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IecBibliographicItem
220
242
  model'
221
243
  test_files: []