relaton-iso 1.16.1 → 1.16.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 45b4a081a62ab5a5f0a4e6f2c2cffb4950861f09e838401a68aa2208731d65ec
4
- data.tar.gz: 01521bd3e1fa7853145a390461390b7a07dfc20e1efb02c2d6d90372d03a8664
3
+ metadata.gz: 479a728a58c56799448fd6d468e0d19fe245b731119f8dcd9ae6f19a7b624e07
4
+ data.tar.gz: ac89507180ca01978bfe98b68fbe02450f2c33015bd38d788752f3bf933911ad
5
5
  SHA512:
6
- metadata.gz: 0e72371e46e2d03875fce213861ab9f087fdafca4abe748436f8ccc217ee2d82b5a089c20d73062d2a022366b79294ac7b1a16f0b3f59593f79d673800286877
7
- data.tar.gz: c6fa8308f8feb86cc08ae3a1fde9e267169c5bf8b5292a6c97f4dbf7f28668b56ac111e3dc03411dfa537ce83905e87a273d866b275f376cef402a3f641a59b6
6
+ metadata.gz: 71cc49dc2afa8690f02f7035ec5cc13981eb620e2b8c3792456401c152a4ca8192b2ffbd7445c6c982886e61f679427a2d5afbf26e13c6ebcfffcc8d54f7e5c9
7
+ data.tar.gz: 853da0772a998533c5f461ff297bef978c75e1f58b2df1fec5eff0fea6d306807420453a6ba37b1348d44e58c3a71dcf743228c7703b73fd9b7d72c9d4309598
@@ -270,11 +270,10 @@ module RelatonIso
270
270
  # @param doc [Nokogiri::HTML::Document]
271
271
  # @return [Hash]
272
272
  def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
273
- wg = doc.at("//div[@class='clearfix']")
274
- wg_link = wg.at "span/a"
275
- return unless wg_link
273
+ wg = doc.at("////div[contains(., 'Technical Committe')]/following-sibling::span/a")
274
+ return unless wg
276
275
 
277
- workgroup = wg_link.text.split "/"
276
+ workgroup = wg.text.split "/"
278
277
  type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
279
278
  # {
280
279
  # name: "International Organization for Standardization",
@@ -282,44 +281,48 @@ module RelatonIso
282
281
  # url: "www.iso.org",
283
282
  # }
284
283
  tc_numb = workgroup[1]&.match(/\d+/)&.to_s&.to_i
285
- tc_name = wg.at("span[@class='entry-title']").text
286
- tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg_link.text,
284
+ tc_name = wg[:title]
285
+ tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg.text,
287
286
  type: type, number: tc_numb)
288
287
  RelatonIsoBib::EditorialGroup.new(technical_committee: [tc])
289
288
  end
290
289
 
291
- # rubocop:disable Metrics/MethodLength
292
-
293
290
  # Fetch relations.
294
291
  # @param doc [Nokogiri::HTML::Document]
295
292
  # @return [Array<Hash>]
296
- def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
293
+ def fetch_relations(doc)
297
294
  types = ["Now", "Now under review"]
298
295
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
299
- r_type = r.at("h4", "h5").text
300
- date = []
301
- type = case r_type.strip
302
- when "Previously", "Will be replaced by" then "obsoletes"
303
- when "Corrigenda / Amendments", "Revised by", "Now confirmed"
304
- on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
305
- date << { type: "circulated", on: on.text } if on
306
- "updates"
307
- else r_type
308
- end
309
- if types.include?(type) then a
310
- else
311
- a + r.css("a").map do |id|
312
- docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
313
- fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
314
- bibitem = RelatonIsoBib::IsoBibliographicItem.new(
315
- docid: [docid], formattedref: fref, date: date,
316
- )
317
- { type: type, bibitem: bibitem }
296
+ type, date = relation_type(r.at("h4", "h5").text.strip, doc)
297
+ next a if types.include?(type)
298
+
299
+ a + create_relations(r, type, date)
300
+ end
301
+ end
302
+
303
+ def relation_type(type, doc)
304
+ date = []
305
+ t = case type.strip
306
+ when "Previously", "Will be replaced by" then "obsoletes"
307
+ when "Corrigenda / Amendments", "Revised by", "Now confirmed"
308
+ on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
309
+ date << { type: "circulated", on: on.text } if on
310
+ "updates"
311
+ else type
318
312
  end
319
- end
313
+ [t, date]
314
+ end
315
+
316
+ def create_relations(rel, type, date)
317
+ rel.css("a").map do |id|
318
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
319
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
320
+ bibitem = RelatonIsoBib::IsoBibliographicItem.new(
321
+ docid: [docid], formattedref: fref, date: date,
322
+ )
323
+ { type: type, bibitem: bibitem }
320
324
  end
321
325
  end
322
- # rubocop:enable Metrics/MethodLength
323
326
 
324
327
  # Fetch type.
325
328
  # @param ref [String]
@@ -343,14 +346,20 @@ module RelatonIso
343
346
  # @param doc [Nokogiri::HTML::Document]
344
347
  # @param lang [String]
345
348
  # @return [Array<RelatonBib::TypedTitleString>]
346
- def fetch_title(doc, lang)
347
- content = doc.at(
348
- "//nav[contains(@class,'heading-condensed')]/h2 | "\
349
- "//nav[contains(@class,'heading-condensed')]/h3",
350
- )&.text&.gsub(/\u2014/, "-")
351
- return RelatonBib::TypedTitleStringCollection.new unless content
352
-
353
- RelatonBib::TypedTitleString.from_string content, lang, script(lang)
349
+ def fetch_title(doc, lang) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
350
+ head = doc.at "//nav[contains(@class,'heading-condensed')]"
351
+ types = { "h2" => "title-intro", "h3" => "title-main", "h4" => "title-part" }
352
+ title_types = head.xpath("h2 | h3 | h4").each_with_object({}) do |t, h|
353
+ h[types[t.name]] = t.text
354
+ end
355
+ title = RelatonBib::TypedTitleStringCollection.new
356
+ title_types.each do |type, content|
357
+ title << RelatonBib::TypedTitleString.new(
358
+ type: type, content: content, language: lang, script: script(lang),
359
+ )
360
+ end
361
+ main = title.map { |t| t.title.content }.join " - "
362
+ title << RelatonBib::TypedTitleString.new(type: "main", content: main, language: lang, script: script(lang))
354
363
  end
355
364
 
356
365
  # Return ISO script code.
@@ -363,12 +372,11 @@ module RelatonIso
363
372
  end
364
373
  end
365
374
 
366
- # rubocop:disable Metrics/MethodLength
367
375
  # Fetch dates
368
376
  # @param doc [Nokogiri::HTML::Document]
369
377
  # @param ref [String]
370
378
  # @return [Array<Hash>]
371
- def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity
379
+ def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/MethodLength
372
380
  dates = []
373
381
  %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
374
382
  pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
@@ -400,25 +408,28 @@ module RelatonIso
400
408
  mem << { entity: publisher, role: [type: "publisher"] }
401
409
  end
402
410
  end
403
- # rubocop:enable Metrics/MethodLength
404
411
 
405
412
  # Fetch ICS.
406
413
  # @param doc [Nokogiri::HTML::Document]
407
414
  # @return [Array<Hash>]
408
415
  def fetch_ics(doc)
409
- doc.xpath("//dl[dt/strong[.='ICS']]/dd/span/a").map do |i|
416
+ doc.xpath("//div[contains(., 'ICS')]/following-sibling::span/a").map do |i|
410
417
  code = i.text.match(/[\d.]+/).to_s.split "."
411
418
  { field: code[0], group: code[1], subgroup: code[2] }
412
419
  end
413
420
  end
414
421
 
422
+ #
415
423
  # Fetch links.
416
- # @param doc [Nokogiri::HTML::Document]
417
- # @param url [String]
424
+ #
425
+ # @param doc [Nokogiri::HTML::Document] document to parse
426
+ # @param url [String] document url
427
+ #
418
428
  # @return [Array<Hash>]
429
+ #
419
430
  def fetch_link(doc, url)
420
431
  links = [{ type: "src", content: url }]
421
- obp = doc.at_css("a#obp-preview")
432
+ obp = doc.at("//h4[contains(@class, 'h5')]/a")
422
433
  links << { type: "obp", content: obp[:href] } if obp
423
434
  rss = doc.at("//a[contains(@href, 'rss')]")
424
435
  links << { type: "rss", content: DOMAIN + rss[:href] } if rss
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.16.1"
4
+ VERSION = "1.16.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.1
4
+ version: 1.16.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-14 00:00:00.000000000 Z
11
+ date: 2023-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: algolia