relaton-iso 1.16.1 → 1.16.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 45b4a081a62ab5a5f0a4e6f2c2cffb4950861f09e838401a68aa2208731d65ec
4
- data.tar.gz: 01521bd3e1fa7853145a390461390b7a07dfc20e1efb02c2d6d90372d03a8664
3
+ metadata.gz: 479a728a58c56799448fd6d468e0d19fe245b731119f8dcd9ae6f19a7b624e07
4
+ data.tar.gz: ac89507180ca01978bfe98b68fbe02450f2c33015bd38d788752f3bf933911ad
5
5
  SHA512:
6
- metadata.gz: 0e72371e46e2d03875fce213861ab9f087fdafca4abe748436f8ccc217ee2d82b5a089c20d73062d2a022366b79294ac7b1a16f0b3f59593f79d673800286877
7
- data.tar.gz: c6fa8308f8feb86cc08ae3a1fde9e267169c5bf8b5292a6c97f4dbf7f28668b56ac111e3dc03411dfa537ce83905e87a273d866b275f376cef402a3f641a59b6
6
+ metadata.gz: 71cc49dc2afa8690f02f7035ec5cc13981eb620e2b8c3792456401c152a4ca8192b2ffbd7445c6c982886e61f679427a2d5afbf26e13c6ebcfffcc8d54f7e5c9
7
+ data.tar.gz: 853da0772a998533c5f461ff297bef978c75e1f58b2df1fec5eff0fea6d306807420453a6ba37b1348d44e58c3a71dcf743228c7703b73fd9b7d72c9d4309598
@@ -270,11 +270,10 @@ module RelatonIso
270
270
  # @param doc [Nokogiri::HTML::Document]
271
271
  # @return [Hash]
272
272
  def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
273
- wg = doc.at("//div[@class='clearfix']")
274
- wg_link = wg.at "span/a"
275
- return unless wg_link
273
+ wg = doc.at("////div[contains(., 'Technical Committe')]/following-sibling::span/a")
274
+ return unless wg
276
275
 
277
- workgroup = wg_link.text.split "/"
276
+ workgroup = wg.text.split "/"
278
277
  type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
279
278
  # {
280
279
  # name: "International Organization for Standardization",
@@ -282,44 +281,48 @@ module RelatonIso
282
281
  # url: "www.iso.org",
283
282
  # }
284
283
  tc_numb = workgroup[1]&.match(/\d+/)&.to_s&.to_i
285
- tc_name = wg.at("span[@class='entry-title']").text
286
- tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg_link.text,
284
+ tc_name = wg[:title]
285
+ tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg.text,
287
286
  type: type, number: tc_numb)
288
287
  RelatonIsoBib::EditorialGroup.new(technical_committee: [tc])
289
288
  end
290
289
 
291
- # rubocop:disable Metrics/MethodLength
292
-
293
290
  # Fetch relations.
294
291
  # @param doc [Nokogiri::HTML::Document]
295
292
  # @return [Array<Hash>]
296
- def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
293
+ def fetch_relations(doc)
297
294
  types = ["Now", "Now under review"]
298
295
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
299
- r_type = r.at("h4", "h5").text
300
- date = []
301
- type = case r_type.strip
302
- when "Previously", "Will be replaced by" then "obsoletes"
303
- when "Corrigenda / Amendments", "Revised by", "Now confirmed"
304
- on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
305
- date << { type: "circulated", on: on.text } if on
306
- "updates"
307
- else r_type
308
- end
309
- if types.include?(type) then a
310
- else
311
- a + r.css("a").map do |id|
312
- docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
313
- fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
314
- bibitem = RelatonIsoBib::IsoBibliographicItem.new(
315
- docid: [docid], formattedref: fref, date: date,
316
- )
317
- { type: type, bibitem: bibitem }
296
+ type, date = relation_type(r.at("h4", "h5").text.strip, doc)
297
+ next a if types.include?(type)
298
+
299
+ a + create_relations(r, type, date)
300
+ end
301
+ end
302
+
303
+ def relation_type(type, doc)
304
+ date = []
305
+ t = case type.strip
306
+ when "Previously", "Will be replaced by" then "obsoletes"
307
+ when "Corrigenda / Amendments", "Revised by", "Now confirmed"
308
+ on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
309
+ date << { type: "circulated", on: on.text } if on
310
+ "updates"
311
+ else type
318
312
  end
319
- end
313
+ [t, date]
314
+ end
315
+
316
+ def create_relations(rel, type, date)
317
+ rel.css("a").map do |id|
318
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
319
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
320
+ bibitem = RelatonIsoBib::IsoBibliographicItem.new(
321
+ docid: [docid], formattedref: fref, date: date,
322
+ )
323
+ { type: type, bibitem: bibitem }
320
324
  end
321
325
  end
322
- # rubocop:enable Metrics/MethodLength
323
326
 
324
327
  # Fetch type.
325
328
  # @param ref [String]
@@ -343,14 +346,20 @@ module RelatonIso
343
346
  # @param doc [Nokogiri::HTML::Document]
344
347
  # @param lang [String]
345
348
  # @return [Array<RelatonBib::TypedTitleString>]
346
- def fetch_title(doc, lang)
347
- content = doc.at(
348
- "//nav[contains(@class,'heading-condensed')]/h2 | "\
349
- "//nav[contains(@class,'heading-condensed')]/h3",
350
- )&.text&.gsub(/\u2014/, "-")
351
- return RelatonBib::TypedTitleStringCollection.new unless content
352
-
353
- RelatonBib::TypedTitleString.from_string content, lang, script(lang)
349
+ def fetch_title(doc, lang) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
350
+ head = doc.at "//nav[contains(@class,'heading-condensed')]"
351
+ types = { "h2" => "title-intro", "h3" => "title-main", "h4" => "title-part" }
352
+ title_types = head.xpath("h2 | h3 | h4").each_with_object({}) do |t, h|
353
+ h[types[t.name]] = t.text
354
+ end
355
+ title = RelatonBib::TypedTitleStringCollection.new
356
+ title_types.each do |type, content|
357
+ title << RelatonBib::TypedTitleString.new(
358
+ type: type, content: content, language: lang, script: script(lang),
359
+ )
360
+ end
361
+ main = title.map { |t| t.title.content }.join " - "
362
+ title << RelatonBib::TypedTitleString.new(type: "main", content: main, language: lang, script: script(lang))
354
363
  end
355
364
 
356
365
  # Return ISO script code.
@@ -363,12 +372,11 @@ module RelatonIso
363
372
  end
364
373
  end
365
374
 
366
- # rubocop:disable Metrics/MethodLength
367
375
  # Fetch dates
368
376
  # @param doc [Nokogiri::HTML::Document]
369
377
  # @param ref [String]
370
378
  # @return [Array<Hash>]
371
- def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity
379
+ def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/MethodLength
372
380
  dates = []
373
381
  %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
374
382
  pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
@@ -400,25 +408,28 @@ module RelatonIso
400
408
  mem << { entity: publisher, role: [type: "publisher"] }
401
409
  end
402
410
  end
403
- # rubocop:enable Metrics/MethodLength
404
411
 
405
412
  # Fetch ICS.
406
413
  # @param doc [Nokogiri::HTML::Document]
407
414
  # @return [Array<Hash>]
408
415
  def fetch_ics(doc)
409
- doc.xpath("//dl[dt/strong[.='ICS']]/dd/span/a").map do |i|
416
+ doc.xpath("//div[contains(., 'ICS')]/following-sibling::span/a").map do |i|
410
417
  code = i.text.match(/[\d.]+/).to_s.split "."
411
418
  { field: code[0], group: code[1], subgroup: code[2] }
412
419
  end
413
420
  end
414
421
 
422
+ #
415
423
  # Fetch links.
416
- # @param doc [Nokogiri::HTML::Document]
417
- # @param url [String]
424
+ #
425
+ # @param doc [Nokogiri::HTML::Document] document to parse
426
+ # @param url [String] document url
427
+ #
418
428
  # @return [Array<Hash>]
429
+ #
419
430
  def fetch_link(doc, url)
420
431
  links = [{ type: "src", content: url }]
421
- obp = doc.at_css("a#obp-preview")
432
+ obp = doc.at("//h4[contains(@class, 'h5')]/a")
422
433
  links << { type: "obp", content: obp[:href] } if obp
423
434
  rss = doc.at("//a[contains(@href, 'rss')]")
424
435
  links << { type: "rss", content: DOMAIN + rss[:href] } if rss
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.16.1"
4
+ VERSION = "1.16.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.1
4
+ version: 1.16.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-14 00:00:00.000000000 Z
11
+ date: 2023-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: algolia