relaton-iso 1.16.0 → 1.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b3ca89f9616730766487b295af824ea9b797aa863f0bbc248e21e8f26862e02c
4
- data.tar.gz: 070dedc5d81dfeea1085da56872e960a021691193c0ad1b7b47c4a3c8704a2ed
3
+ metadata.gz: 479a728a58c56799448fd6d468e0d19fe245b731119f8dcd9ae6f19a7b624e07
4
+ data.tar.gz: ac89507180ca01978bfe98b68fbe02450f2c33015bd38d788752f3bf933911ad
5
5
  SHA512:
6
- metadata.gz: a62229e67a1547c934babfb52d77d1a2c51f0982840bca9a889499479cbdc0b2e7bcbc0291bfa0812afaf25eb5d13f6c760b073a5b6742b588275a922fcbe210
7
- data.tar.gz: 46ef00eeef8fa1b4f55281ae7566fbee4f5ac743c8d90bb0053fd12255121ff5e994e47b16e441c019137e8962a3d4ce298f954b29dd08b219537ba3d8d70edd
6
+ metadata.gz: 71cc49dc2afa8690f02f7035ec5cc13981eb620e2b8c3792456401c152a4ca8192b2ffbd7445c6c982886e61f679427a2d5afbf26e13c6ebcfffcc8d54f7e5c9
7
+ data.tar.gz: 853da0772a998533c5f461ff297bef978c75e1f58b2df1fec5eff0fea6d306807420453a6ba37b1348d44e58c3a71dcf743228c7703b73fd9b7d72c9d4309598
data/README.adoc CHANGED
@@ -72,14 +72,14 @@ item.docidentifier.detect { |di| di.type == "URN" }.id
72
72
  [source,ruby]
73
73
  ----
74
74
  item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
75
- [relaton-iso] ("ISO 19115:2003") fetching from ISO...
76
- [relaton-iso] ("ISO 19115:2003") Found exact match.
75
+ [relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
76
+ [relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
77
77
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83429e30
78
78
  ...
79
79
 
80
80
  item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
81
- [relaton-iso] ("ISO 19115:2003") Fetching from ISO...
82
- [relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
81
+ [relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
82
+ [relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
83
83
  => #<RelatonIsoBib::IsoBibliographicItem:0x0000000112c9ca80
84
84
  ...
85
85
 
@@ -92,8 +92,8 @@ item.docidentifier[0].id
92
92
  [source,ruby]
93
93
  ----
94
94
  item = RelatonIso::IsoBibliography.get "ISO 19115"
95
- [relaton-iso] ("ISO 19115") fetching from ISO...
96
- [relaton-iso] ("ISO 19115") Found ("ISO 19115:2003").
95
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
96
+ [relaton-iso] (ISO 19115) Found: `ISO 19115:2003`
97
97
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830275a8
98
98
  ...
99
99
 
@@ -106,8 +106,8 @@ item.docidentifier[0].id
106
106
  [source,ruby]
107
107
  ----
108
108
  item = RelatonIso::IsoBibliography.get "ISO 19115-1"
109
- [relaton-iso] ("ISO 19115-1") fetching from ISO...
110
- [relaton-iso] ("ISO 19115-1") Found ("ISO 19115-1:2014").
109
+ [relaton-iso] (ISO 19115-1) Fetching from iso.org ...
110
+ [relaton-iso] (ISO 19115-1) Found: `ISO 19115-1:2014`
111
111
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83408af0
112
112
  ...
113
113
 
@@ -120,14 +120,14 @@ item.docidentifier[0].id
120
120
  [source,ruby]
121
121
  ----
122
122
  item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
123
- [relaton-iso] ("ISO 19115") Fetching from ISO...
124
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
123
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
124
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
125
125
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
126
126
  ...
127
127
 
128
128
  item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
129
- [relaton-iso] ("ISO 19115") Fetching from ISO...
130
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
129
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
130
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
131
131
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
132
132
  ...
133
133
 
@@ -135,13 +135,13 @@ item.docidentifier[0].id
135
135
  => "ISO 19115 (all parts)"
136
136
 
137
137
  item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
138
- [relaton-iso] ("ISO 19115") Fetching from ISO...
139
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
138
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
139
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
140
140
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
141
141
 
142
142
  item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
143
- [relaton-iso] ("ISO 19115") Fetching from ISO...
144
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
143
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
144
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
145
145
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
146
146
  ...
147
147
 
@@ -234,8 +234,8 @@ item.title lang: 'fr'
234
234
  @type="main">]>
235
235
 
236
236
  item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
237
- [relaton-iso] ("ISO 19115:2003") Fetching from ISO...
238
- [relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
237
+ [relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
238
+ [relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
239
239
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007fa8870b69e0
240
240
 
241
241
  item.abstract lang: 'en'
@@ -42,8 +42,9 @@ module RelatonIso
42
42
  # @return [Pubid::Iso::Identifier]
43
43
  def pubid
44
44
  @pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
45
- rescue Pubid::Iso::Errors::WrongTypeError, Pubid::Iso::Errors::ParseError => e
46
- Util.warn "unable to find an identifier in `#{hit[:title]}`."
45
+ rescue Pubid::Iso::Errors::WrongTypeError,
46
+ Pubid::Iso::Errors::ParseError => e
47
+ Util.warn "Unable to find an identifier in: `#{hit[:title]}`."
47
48
  Util.warn e.message
48
49
  end
49
50
  end
@@ -38,7 +38,7 @@ module RelatonIso
38
38
  query_pubid = Pubid::Iso::Identifier.parse(code)
39
39
  query_pubid.year = year if year
40
40
  query_pubid.part = nil if opts[:all_parts]
41
- Util.warn "(#{query_pubid}) Fetching from ISO..."
41
+ Util.warn "(#{query_pubid}) Fetching from iso.org ..."
42
42
 
43
43
  hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
44
44
  tip_ids = look_up_with_any_types_stages(hits, ref, opts)
@@ -54,7 +54,7 @@ module RelatonIso
54
54
  response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
55
55
  response_pubid = Pubid::Iso::Identifier.parse(response_docid)
56
56
 
57
- Util.warn "(#{query_pubid}) Found `#{response_pubid}`."
57
+ Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
58
58
 
59
59
  get_all = (
60
60
  (query_pubid.year && opts[:keep_year].nil?) ||
@@ -65,7 +65,7 @@ module RelatonIso
65
65
 
66
66
  ret.to_most_recent_reference
67
67
  rescue Pubid::Core::Errors::ParseError
68
- Util.warn "(#{code}) is not recognized as a standards identifier."
68
+ Util.warn "(#{code}) Is not recognized as a standards identifier."
69
69
  nil
70
70
  end
71
71
 
@@ -179,7 +179,8 @@ module RelatonIso
179
179
  hit_collection = search(query_pubid_without_year.to_s)
180
180
 
181
181
  # filter only matching hits
182
- filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
182
+ filter_hits hit_collection, query_pubid, opts[:all_parts],
183
+ any_types_stages
183
184
  end
184
185
 
185
186
  #
@@ -196,7 +197,8 @@ module RelatonIso
196
197
  # filter out
197
198
  result = hit_collection.select do |i|
198
199
  hit_pubid = i.pubid
199
- matches_base?(query_pubid, hit_pubid, any_types_stages: any_stypes_tages) &&
200
+ matches_base?(query_pubid, hit_pubid,
201
+ any_types_stages: any_stypes_tages) &&
200
202
  matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
201
203
  query_pubid.corrigendums == hit_pubid.corrigendums &&
202
204
  query_pubid.amendments == hit_pubid.amendments
@@ -270,11 +270,10 @@ module RelatonIso
270
270
  # @param doc [Nokogiri::HTML::Document]
271
271
  # @return [Hash]
272
272
  def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
273
- wg = doc.at("//div[@class='clearfix']")
274
- wg_link = wg.at "span/a"
275
- return unless wg_link
273
+ wg = doc.at("////div[contains(., 'Technical Committe')]/following-sibling::span/a")
274
+ return unless wg
276
275
 
277
- workgroup = wg_link.text.split "/"
276
+ workgroup = wg.text.split "/"
278
277
  type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
279
278
  # {
280
279
  # name: "International Organization for Standardization",
@@ -282,44 +281,48 @@ module RelatonIso
282
281
  # url: "www.iso.org",
283
282
  # }
284
283
  tc_numb = workgroup[1]&.match(/\d+/)&.to_s&.to_i
285
- tc_name = wg.at("span[@class='entry-title']").text
286
- tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg_link.text,
284
+ tc_name = wg[:title]
285
+ tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg.text,
287
286
  type: type, number: tc_numb)
288
287
  RelatonIsoBib::EditorialGroup.new(technical_committee: [tc])
289
288
  end
290
289
 
291
- # rubocop:disable Metrics/MethodLength
292
-
293
290
  # Fetch relations.
294
291
  # @param doc [Nokogiri::HTML::Document]
295
292
  # @return [Array<Hash>]
296
- def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
293
+ def fetch_relations(doc)
297
294
  types = ["Now", "Now under review"]
298
295
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
299
- r_type = r.at("h4", "h5").text
300
- date = []
301
- type = case r_type.strip
302
- when "Previously", "Will be replaced by" then "obsoletes"
303
- when "Corrigenda / Amendments", "Revised by", "Now confirmed"
304
- on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
305
- date << { type: "circulated", on: on.text } if on
306
- "updates"
307
- else r_type
308
- end
309
- if types.include?(type) then a
310
- else
311
- a + r.css("a").map do |id|
312
- docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
313
- fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
314
- bibitem = RelatonIsoBib::IsoBibliographicItem.new(
315
- docid: [docid], formattedref: fref, date: date,
316
- )
317
- { type: type, bibitem: bibitem }
296
+ type, date = relation_type(r.at("h4", "h5").text.strip, doc)
297
+ next a if types.include?(type)
298
+
299
+ a + create_relations(r, type, date)
300
+ end
301
+ end
302
+
303
+ def relation_type(type, doc)
304
+ date = []
305
+ t = case type.strip
306
+ when "Previously", "Will be replaced by" then "obsoletes"
307
+ when "Corrigenda / Amendments", "Revised by", "Now confirmed"
308
+ on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
309
+ date << { type: "circulated", on: on.text } if on
310
+ "updates"
311
+ else type
318
312
  end
319
- end
313
+ [t, date]
314
+ end
315
+
316
+ def create_relations(rel, type, date)
317
+ rel.css("a").map do |id|
318
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
319
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
320
+ bibitem = RelatonIsoBib::IsoBibliographicItem.new(
321
+ docid: [docid], formattedref: fref, date: date,
322
+ )
323
+ { type: type, bibitem: bibitem }
320
324
  end
321
325
  end
322
- # rubocop:enable Metrics/MethodLength
323
326
 
324
327
  # Fetch type.
325
328
  # @param ref [String]
@@ -343,14 +346,20 @@ module RelatonIso
343
346
  # @param doc [Nokogiri::HTML::Document]
344
347
  # @param lang [String]
345
348
  # @return [Array<RelatonBib::TypedTitleString>]
346
- def fetch_title(doc, lang)
347
- content = doc.at(
348
- "//nav[contains(@class,'heading-condensed')]/h2 | "\
349
- "//nav[contains(@class,'heading-condensed')]/h3",
350
- )&.text&.gsub(/\u2014/, "-")
351
- return RelatonBib::TypedTitleStringCollection.new unless content
352
-
353
- RelatonBib::TypedTitleString.from_string content, lang, script(lang)
349
+ def fetch_title(doc, lang) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
350
+ head = doc.at "//nav[contains(@class,'heading-condensed')]"
351
+ types = { "h2" => "title-intro", "h3" => "title-main", "h4" => "title-part" }
352
+ title_types = head.xpath("h2 | h3 | h4").each_with_object({}) do |t, h|
353
+ h[types[t.name]] = t.text
354
+ end
355
+ title = RelatonBib::TypedTitleStringCollection.new
356
+ title_types.each do |type, content|
357
+ title << RelatonBib::TypedTitleString.new(
358
+ type: type, content: content, language: lang, script: script(lang),
359
+ )
360
+ end
361
+ main = title.map { |t| t.title.content }.join " - "
362
+ title << RelatonBib::TypedTitleString.new(type: "main", content: main, language: lang, script: script(lang))
354
363
  end
355
364
 
356
365
  # Return ISO script code.
@@ -363,12 +372,11 @@ module RelatonIso
363
372
  end
364
373
  end
365
374
 
366
- # rubocop:disable Metrics/MethodLength
367
375
  # Fetch dates
368
376
  # @param doc [Nokogiri::HTML::Document]
369
377
  # @param ref [String]
370
378
  # @return [Array<Hash>]
371
- def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity
379
+ def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/MethodLength
372
380
  dates = []
373
381
  %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
374
382
  pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
@@ -400,25 +408,28 @@ module RelatonIso
400
408
  mem << { entity: publisher, role: [type: "publisher"] }
401
409
  end
402
410
  end
403
- # rubocop:enable Metrics/MethodLength
404
411
 
405
412
  # Fetch ICS.
406
413
  # @param doc [Nokogiri::HTML::Document]
407
414
  # @return [Array<Hash>]
408
415
  def fetch_ics(doc)
409
- doc.xpath("//dl[dt/strong[.='ICS']]/dd/span/a").map do |i|
416
+ doc.xpath("//div[contains(., 'ICS')]/following-sibling::span/a").map do |i|
410
417
  code = i.text.match(/[\d.]+/).to_s.split "."
411
418
  { field: code[0], group: code[1], subgroup: code[2] }
412
419
  end
413
420
  end
414
421
 
422
+ #
415
423
  # Fetch links.
416
- # @param doc [Nokogiri::HTML::Document]
417
- # @param url [String]
424
+ #
425
+ # @param doc [Nokogiri::HTML::Document] document to parse
426
+ # @param url [String] document url
427
+ #
418
428
  # @return [Array<Hash>]
429
+ #
419
430
  def fetch_link(doc, url)
420
431
  links = [{ type: "src", content: url }]
421
- obp = doc.at_css("a#obp-preview")
432
+ obp = doc.at("//h4[contains(@class, 'h5')]/a")
422
433
  links << { type: "obp", content: obp[:href] } if obp
423
434
  rss = doc.at("//a[contains(@href, 'rss')]")
424
435
  links << { type: "rss", content: DOMAIN + rss[:href] } if rss
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.16.0"
4
+ VERSION = "1.16.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.0
4
+ version: 1.16.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-03 00:00:00.000000000 Z
11
+ date: 2023-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: algolia