relaton-iso 1.16.0 → 1.16.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b3ca89f9616730766487b295af824ea9b797aa863f0bbc248e21e8f26862e02c
4
- data.tar.gz: 070dedc5d81dfeea1085da56872e960a021691193c0ad1b7b47c4a3c8704a2ed
3
+ metadata.gz: 479a728a58c56799448fd6d468e0d19fe245b731119f8dcd9ae6f19a7b624e07
4
+ data.tar.gz: ac89507180ca01978bfe98b68fbe02450f2c33015bd38d788752f3bf933911ad
5
5
  SHA512:
6
- metadata.gz: a62229e67a1547c934babfb52d77d1a2c51f0982840bca9a889499479cbdc0b2e7bcbc0291bfa0812afaf25eb5d13f6c760b073a5b6742b588275a922fcbe210
7
- data.tar.gz: 46ef00eeef8fa1b4f55281ae7566fbee4f5ac743c8d90bb0053fd12255121ff5e994e47b16e441c019137e8962a3d4ce298f954b29dd08b219537ba3d8d70edd
6
+ metadata.gz: 71cc49dc2afa8690f02f7035ec5cc13981eb620e2b8c3792456401c152a4ca8192b2ffbd7445c6c982886e61f679427a2d5afbf26e13c6ebcfffcc8d54f7e5c9
7
+ data.tar.gz: 853da0772a998533c5f461ff297bef978c75e1f58b2df1fec5eff0fea6d306807420453a6ba37b1348d44e58c3a71dcf743228c7703b73fd9b7d72c9d4309598
data/README.adoc CHANGED
@@ -72,14 +72,14 @@ item.docidentifier.detect { |di| di.type == "URN" }.id
72
72
  [source,ruby]
73
73
  ----
74
74
  item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
75
- [relaton-iso] ("ISO 19115:2003") fetching from ISO...
76
- [relaton-iso] ("ISO 19115:2003") Found exact match.
75
+ [relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
76
+ [relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
77
77
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83429e30
78
78
  ...
79
79
 
80
80
  item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
81
- [relaton-iso] ("ISO 19115:2003") Fetching from ISO...
82
- [relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
81
+ [relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
82
+ [relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
83
83
  => #<RelatonIsoBib::IsoBibliographicItem:0x0000000112c9ca80
84
84
  ...
85
85
 
@@ -92,8 +92,8 @@ item.docidentifier[0].id
92
92
  [source,ruby]
93
93
  ----
94
94
  item = RelatonIso::IsoBibliography.get "ISO 19115"
95
- [relaton-iso] ("ISO 19115") fetching from ISO...
96
- [relaton-iso] ("ISO 19115") Found ("ISO 19115:2003").
95
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
96
+ [relaton-iso] (ISO 19115) Found: `ISO 19115:2003`
97
97
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830275a8
98
98
  ...
99
99
 
@@ -106,8 +106,8 @@ item.docidentifier[0].id
106
106
  [source,ruby]
107
107
  ----
108
108
  item = RelatonIso::IsoBibliography.get "ISO 19115-1"
109
- [relaton-iso] ("ISO 19115-1") fetching from ISO...
110
- [relaton-iso] ("ISO 19115-1") Found ("ISO 19115-1:2014").
109
+ [relaton-iso] (ISO 19115-1) Fetching from iso.org ...
110
+ [relaton-iso] (ISO 19115-1) Found: `ISO 19115-1:2014`
111
111
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83408af0
112
112
  ...
113
113
 
@@ -120,14 +120,14 @@ item.docidentifier[0].id
120
120
  [source,ruby]
121
121
  ----
122
122
  item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
123
- [relaton-iso] ("ISO 19115") Fetching from ISO...
124
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
123
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
124
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
125
125
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
126
126
  ...
127
127
 
128
128
  item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
129
- [relaton-iso] ("ISO 19115") Fetching from ISO...
130
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
129
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
130
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
131
131
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
132
132
  ...
133
133
 
@@ -135,13 +135,13 @@ item.docidentifier[0].id
135
135
  => "ISO 19115 (all parts)"
136
136
 
137
137
  item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
138
- [relaton-iso] ("ISO 19115") Fetching from ISO...
139
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
138
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
139
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
140
140
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
141
141
 
142
142
  item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
143
- [relaton-iso] ("ISO 19115") Fetching from ISO...
144
- [relaton-iso] ("ISO 19115") Found ("ISO 19115").
143
+ [relaton-iso] (ISO 19115) Fetching from iso.org ...
144
+ [relaton-iso] (ISO 19115) Found: `ISO 19115`
145
145
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
146
146
  ...
147
147
 
@@ -234,8 +234,8 @@ item.title lang: 'fr'
234
234
  @type="main">]>
235
235
 
236
236
  item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
237
- [relaton-iso] ("ISO 19115:2003") Fetching from ISO...
238
- [relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
237
+ [relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
238
+ [relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
239
239
  => #<RelatonIsoBib::IsoBibliographicItem:0x00007fa8870b69e0
240
240
 
241
241
  item.abstract lang: 'en'
@@ -42,8 +42,9 @@ module RelatonIso
42
42
  # @return [Pubid::Iso::Identifier]
43
43
  def pubid
44
44
  @pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
45
- rescue Pubid::Iso::Errors::WrongTypeError, Pubid::Iso::Errors::ParseError => e
46
- Util.warn "unable to find an identifier in `#{hit[:title]}`."
45
+ rescue Pubid::Iso::Errors::WrongTypeError,
46
+ Pubid::Iso::Errors::ParseError => e
47
+ Util.warn "Unable to find an identifier in: `#{hit[:title]}`."
47
48
  Util.warn e.message
48
49
  end
49
50
  end
@@ -38,7 +38,7 @@ module RelatonIso
38
38
  query_pubid = Pubid::Iso::Identifier.parse(code)
39
39
  query_pubid.year = year if year
40
40
  query_pubid.part = nil if opts[:all_parts]
41
- Util.warn "(#{query_pubid}) Fetching from ISO..."
41
+ Util.warn "(#{query_pubid}) Fetching from iso.org ..."
42
42
 
43
43
  hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
44
44
  tip_ids = look_up_with_any_types_stages(hits, ref, opts)
@@ -54,7 +54,7 @@ module RelatonIso
54
54
  response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
55
55
  response_pubid = Pubid::Iso::Identifier.parse(response_docid)
56
56
 
57
- Util.warn "(#{query_pubid}) Found `#{response_pubid}`."
57
+ Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
58
58
 
59
59
  get_all = (
60
60
  (query_pubid.year && opts[:keep_year].nil?) ||
@@ -65,7 +65,7 @@ module RelatonIso
65
65
 
66
66
  ret.to_most_recent_reference
67
67
  rescue Pubid::Core::Errors::ParseError
68
- Util.warn "(#{code}) is not recognized as a standards identifier."
68
+ Util.warn "(#{code}) Is not recognized as a standards identifier."
69
69
  nil
70
70
  end
71
71
 
@@ -179,7 +179,8 @@ module RelatonIso
179
179
  hit_collection = search(query_pubid_without_year.to_s)
180
180
 
181
181
  # filter only matching hits
182
- filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
182
+ filter_hits hit_collection, query_pubid, opts[:all_parts],
183
+ any_types_stages
183
184
  end
184
185
 
185
186
  #
@@ -196,7 +197,8 @@ module RelatonIso
196
197
  # filter out
197
198
  result = hit_collection.select do |i|
198
199
  hit_pubid = i.pubid
199
- matches_base?(query_pubid, hit_pubid, any_types_stages: any_stypes_tages) &&
200
+ matches_base?(query_pubid, hit_pubid,
201
+ any_types_stages: any_stypes_tages) &&
200
202
  matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
201
203
  query_pubid.corrigendums == hit_pubid.corrigendums &&
202
204
  query_pubid.amendments == hit_pubid.amendments
@@ -270,11 +270,10 @@ module RelatonIso
270
270
  # @param doc [Nokogiri::HTML::Document]
271
271
  # @return [Hash]
272
272
  def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
273
- wg = doc.at("//div[@class='clearfix']")
274
- wg_link = wg.at "span/a"
275
- return unless wg_link
273
+ wg = doc.at("////div[contains(., 'Technical Committe')]/following-sibling::span/a")
274
+ return unless wg
276
275
 
277
- workgroup = wg_link.text.split "/"
276
+ workgroup = wg.text.split "/"
278
277
  type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
279
278
  # {
280
279
  # name: "International Organization for Standardization",
@@ -282,44 +281,48 @@ module RelatonIso
282
281
  # url: "www.iso.org",
283
282
  # }
284
283
  tc_numb = workgroup[1]&.match(/\d+/)&.to_s&.to_i
285
- tc_name = wg.at("span[@class='entry-title']").text
286
- tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg_link.text,
284
+ tc_name = wg[:title]
285
+ tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg.text,
287
286
  type: type, number: tc_numb)
288
287
  RelatonIsoBib::EditorialGroup.new(technical_committee: [tc])
289
288
  end
290
289
 
291
- # rubocop:disable Metrics/MethodLength
292
-
293
290
  # Fetch relations.
294
291
  # @param doc [Nokogiri::HTML::Document]
295
292
  # @return [Array<Hash>]
296
- def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
293
+ def fetch_relations(doc)
297
294
  types = ["Now", "Now under review"]
298
295
  doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
299
- r_type = r.at("h4", "h5").text
300
- date = []
301
- type = case r_type.strip
302
- when "Previously", "Will be replaced by" then "obsoletes"
303
- when "Corrigenda / Amendments", "Revised by", "Now confirmed"
304
- on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
305
- date << { type: "circulated", on: on.text } if on
306
- "updates"
307
- else r_type
308
- end
309
- if types.include?(type) then a
310
- else
311
- a + r.css("a").map do |id|
312
- docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
313
- fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
314
- bibitem = RelatonIsoBib::IsoBibliographicItem.new(
315
- docid: [docid], formattedref: fref, date: date,
316
- )
317
- { type: type, bibitem: bibitem }
296
+ type, date = relation_type(r.at("h4", "h5").text.strip, doc)
297
+ next a if types.include?(type)
298
+
299
+ a + create_relations(r, type, date)
300
+ end
301
+ end
302
+
303
+ def relation_type(type, doc)
304
+ date = []
305
+ t = case type.strip
306
+ when "Previously", "Will be replaced by" then "obsoletes"
307
+ when "Corrigenda / Amendments", "Revised by", "Now confirmed"
308
+ on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
309
+ date << { type: "circulated", on: on.text } if on
310
+ "updates"
311
+ else type
318
312
  end
319
- end
313
+ [t, date]
314
+ end
315
+
316
+ def create_relations(rel, type, date)
317
+ rel.css("a").map do |id|
318
+ docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
319
+ fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
320
+ bibitem = RelatonIsoBib::IsoBibliographicItem.new(
321
+ docid: [docid], formattedref: fref, date: date,
322
+ )
323
+ { type: type, bibitem: bibitem }
320
324
  end
321
325
  end
322
- # rubocop:enable Metrics/MethodLength
323
326
 
324
327
  # Fetch type.
325
328
  # @param ref [String]
@@ -343,14 +346,20 @@ module RelatonIso
343
346
  # @param doc [Nokogiri::HTML::Document]
344
347
  # @param lang [String]
345
348
  # @return [Array<RelatonBib::TypedTitleString>]
346
- def fetch_title(doc, lang)
347
- content = doc.at(
348
- "//nav[contains(@class,'heading-condensed')]/h2 | "\
349
- "//nav[contains(@class,'heading-condensed')]/h3",
350
- )&.text&.gsub(/\u2014/, "-")
351
- return RelatonBib::TypedTitleStringCollection.new unless content
352
-
353
- RelatonBib::TypedTitleString.from_string content, lang, script(lang)
349
+ def fetch_title(doc, lang) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
350
+ head = doc.at "//nav[contains(@class,'heading-condensed')]"
351
+ types = { "h2" => "title-intro", "h3" => "title-main", "h4" => "title-part" }
352
+ title_types = head.xpath("h2 | h3 | h4").each_with_object({}) do |t, h|
353
+ h[types[t.name]] = t.text
354
+ end
355
+ title = RelatonBib::TypedTitleStringCollection.new
356
+ title_types.each do |type, content|
357
+ title << RelatonBib::TypedTitleString.new(
358
+ type: type, content: content, language: lang, script: script(lang),
359
+ )
360
+ end
361
+ main = title.map { |t| t.title.content }.join " - "
362
+ title << RelatonBib::TypedTitleString.new(type: "main", content: main, language: lang, script: script(lang))
354
363
  end
355
364
 
356
365
  # Return ISO script code.
@@ -363,12 +372,11 @@ module RelatonIso
363
372
  end
364
373
  end
365
374
 
366
- # rubocop:disable Metrics/MethodLength
367
375
  # Fetch dates
368
376
  # @param doc [Nokogiri::HTML::Document]
369
377
  # @param ref [String]
370
378
  # @return [Array<Hash>]
371
- def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity
379
+ def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/MethodLength
372
380
  dates = []
373
381
  %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
374
382
  pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
@@ -400,25 +408,28 @@ module RelatonIso
400
408
  mem << { entity: publisher, role: [type: "publisher"] }
401
409
  end
402
410
  end
403
- # rubocop:enable Metrics/MethodLength
404
411
 
405
412
  # Fetch ICS.
406
413
  # @param doc [Nokogiri::HTML::Document]
407
414
  # @return [Array<Hash>]
408
415
  def fetch_ics(doc)
409
- doc.xpath("//dl[dt/strong[.='ICS']]/dd/span/a").map do |i|
416
+ doc.xpath("//div[contains(., 'ICS')]/following-sibling::span/a").map do |i|
410
417
  code = i.text.match(/[\d.]+/).to_s.split "."
411
418
  { field: code[0], group: code[1], subgroup: code[2] }
412
419
  end
413
420
  end
414
421
 
422
+ #
415
423
  # Fetch links.
416
- # @param doc [Nokogiri::HTML::Document]
417
- # @param url [String]
424
+ #
425
+ # @param doc [Nokogiri::HTML::Document] document to parse
426
+ # @param url [String] document url
427
+ #
418
428
  # @return [Array<Hash>]
429
+ #
419
430
  def fetch_link(doc, url)
420
431
  links = [{ type: "src", content: url }]
421
- obp = doc.at_css("a#obp-preview")
432
+ obp = doc.at("//h4[contains(@class, 'h5')]/a")
422
433
  links << { type: "obp", content: obp[:href] } if obp
423
434
  rss = doc.at("//a[contains(@href, 'rss')]")
424
435
  links << { type: "rss", content: DOMAIN + rss[:href] } if rss
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "1.16.0"
4
+ VERSION = "1.16.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.0
4
+ version: 1.16.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-03 00:00:00.000000000 Z
11
+ date: 2023-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: algolia