relaton-iso 1.16.0 → 1.16.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +18 -18
- data/lib/relaton_iso/hit.rb +3 -2
- data/lib/relaton_iso/iso_bibliography.rb +7 -5
- data/lib/relaton_iso/scrapper.rb +56 -45
- data/lib/relaton_iso/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 479a728a58c56799448fd6d468e0d19fe245b731119f8dcd9ae6f19a7b624e07
|
4
|
+
data.tar.gz: ac89507180ca01978bfe98b68fbe02450f2c33015bd38d788752f3bf933911ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71cc49dc2afa8690f02f7035ec5cc13981eb620e2b8c3792456401c152a4ca8192b2ffbd7445c6c982886e61f679427a2d5afbf26e13c6ebcfffcc8d54f7e5c9
|
7
|
+
data.tar.gz: 853da0772a998533c5f461ff297bef978c75e1f58b2df1fec5eff0fea6d306807420453a6ba37b1348d44e58c3a71dcf743228c7703b73fd9b7d72c9d4309598
|
data/README.adoc
CHANGED
@@ -72,14 +72,14 @@ item.docidentifier.detect { |di| di.type == "URN" }.id
|
|
72
72
|
[source,ruby]
|
73
73
|
----
|
74
74
|
item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
75
|
-
[relaton-iso] (
|
76
|
-
[relaton-iso] (
|
75
|
+
[relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
|
76
|
+
[relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
|
77
77
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83429e30
|
78
78
|
...
|
79
79
|
|
80
80
|
item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
|
81
|
-
[relaton-iso] (
|
82
|
-
[relaton-iso] (
|
81
|
+
[relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
|
82
|
+
[relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
|
83
83
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x0000000112c9ca80
|
84
84
|
...
|
85
85
|
|
@@ -92,8 +92,8 @@ item.docidentifier[0].id
|
|
92
92
|
[source,ruby]
|
93
93
|
----
|
94
94
|
item = RelatonIso::IsoBibliography.get "ISO 19115"
|
95
|
-
[relaton-iso] (
|
96
|
-
[relaton-iso] (
|
95
|
+
[relaton-iso] (ISO 19115) Fetching from iso.org ...
|
96
|
+
[relaton-iso] (ISO 19115) Found: `ISO 19115:2003`
|
97
97
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830275a8
|
98
98
|
...
|
99
99
|
|
@@ -106,8 +106,8 @@ item.docidentifier[0].id
|
|
106
106
|
[source,ruby]
|
107
107
|
----
|
108
108
|
item = RelatonIso::IsoBibliography.get "ISO 19115-1"
|
109
|
-
[relaton-iso] (
|
110
|
-
[relaton-iso] (
|
109
|
+
[relaton-iso] (ISO 19115-1) Fetching from iso.org ...
|
110
|
+
[relaton-iso] (ISO 19115-1) Found: `ISO 19115-1:2014`
|
111
111
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c83408af0
|
112
112
|
...
|
113
113
|
|
@@ -120,14 +120,14 @@ item.docidentifier[0].id
|
|
120
120
|
[source,ruby]
|
121
121
|
----
|
122
122
|
item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
|
123
|
-
[relaton-iso] (
|
124
|
-
[relaton-iso] (
|
123
|
+
[relaton-iso] (ISO 19115) Fetching from iso.org ...
|
124
|
+
[relaton-iso] (ISO 19115) Found: `ISO 19115`
|
125
125
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
|
126
126
|
...
|
127
127
|
|
128
128
|
item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
|
129
|
-
[relaton-iso] (
|
130
|
-
[relaton-iso] (
|
129
|
+
[relaton-iso] (ISO 19115) Fetching from iso.org ...
|
130
|
+
[relaton-iso] (ISO 19115) Found: `ISO 19115`
|
131
131
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
|
132
132
|
...
|
133
133
|
|
@@ -135,13 +135,13 @@ item.docidentifier[0].id
|
|
135
135
|
=> "ISO 19115 (all parts)"
|
136
136
|
|
137
137
|
item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
|
138
|
-
[relaton-iso] (
|
139
|
-
[relaton-iso] (
|
138
|
+
[relaton-iso] (ISO 19115) Fetching from iso.org ...
|
139
|
+
[relaton-iso] (ISO 19115) Found: `ISO 19115`
|
140
140
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
|
141
141
|
|
142
142
|
item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
|
143
|
-
[relaton-iso] (
|
144
|
-
[relaton-iso] (
|
143
|
+
[relaton-iso] (ISO 19115) Fetching from iso.org ...
|
144
|
+
[relaton-iso] (ISO 19115) Found: `ISO 19115`
|
145
145
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
|
146
146
|
...
|
147
147
|
|
@@ -234,8 +234,8 @@ item.title lang: 'fr'
|
|
234
234
|
@type="main">]>
|
235
235
|
|
236
236
|
item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
237
|
-
[relaton-iso] (
|
238
|
-
[relaton-iso] (
|
237
|
+
[relaton-iso] (ISO 19115:2003) Fetching from iso.org ...
|
238
|
+
[relaton-iso] (ISO 19115:2003) Found: `ISO 19115:2003`
|
239
239
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007fa8870b69e0
|
240
240
|
|
241
241
|
item.abstract lang: 'en'
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -42,8 +42,9 @@ module RelatonIso
|
|
42
42
|
# @return [Pubid::Iso::Identifier]
|
43
43
|
def pubid
|
44
44
|
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
45
|
-
rescue Pubid::Iso::Errors::WrongTypeError,
|
46
|
-
|
45
|
+
rescue Pubid::Iso::Errors::WrongTypeError,
|
46
|
+
Pubid::Iso::Errors::ParseError => e
|
47
|
+
Util.warn "Unable to find an identifier in: `#{hit[:title]}`."
|
47
48
|
Util.warn e.message
|
48
49
|
end
|
49
50
|
end
|
data/lib/relaton_iso/iso_bibliography.rb
CHANGED
@@ -38,7 +38,7 @@ module RelatonIso
|
|
38
38
|
query_pubid = Pubid::Iso::Identifier.parse(code)
|
39
39
|
query_pubid.year = year if year
|
40
40
|
query_pubid.part = nil if opts[:all_parts]
|
41
|
-
Util.warn "(#{query_pubid}) Fetching from
|
41
|
+
Util.warn "(#{query_pubid}) Fetching from iso.org ..."
|
42
42
|
|
43
43
|
hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
|
44
44
|
tip_ids = look_up_with_any_types_stages(hits, ref, opts)
|
@@ -54,7 +54,7 @@ module RelatonIso
|
|
54
54
|
response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
|
55
55
|
response_pubid = Pubid::Iso::Identifier.parse(response_docid)
|
56
56
|
|
57
|
-
Util.warn "(#{query_pubid}) Found `#{response_pubid}
|
57
|
+
Util.warn "(#{query_pubid}) Found: `#{response_pubid}`"
|
58
58
|
|
59
59
|
get_all = (
|
60
60
|
(query_pubid.year && opts[:keep_year].nil?) ||
|
@@ -65,7 +65,7 @@ module RelatonIso
|
|
65
65
|
|
66
66
|
ret.to_most_recent_reference
|
67
67
|
rescue Pubid::Core::Errors::ParseError
|
68
|
-
Util.warn "(#{code})
|
68
|
+
Util.warn "(#{code}) Is not recognized as a standards identifier."
|
69
69
|
nil
|
70
70
|
end
|
71
71
|
|
@@ -179,7 +179,8 @@ module RelatonIso
|
|
179
179
|
hit_collection = search(query_pubid_without_year.to_s)
|
180
180
|
|
181
181
|
# filter only matching hits
|
182
|
-
filter_hits hit_collection, query_pubid, opts[:all_parts],
|
182
|
+
filter_hits hit_collection, query_pubid, opts[:all_parts],
|
183
|
+
any_types_stages
|
183
184
|
end
|
184
185
|
|
185
186
|
#
|
@@ -196,7 +197,8 @@ module RelatonIso
|
|
196
197
|
# filter out
|
197
198
|
result = hit_collection.select do |i|
|
198
199
|
hit_pubid = i.pubid
|
199
|
-
matches_base?(query_pubid, hit_pubid,
|
200
|
+
matches_base?(query_pubid, hit_pubid,
|
201
|
+
any_types_stages: any_stypes_tages) &&
|
200
202
|
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
201
203
|
query_pubid.corrigendums == hit_pubid.corrigendums &&
|
202
204
|
query_pubid.amendments == hit_pubid.amendments
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -270,11 +270,10 @@ module RelatonIso
|
|
270
270
|
# @param doc [Nokogiri::HTML::Document]
|
271
271
|
# @return [Hash]
|
272
272
|
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
273
|
-
wg = doc.at("
|
274
|
-
|
275
|
-
return unless wg_link
|
273
|
+
wg = doc.at("////div[contains(., 'Technical Committe')]/following-sibling::span/a")
|
274
|
+
return unless wg
|
276
275
|
|
277
|
-
workgroup =
|
276
|
+
workgroup = wg.text.split "/"
|
278
277
|
type = workgroup[1]&.match(/^[A-Z]+/)&.to_s || "TC"
|
279
278
|
# {
|
280
279
|
# name: "International Organization for Standardization",
|
@@ -282,44 +281,48 @@ module RelatonIso
|
|
282
281
|
# url: "www.iso.org",
|
283
282
|
# }
|
284
283
|
tc_numb = workgroup[1]&.match(/\d+/)&.to_s&.to_i
|
285
|
-
tc_name = wg
|
286
|
-
tc = RelatonBib::WorkGroup.new(name: tc_name, identifier:
|
284
|
+
tc_name = wg[:title]
|
285
|
+
tc = RelatonBib::WorkGroup.new(name: tc_name, identifier: wg.text,
|
287
286
|
type: type, number: tc_numb)
|
288
287
|
RelatonIsoBib::EditorialGroup.new(technical_committee: [tc])
|
289
288
|
end
|
290
289
|
|
291
|
-
# rubocop:disable Metrics/MethodLength
|
292
|
-
|
293
290
|
# Fetch relations.
|
294
291
|
# @param doc [Nokogiri::HTML::Document]
|
295
292
|
# @return [Array<Hash>]
|
296
|
-
def fetch_relations(doc)
|
293
|
+
def fetch_relations(doc)
|
297
294
|
types = ["Now", "Now under review"]
|
298
295
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
docid: [docid], formattedref: fref, date: date,
|
316
|
-
)
|
317
|
-
{ type: type, bibitem: bibitem }
|
296
|
+
type, date = relation_type(r.at("h4", "h5").text.strip, doc)
|
297
|
+
next a if types.include?(type)
|
298
|
+
|
299
|
+
a + create_relations(r, type, date)
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
303
|
+
def relation_type(type, doc)
|
304
|
+
date = []
|
305
|
+
t = case type.strip
|
306
|
+
when "Previously", "Will be replaced by" then "obsoletes"
|
307
|
+
when "Corrigenda / Amendments", "Revised by", "Now confirmed"
|
308
|
+
on = doc.xpath('//span[@class="stage-date"][contains(., "-")]').last
|
309
|
+
date << { type: "circulated", on: on.text } if on
|
310
|
+
"updates"
|
311
|
+
else type
|
318
312
|
end
|
319
|
-
|
313
|
+
[t, date]
|
314
|
+
end
|
315
|
+
|
316
|
+
def create_relations(rel, type, date)
|
317
|
+
rel.css("a").map do |id|
|
318
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
319
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
320
|
+
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
321
|
+
docid: [docid], formattedref: fref, date: date,
|
322
|
+
)
|
323
|
+
{ type: type, bibitem: bibitem }
|
320
324
|
end
|
321
325
|
end
|
322
|
-
# rubocop:enable Metrics/MethodLength
|
323
326
|
|
324
327
|
# Fetch type.
|
325
328
|
# @param ref [String]
|
@@ -343,14 +346,20 @@ module RelatonIso
|
|
343
346
|
# @param doc [Nokogiri::HTML::Document]
|
344
347
|
# @param lang [String]
|
345
348
|
# @return [Array<RelatonBib::TypedTitleString>]
|
346
|
-
def fetch_title(doc, lang)
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
349
|
+
def fetch_title(doc, lang) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
350
|
+
head = doc.at "//nav[contains(@class,'heading-condensed')]"
|
351
|
+
types = { "h2" => "title-intro", "h3" => "title-main", "h4" => "title-part" }
|
352
|
+
title_types = head.xpath("h2 | h3 | h4").each_with_object({}) do |t, h|
|
353
|
+
h[types[t.name]] = t.text
|
354
|
+
end
|
355
|
+
title = RelatonBib::TypedTitleStringCollection.new
|
356
|
+
title_types.each do |type, content|
|
357
|
+
title << RelatonBib::TypedTitleString.new(
|
358
|
+
type: type, content: content, language: lang, script: script(lang),
|
359
|
+
)
|
360
|
+
end
|
361
|
+
main = title.map { |t| t.title.content }.join " - "
|
362
|
+
title << RelatonBib::TypedTitleString.new(type: "main", content: main, language: lang, script: script(lang))
|
354
363
|
end
|
355
364
|
|
356
365
|
# Return ISO script code.
|
@@ -363,12 +372,11 @@ module RelatonIso
|
|
363
372
|
end
|
364
373
|
end
|
365
374
|
|
366
|
-
# rubocop:disable Metrics/MethodLength
|
367
375
|
# Fetch dates
|
368
376
|
# @param doc [Nokogiri::HTML::Document]
|
369
377
|
# @param ref [String]
|
370
378
|
# @return [Array<Hash>]
|
371
|
-
def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity
|
379
|
+
def fetch_dates(doc, ref) # rubocop:disable Metrics/AbcSize, Metrics/PerceivedComplexity, Metrics/MethodLength
|
372
380
|
dates = []
|
373
381
|
%r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
|
374
382
|
pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
|
@@ -400,25 +408,28 @@ module RelatonIso
|
|
400
408
|
mem << { entity: publisher, role: [type: "publisher"] }
|
401
409
|
end
|
402
410
|
end
|
403
|
-
# rubocop:enable Metrics/MethodLength
|
404
411
|
|
405
412
|
# Fetch ICS.
|
406
413
|
# @param doc [Nokogiri::HTML::Document]
|
407
414
|
# @return [Array<Hash>]
|
408
415
|
def fetch_ics(doc)
|
409
|
-
doc.xpath("//
|
416
|
+
doc.xpath("//div[contains(., 'ICS')]/following-sibling::span/a").map do |i|
|
410
417
|
code = i.text.match(/[\d.]+/).to_s.split "."
|
411
418
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
412
419
|
end
|
413
420
|
end
|
414
421
|
|
422
|
+
#
|
415
423
|
# Fetch links.
|
416
|
-
#
|
417
|
-
# @param
|
424
|
+
#
|
425
|
+
# @param doc [Nokogiri::HTML::Document] document to parse
|
426
|
+
# @param url [String] document url
|
427
|
+
#
|
418
428
|
# @return [Array<Hash>]
|
429
|
+
#
|
419
430
|
def fetch_link(doc, url)
|
420
431
|
links = [{ type: "src", content: url }]
|
421
|
-
obp = doc.
|
432
|
+
obp = doc.at("//h4[contains(@class, 'h5')]/a")
|
422
433
|
links << { type: "obp", content: obp[:href] } if obp
|
423
434
|
rss = doc.at("//a[contains(@href, 'rss')]")
|
424
435
|
links << { type: "rss", content: DOMAIN + rss[:href] } if rss
|
data/lib/relaton_iso/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.16.
|
4
|
+
version: 1.16.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: algolia
|