relaton-iso 1.12.0 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +4 -5
- data/lib/relaton_iso/hit.rb +4 -4
- data/lib/relaton_iso/hit_collection.rb +1 -1
- data/lib/relaton_iso/iso_bibliography.rb +13 -11
- data/lib/relaton_iso/scrapper.rb +46 -36
- data/lib/relaton_iso/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4479e38048aa0dfae8bcc85f1e9de03b5fe0561048b658ec47b3df8ca64794eb
|
4
|
+
data.tar.gz: c297ddc7b15d8186b85fbb7d4d3f84863d7df6e20ad243f0740364262fe43807
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fd11d9fe01bd36052cf2762df6e8f728d361fbe23410dcb0229e95436a9f7909e51e51a8be0abc9f7bd269ffd10d642cb0aa7792dba76c0c174b606e319bf58
|
7
|
+
data.tar.gz: 060edbed6bb5b11033911db2200a4e19e98585c4133a1b4a09eaa79b582f2cf23b39f424b84d3110429ff1800247cec4f2b6522a82bbbdc45cb093e63a339bda
|
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -1,16 +1,15 @@
|
|
1
1
|
module RelatonIso
|
2
2
|
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
-
attr_accessor :all_parts
|
4
|
-
|
5
3
|
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
6
5
|
if @all_parts
|
7
6
|
if type == "URN"
|
8
7
|
return "#{@id.urn}:ser"
|
9
|
-
|
10
|
-
return "#{
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
11
10
|
end
|
12
11
|
end
|
13
|
-
type == "URN" ? @id.urn.to_s :
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
14
13
|
end
|
15
14
|
|
16
15
|
def remove_part
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -7,10 +7,10 @@ module RelatonIso
|
|
7
7
|
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
|
-
def fetch(lang = nil
|
13
|
-
@fetch ||= Scrapper.parse_page
|
12
|
+
def fetch(lang = nil)
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -26,7 +26,7 @@ module RelatonIso
|
|
26
26
|
|
27
27
|
# @return [Pubid::Iso::Identifier]
|
28
28
|
def pubid
|
29
|
-
Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
@@ -21,7 +21,7 @@ module RelatonIso
|
|
21
21
|
hit = @array.min_by { |h| h.pubid.part }
|
22
22
|
return @array.first.fetch lang unless hit
|
23
23
|
|
24
|
-
bibitem = hit.fetch(lang
|
24
|
+
bibitem = hit.fetch(lang)
|
25
25
|
all_parts_item = bibitem.to_all_parts
|
26
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
27
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
@@ -73,26 +73,32 @@ module RelatonIso
|
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
-
def matches_base?(query_pubid, pubid, any_types_stages: false)
|
76
|
+
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
77
77
|
query_pubid.publisher == pubid.publisher &&
|
78
78
|
query_pubid.number == pubid.number &&
|
79
79
|
query_pubid.copublisher == pubid.copublisher &&
|
80
|
-
(any_types_stages && query_pubid.stage.nil? || query_pubid.stage == pubid.stage) &&
|
81
|
-
(any_types_stages && query_pubid.type.nil? || query_pubid.type == pubid.type)
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param hit_collection [RelatonIso::HitCollection]
|
85
85
|
# @param year [String]
|
86
86
|
# @return [RelatonIso::HitCollection]
|
87
|
-
def filter_hits_by_year(hit_collection, year)
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
88
|
missed_years = []
|
89
89
|
|
90
90
|
# filter by year
|
91
91
|
hits = hit_collection.select do |hit|
|
92
92
|
if hit.pubid.year == year
|
93
93
|
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
94
97
|
else
|
95
|
-
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
96
102
|
false
|
97
103
|
end
|
98
104
|
end
|
@@ -106,9 +112,7 @@ module RelatonIso
|
|
106
112
|
|
107
113
|
private
|
108
114
|
|
109
|
-
# rubocop:disable Metrics/MethodLength
|
110
|
-
|
111
|
-
def fetch_ref_err(query_pubid, year)
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
112
116
|
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
113
117
|
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
114
118
|
"The code must be exactly like it is on the standards website."
|
@@ -124,14 +128,12 @@ module RelatonIso
|
|
124
128
|
nil
|
125
129
|
end
|
126
130
|
|
127
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
128
|
-
|
129
131
|
# Search for hits. If none are found, then try missed stages and ISO/IEC.
|
130
132
|
#
|
131
133
|
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
132
134
|
# @param opts [Hash]
|
133
135
|
# @return [Array<RelatonIso::Hit>]
|
134
|
-
def isobib_search_filter(query_pubid, opts)
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
135
137
|
query_pubid.part = nil if opts[:all_parts]
|
136
138
|
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
137
139
|
# fetch hits collection
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,52 +50,55 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
55
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
-
def parse_page(
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
59
|
|
60
|
-
doc, url = get_page "#{
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
61
61
|
|
62
62
|
# Fetch edition.
|
63
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
64
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
65
66
|
|
66
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
67
68
|
|
68
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
69
70
|
fetched: Date.today.to_s,
|
70
|
-
docid: fetch_relaton_docids(
|
71
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
72
73
|
edition: edition,
|
73
74
|
language: langs.map { |l| l[:lang] },
|
74
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
75
76
|
title: titles,
|
76
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
77
78
|
docstatus: fetch_status(doc),
|
78
79
|
ics: fetch_ics(doc),
|
79
|
-
date: fetch_dates(doc,
|
80
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
81
82
|
editorialgroup: fetch_workgroup(doc),
|
82
83
|
abstract: abstract,
|
83
84
|
copyright: fetch_copyright(doc),
|
84
85
|
link: fetch_link(doc, url),
|
85
86
|
relation: fetch_relations(doc),
|
86
87
|
place: ["Geneva"],
|
87
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
88
89
|
)
|
89
90
|
end
|
90
91
|
|
91
|
-
#
|
92
|
-
#
|
93
|
-
#
|
94
|
-
# @param
|
95
|
-
# @param
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
96
98
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
97
|
-
|
98
|
-
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
99
102
|
[
|
100
103
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
101
104
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
@@ -183,22 +186,29 @@ module RelatonIso
|
|
183
186
|
end
|
184
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
185
188
|
|
186
|
-
|
187
|
-
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
188
198
|
end
|
189
199
|
|
190
|
-
#
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
200
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
201
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
202
212
|
)
|
203
213
|
end
|
204
214
|
|
@@ -228,7 +238,7 @@ module RelatonIso
|
|
228
238
|
# Fetch workgroup.
|
229
239
|
# @param doc [Nokogiri::HTML::Document]
|
230
240
|
# @return [Hash]
|
231
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
232
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
233
243
|
# wg_url = DOMAIN + wg_link['href']
|
234
244
|
workgroup = wg_link.text.split "/"
|
@@ -252,6 +262,7 @@ module RelatonIso
|
|
252
262
|
# @param doc [Nokogiri::HTML::Document]
|
253
263
|
# @return [Array<Hash>]
|
254
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
255
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
256
267
|
r_type = r.at("h4", "h5").text
|
257
268
|
date = []
|
@@ -263,14 +274,13 @@ module RelatonIso
|
|
263
274
|
"updates"
|
264
275
|
else r_type
|
265
276
|
end
|
266
|
-
if
|
277
|
+
if types.include?(type) then a
|
267
278
|
else
|
268
279
|
a + r.css("a").map do |id|
|
269
|
-
|
270
|
-
|
271
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
272
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
273
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
274
284
|
)
|
275
285
|
{ type: type, bibitem: bibitem }
|
276
286
|
end
|
@@ -285,7 +295,7 @@ module RelatonIso
|
|
285
295
|
def fetch_type(ref)
|
286
296
|
%r{
|
287
297
|
^(?<prefix>ISO|IWA|IEC)
|
288
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
289
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
290
300
|
}x =~ ref
|
291
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.12.
|
4
|
+
version: 1.12.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -244,6 +244,7 @@ files:
|
|
244
244
|
- bin/ruby-rewrite
|
245
245
|
- bin/safe_yaml
|
246
246
|
- bin/setup
|
247
|
+
- bin/thor
|
247
248
|
- lib/relaton_iso.rb
|
248
249
|
- lib/relaton_iso/document_identifier.rb
|
249
250
|
- lib/relaton_iso/hit.rb
|