relaton-iso 1.12.0 → 1.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +4 -5
- data/lib/relaton_iso/hit.rb +4 -4
- data/lib/relaton_iso/hit_collection.rb +4 -4
- data/lib/relaton_iso/iso_bibliography.rb +15 -13
- data/lib/relaton_iso/scrapper.rb +46 -36
- data/lib/relaton_iso/version.rb +1 -1
- data/relaton_iso.gemspec +1 -4
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2430abcb3feaf394c967b11c5fa17a0cc8160c6e0d94a35a190c15f299a088af
|
4
|
+
data.tar.gz: 738eea32609469bf2b588b2a07046da5dd88597acbd7f190ec80b5a32c7a093a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1469ca87aed02788972c9b48e967fdbfbbe690c68170f1cadea2b1575171e3cf61f4fc88ef18d3152cda69ea76057cc57e9de78eababf3dd7c9a6ffa3a8fdcda
|
7
|
+
data.tar.gz: a9cdb6fb9db35dd96a1ced59f80474b6ce0323c9bd3c65bb9bc93e94db1f928a4ffd1e0128eb213603e67756d03d1489bc5c003d9c1f4f064bc9b6588b215ee9
|
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -1,16 +1,15 @@
|
|
1
1
|
module RelatonIso
|
2
2
|
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
-
attr_accessor :all_parts
|
4
|
-
|
5
3
|
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
6
5
|
if @all_parts
|
7
6
|
if type == "URN"
|
8
7
|
return "#{@id.urn}:ser"
|
9
|
-
|
10
|
-
return "#{
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
11
10
|
end
|
12
11
|
end
|
13
|
-
type == "URN" ? @id.urn.to_s :
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
14
13
|
end
|
15
14
|
|
16
15
|
def remove_part
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -7,10 +7,10 @@ module RelatonIso
|
|
7
7
|
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
|
-
def fetch(lang = nil
|
13
|
-
@fetch ||= Scrapper.parse_page
|
12
|
+
def fetch(lang = nil)
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -26,7 +26,7 @@ module RelatonIso
|
|
26
26
|
|
27
27
|
# @return [Pubid::Iso::Identifier]
|
28
28
|
def pubid
|
29
|
-
Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
@@ -11,17 +11,17 @@ module RelatonIso
|
|
11
11
|
# @param text [String] reference to search
|
12
12
|
def initialize(text)
|
13
13
|
super
|
14
|
-
@array = text.match?(/^ISO\s(?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
14
|
+
@array = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
15
15
|
end
|
16
16
|
|
17
17
|
# @param lang [String, NilClass]
|
18
|
-
# @return [RelatonIsoBib::IsoBibliographicItem]
|
18
|
+
# @return [RelatonIsoBib::IsoBibliographicItem, nil]
|
19
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
20
20
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
21
|
hit = @array.min_by { |h| h.pubid.part }
|
22
|
-
return @array.first
|
22
|
+
return @array.first&.fetch lang unless hit
|
23
23
|
|
24
|
-
bibitem = hit.fetch(lang
|
24
|
+
bibitem = hit.fetch(lang)
|
25
25
|
all_parts_item = bibitem.to_all_parts
|
26
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
27
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
@@ -73,26 +73,32 @@ module RelatonIso
|
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
-
def matches_base?(query_pubid, pubid, any_types_stages: false)
|
76
|
+
    def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
77
77
|
query_pubid.publisher == pubid.publisher &&
|
78
78
|
query_pubid.number == pubid.number &&
|
79
79
|
query_pubid.copublisher == pubid.copublisher &&
|
80
|
-
(any_types_stages && query_pubid.stage.nil? || query_pubid.stage == pubid.stage) &&
|
81
|
-
(any_types_stages && query_pubid.type.nil? || query_pubid.type == pubid.type)
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param hit_collection [RelatonIso::HitCollection]
|
85
85
|
# @param year [String]
|
86
86
|
# @return [RelatonIso::HitCollection]
|
87
|
-
def filter_hits_by_year(hit_collection, year)
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
88
|
missed_years = []
|
89
89
|
|
90
90
|
# filter by year
|
91
91
|
hits = hit_collection.select do |hit|
|
92
92
|
if hit.pubid.year == year
|
93
93
|
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
94
97
|
else
|
95
|
-
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
96
102
|
false
|
97
103
|
end
|
98
104
|
end
|
@@ -106,9 +112,7 @@ module RelatonIso
|
|
106
112
|
|
107
113
|
private
|
108
114
|
|
109
|
-
# rubocop:disable Metrics/MethodLength
|
110
|
-
|
111
|
-
def fetch_ref_err(query_pubid, year)
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
112
116
|
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
113
117
|
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
114
118
|
"The code must be exactly like it is on the standards website."
|
@@ -124,14 +128,12 @@ module RelatonIso
|
|
124
128
|
nil
|
125
129
|
end
|
126
130
|
|
127
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
128
|
-
|
129
131
|
    # Search for hits. If none are found, then try missed stages and ISO/IEC.
|
130
132
|
#
|
131
133
|
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
132
134
|
# @param opts [Hash]
|
133
135
|
# @return [Array<RelatonIso::Hit>]
|
134
|
-
def isobib_search_filter(query_pubid, opts)
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
135
137
|
query_pubid.part = nil if opts[:all_parts]
|
136
138
|
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
137
139
|
# fetch hits collection
|
@@ -166,8 +168,8 @@ module RelatonIso
|
|
166
168
|
hit_pubid = i.pubid
|
167
169
|
matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
|
168
170
|
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
169
|
-
query_pubid.
|
170
|
-
query_pubid.
|
171
|
+
query_pubid.corrigendums == hit_pubid.corrigendums &&
|
172
|
+
query_pubid.amendments == hit_pubid.amendments
|
171
173
|
end
|
172
174
|
|
173
175
|
query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,52 +50,55 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
55
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
-
def parse_page(
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
59
|
|
60
|
-
doc, url = get_page "#{
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
61
61
|
|
62
62
|
# Fetch edition.
|
63
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
64
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
65
66
|
|
66
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
67
68
|
|
68
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
69
70
|
fetched: Date.today.to_s,
|
70
|
-
docid: fetch_relaton_docids(
|
71
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
72
73
|
edition: edition,
|
73
74
|
language: langs.map { |l| l[:lang] },
|
74
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
75
76
|
title: titles,
|
76
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
77
78
|
docstatus: fetch_status(doc),
|
78
79
|
ics: fetch_ics(doc),
|
79
|
-
date: fetch_dates(doc,
|
80
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
81
82
|
editorialgroup: fetch_workgroup(doc),
|
82
83
|
abstract: abstract,
|
83
84
|
copyright: fetch_copyright(doc),
|
84
85
|
link: fetch_link(doc, url),
|
85
86
|
relation: fetch_relations(doc),
|
86
87
|
place: ["Geneva"],
|
87
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
88
89
|
)
|
89
90
|
end
|
90
91
|
|
91
|
-
#
|
92
|
-
#
|
93
|
-
#
|
94
|
-
# @param
|
95
|
-
# @param
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
96
98
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
97
|
-
|
98
|
-
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
99
102
|
[
|
100
103
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
101
104
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
@@ -183,22 +186,29 @@ module RelatonIso
|
|
183
186
|
end
|
184
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
185
188
|
|
186
|
-
|
187
|
-
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
      # @param pubid [Pubid::Iso::Identifier] pubid
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
188
198
|
end
|
189
199
|
|
190
|
-
#
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
200
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
201
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
202
212
|
)
|
203
213
|
end
|
204
214
|
|
@@ -228,7 +238,7 @@ module RelatonIso
|
|
228
238
|
# Fetch workgroup.
|
229
239
|
# @param doc [Nokogiri::HTML::Document]
|
230
240
|
# @return [Hash]
|
231
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
232
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
233
243
|
# wg_url = DOMAIN + wg_link['href']
|
234
244
|
workgroup = wg_link.text.split "/"
|
@@ -252,6 +262,7 @@ module RelatonIso
|
|
252
262
|
# @param doc [Nokogiri::HTML::Document]
|
253
263
|
# @return [Array<Hash>]
|
254
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
255
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
256
267
|
r_type = r.at("h4", "h5").text
|
257
268
|
date = []
|
@@ -263,14 +274,13 @@ module RelatonIso
|
|
263
274
|
"updates"
|
264
275
|
else r_type
|
265
276
|
end
|
266
|
-
if
|
277
|
+
if types.include?(type) then a
|
267
278
|
else
|
268
279
|
a + r.css("a").map do |id|
|
269
|
-
|
270
|
-
|
271
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
272
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
273
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
274
284
|
)
|
275
285
|
{ type: type, bibitem: bibitem }
|
276
286
|
end
|
@@ -285,7 +295,7 @@ module RelatonIso
|
|
285
295
|
def fetch_type(ref)
|
286
296
|
%r{
|
287
297
|
^(?<prefix>ISO|IWA|IEC)
|
288
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
289
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
290
300
|
}x =~ ref
|
291
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -27,7 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
28
28
|
|
29
29
|
spec.add_development_dependency "byebug"
|
30
|
-
# spec.add_development_dependency "debase"
|
31
30
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
32
31
|
spec.add_development_dependency "pry-byebug"
|
33
32
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -35,13 +34,11 @@ Gem::Specification.new do |spec|
|
|
35
34
|
spec.add_development_dependency "rubocop"
|
36
35
|
spec.add_development_dependency "rubocop-performance"
|
37
36
|
spec.add_development_dependency "rubocop-rails"
|
38
|
-
# spec.add_development_dependency "ruby-debug-ide"
|
39
37
|
spec.add_development_dependency "simplecov"
|
40
38
|
spec.add_development_dependency "vcr"
|
41
39
|
spec.add_development_dependency "webmock"
|
42
40
|
|
43
|
-
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
44
41
|
spec.add_dependency "algolia"
|
42
|
+
spec.add_dependency "pubid-iso", "~> 0.1.8"
|
45
43
|
spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
|
46
|
-
spec.add_dependency "pubid-iso", "~> 0.1.7"
|
47
44
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.12.
|
4
|
+
version: 1.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -179,33 +179,33 @@ dependencies:
|
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
|
-
name:
|
182
|
+
name: pubid-iso
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
187
|
+
version: 0.1.8
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
194
|
+
version: 0.1.8
|
195
195
|
- !ruby/object:Gem::Dependency
|
196
|
-
name:
|
196
|
+
name: relaton-iso-bib
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version:
|
201
|
+
version: 1.12.0
|
202
202
|
type: :runtime
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version:
|
208
|
+
version: 1.12.0
|
209
209
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
210
210
|
model'
|
211
211
|
email:
|
@@ -244,6 +244,7 @@ files:
|
|
244
244
|
- bin/ruby-rewrite
|
245
245
|
- bin/safe_yaml
|
246
246
|
- bin/setup
|
247
|
+
- bin/thor
|
247
248
|
- lib/relaton_iso.rb
|
248
249
|
- lib/relaton_iso/document_identifier.rb
|
249
250
|
- lib/relaton_iso/hit.rb
|