relaton-iso 1.12.0 → 1.12.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +4 -5
- data/lib/relaton_iso/hit.rb +4 -4
- data/lib/relaton_iso/hit_collection.rb +4 -4
- data/lib/relaton_iso/iso_bibliography.rb +15 -13
- data/lib/relaton_iso/scrapper.rb +46 -36
- data/lib/relaton_iso/version.rb +1 -1
- data/relaton_iso.gemspec +1 -4
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2430abcb3feaf394c967b11c5fa17a0cc8160c6e0d94a35a190c15f299a088af
|
4
|
+
data.tar.gz: 738eea32609469bf2b588b2a07046da5dd88597acbd7f190ec80b5a32c7a093a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1469ca87aed02788972c9b48e967fdbfbbe690c68170f1cadea2b1575171e3cf61f4fc88ef18d3152cda69ea76057cc57e9de78eababf3dd7c9a6ffa3a8fdcda
|
7
|
+
data.tar.gz: a9cdb6fb9db35dd96a1ced59f80474b6ce0323c9bd3c65bb9bc93e94db1f928a4ffd1e0128eb213603e67756d03d1489bc5c003d9c1f4f064bc9b6588b215ee9
|
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -1,16 +1,15 @@
|
|
1
1
|
module RelatonIso
|
2
2
|
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
-
attr_accessor :all_parts
|
4
|
-
|
5
3
|
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
6
5
|
if @all_parts
|
7
6
|
if type == "URN"
|
8
7
|
return "#{@id.urn}:ser"
|
9
|
-
|
10
|
-
return "#{
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
11
10
|
end
|
12
11
|
end
|
13
|
-
type == "URN" ? @id.urn.to_s :
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
14
13
|
end
|
15
14
|
|
16
15
|
def remove_part
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -7,10 +7,10 @@ module RelatonIso
|
|
7
7
|
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
|
-
def fetch(lang = nil
|
13
|
-
@fetch ||= Scrapper.parse_page
|
12
|
+
def fetch(lang = nil)
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -26,7 +26,7 @@ module RelatonIso
|
|
26
26
|
|
27
27
|
# @return [Pubid::Iso::Identifier]
|
28
28
|
def pubid
|
29
|
-
Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
@@ -11,17 +11,17 @@ module RelatonIso
|
|
11
11
|
# @param text [String] reference to search
|
12
12
|
def initialize(text)
|
13
13
|
super
|
14
|
-
@array = text.match?(/^ISO\s(?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
14
|
+
@array = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/) ? fetch_github : fetch_iso
|
15
15
|
end
|
16
16
|
|
17
17
|
# @param lang [String, NilClass]
|
18
|
-
# @return [RelatonIsoBib::IsoBibliographicItem]
|
18
|
+
# @return [RelatonIsoBib::IsoBibliographicItem, nil]
|
19
19
|
def to_all_parts(lang = nil) # rubocop:disable Metrics/CyclomaticComplexity
|
20
20
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
21
|
hit = @array.min_by { |h| h.pubid.part }
|
22
|
-
return @array.first
|
22
|
+
return @array.first&.fetch lang unless hit
|
23
23
|
|
24
|
-
bibitem = hit.fetch(lang
|
24
|
+
bibitem = hit.fetch(lang)
|
25
25
|
all_parts_item = bibitem.to_all_parts
|
26
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
27
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
@@ -73,26 +73,32 @@ module RelatonIso
|
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
-
def matches_base?(query_pubid, pubid, any_types_stages: false)
|
76
|
+
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
77
77
|
query_pubid.publisher == pubid.publisher &&
|
78
78
|
query_pubid.number == pubid.number &&
|
79
79
|
query_pubid.copublisher == pubid.copublisher &&
|
80
|
-
(any_types_stages && query_pubid.stage.nil? || query_pubid.stage == pubid.stage) &&
|
81
|
-
(any_types_stages && query_pubid.type.nil? || query_pubid.type == pubid.type)
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param hit_collection [RelatonIso::HitCollection]
|
85
85
|
# @param year [String]
|
86
86
|
# @return [RelatonIso::HitCollection]
|
87
|
-
def filter_hits_by_year(hit_collection, year)
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
88
|
missed_years = []
|
89
89
|
|
90
90
|
# filter by year
|
91
91
|
hits = hit_collection.select do |hit|
|
92
92
|
if hit.pubid.year == year
|
93
93
|
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
94
97
|
else
|
95
|
-
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
96
102
|
false
|
97
103
|
end
|
98
104
|
end
|
@@ -106,9 +112,7 @@ module RelatonIso
|
|
106
112
|
|
107
113
|
private
|
108
114
|
|
109
|
-
# rubocop:disable Metrics/MethodLength
|
110
|
-
|
111
|
-
def fetch_ref_err(query_pubid, year)
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
112
116
|
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
113
117
|
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
114
118
|
"The code must be exactly like it is on the standards website."
|
@@ -124,14 +128,12 @@ module RelatonIso
|
|
124
128
|
nil
|
125
129
|
end
|
126
130
|
|
127
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
128
|
-
|
129
131
|
# Search for hits. If none are found, retry with missed stages and ISO/IEC.
|
130
132
|
#
|
131
133
|
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
132
134
|
# @param opts [Hash]
|
133
135
|
# @return [Array<RelatonIso::Hit>]
|
134
|
-
def isobib_search_filter(query_pubid, opts)
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
135
137
|
query_pubid.part = nil if opts[:all_parts]
|
136
138
|
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
137
139
|
# fetch hits collection
|
@@ -166,8 +168,8 @@ module RelatonIso
|
|
166
168
|
hit_pubid = i.pubid
|
167
169
|
matches_base?(query_pubid, hit_pubid, any_types_stages: any_types_stages) &&
|
168
170
|
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
169
|
-
query_pubid.
|
170
|
-
query_pubid.
|
171
|
+
query_pubid.corrigendums == hit_pubid.corrigendums &&
|
172
|
+
query_pubid.amendments == hit_pubid.amendments
|
171
173
|
end
|
172
174
|
|
173
175
|
query_pubid.year ? filter_hits_by_year(result, query_pubid.year) : result
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,52 +50,55 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
55
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
-
def parse_page(
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
59
|
|
60
|
-
doc, url = get_page "#{
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
61
61
|
|
62
62
|
# Fetch edition.
|
63
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
64
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
65
66
|
|
66
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
67
68
|
|
68
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
69
70
|
fetched: Date.today.to_s,
|
70
|
-
docid: fetch_relaton_docids(
|
71
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
72
73
|
edition: edition,
|
73
74
|
language: langs.map { |l| l[:lang] },
|
74
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
75
76
|
title: titles,
|
76
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
77
78
|
docstatus: fetch_status(doc),
|
78
79
|
ics: fetch_ics(doc),
|
79
|
-
date: fetch_dates(doc,
|
80
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
81
82
|
editorialgroup: fetch_workgroup(doc),
|
82
83
|
abstract: abstract,
|
83
84
|
copyright: fetch_copyright(doc),
|
84
85
|
link: fetch_link(doc, url),
|
85
86
|
relation: fetch_relations(doc),
|
86
87
|
place: ["Geneva"],
|
87
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
88
89
|
)
|
89
90
|
end
|
90
91
|
|
91
|
-
#
|
92
|
-
#
|
93
|
-
#
|
94
|
-
# @param
|
95
|
-
# @param
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
96
98
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
97
|
-
|
98
|
-
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
99
102
|
[
|
100
103
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
101
104
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
@@ -183,22 +186,29 @@ module RelatonIso
|
|
183
186
|
end
|
184
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
185
188
|
|
186
|
-
|
187
|
-
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
188
198
|
end
|
189
199
|
|
190
|
-
#
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
200
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
201
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
202
212
|
)
|
203
213
|
end
|
204
214
|
|
@@ -228,7 +238,7 @@ module RelatonIso
|
|
228
238
|
# Fetch workgroup.
|
229
239
|
# @param doc [Nokogiri::HTML::Document]
|
230
240
|
# @return [Hash]
|
231
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
232
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
233
243
|
# wg_url = DOMAIN + wg_link['href']
|
234
244
|
workgroup = wg_link.text.split "/"
|
@@ -252,6 +262,7 @@ module RelatonIso
|
|
252
262
|
# @param doc [Nokogiri::HTML::Document]
|
253
263
|
# @return [Array<Hash>]
|
254
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
255
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
256
267
|
r_type = r.at("h4", "h5").text
|
257
268
|
date = []
|
@@ -263,14 +274,13 @@ module RelatonIso
|
|
263
274
|
"updates"
|
264
275
|
else r_type
|
265
276
|
end
|
266
|
-
if
|
277
|
+
if types.include?(type) then a
|
267
278
|
else
|
268
279
|
a + r.css("a").map do |id|
|
269
|
-
|
270
|
-
|
271
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
272
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
273
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
274
284
|
)
|
275
285
|
{ type: type, bibitem: bibitem }
|
276
286
|
end
|
@@ -285,7 +295,7 @@ module RelatonIso
|
|
285
295
|
def fetch_type(ref)
|
286
296
|
%r{
|
287
297
|
^(?<prefix>ISO|IWA|IEC)
|
288
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
289
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
290
300
|
}x =~ ref
|
291
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -27,7 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
28
28
|
|
29
29
|
spec.add_development_dependency "byebug"
|
30
|
-
# spec.add_development_dependency "debase"
|
31
30
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
32
31
|
spec.add_development_dependency "pry-byebug"
|
33
32
|
spec.add_development_dependency "rake", "~> 13.0"
|
@@ -35,13 +34,11 @@ Gem::Specification.new do |spec|
|
|
35
34
|
spec.add_development_dependency "rubocop"
|
36
35
|
spec.add_development_dependency "rubocop-performance"
|
37
36
|
spec.add_development_dependency "rubocop-rails"
|
38
|
-
# spec.add_development_dependency "ruby-debug-ide"
|
39
37
|
spec.add_development_dependency "simplecov"
|
40
38
|
spec.add_development_dependency "vcr"
|
41
39
|
spec.add_development_dependency "webmock"
|
42
40
|
|
43
|
-
# spec.add_dependency "relaton-iec", "~> 1.8.0"
|
44
41
|
spec.add_dependency "algolia"
|
42
|
+
spec.add_dependency "pubid-iso", "~> 0.1.8"
|
45
43
|
spec.add_dependency "relaton-iso-bib", "~> 1.12.0"
|
46
|
-
spec.add_dependency "pubid-iso", "~> 0.1.7"
|
47
44
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.12.
|
4
|
+
version: 1.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -179,33 +179,33 @@ dependencies:
|
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
|
-
name:
|
182
|
+
name: pubid-iso
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
187
|
+
version: 0.1.8
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
194
|
+
version: 0.1.8
|
195
195
|
- !ruby/object:Gem::Dependency
|
196
|
-
name:
|
196
|
+
name: relaton-iso-bib
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version:
|
201
|
+
version: 1.12.0
|
202
202
|
type: :runtime
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - "~>"
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version:
|
208
|
+
version: 1.12.0
|
209
209
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
210
210
|
model'
|
211
211
|
email:
|
@@ -244,6 +244,7 @@ files:
|
|
244
244
|
- bin/ruby-rewrite
|
245
245
|
- bin/safe_yaml
|
246
246
|
- bin/setup
|
247
|
+
- bin/thor
|
247
248
|
- lib/relaton_iso.rb
|
248
249
|
- lib/relaton_iso/document_identifier.rb
|
249
250
|
- lib/relaton_iso/hit.rb
|