relaton-iso 1.12.0 → 1.12.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/thor +29 -0
- data/lib/relaton_iso/document_identifier.rb +4 -5
- data/lib/relaton_iso/hit.rb +4 -4
- data/lib/relaton_iso/hit_collection.rb +1 -1
- data/lib/relaton_iso/iso_bibliography.rb +13 -11
- data/lib/relaton_iso/scrapper.rb +46 -36
- data/lib/relaton_iso/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4479e38048aa0dfae8bcc85f1e9de03b5fe0561048b658ec47b3df8ca64794eb
|
4
|
+
data.tar.gz: c297ddc7b15d8186b85fbb7d4d3f84863d7df6e20ad243f0740364262fe43807
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fd11d9fe01bd36052cf2762df6e8f728d361fbe23410dcb0229e95436a9f7909e51e51a8be0abc9f7bd269ffd10d642cb0aa7792dba76c0c174b606e319bf58
|
7
|
+
data.tar.gz: 060edbed6bb5b11033911db2200a4e19e98585c4133a1b4a09eaa79b582f2cf23b39f424b84d3110429ff1800247cec4f2b6522a82bbbdc45cb093e63a339bda
|
data/bin/thor
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'thor' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("thor", "thor")
|
@@ -1,16 +1,15 @@
|
|
1
1
|
module RelatonIso
|
2
2
|
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
-
attr_accessor :all_parts
|
4
|
-
|
5
3
|
def id
|
4
|
+
id_str = @id.to_s.sub(/\sED\d+/, "")
|
6
5
|
if @all_parts
|
7
6
|
if type == "URN"
|
8
7
|
return "#{@id.urn}:ser"
|
9
|
-
|
10
|
-
return "#{
|
8
|
+
else
|
9
|
+
return "#{id_str} (all parts)"
|
11
10
|
end
|
12
11
|
end
|
13
|
-
type == "URN" ? @id.urn.to_s :
|
12
|
+
type == "URN" ? @id.urn.to_s : id_str
|
14
13
|
end
|
15
14
|
|
16
15
|
def remove_part
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -7,10 +7,10 @@ module RelatonIso
|
|
7
7
|
attr_writer :fetch, :pubid
|
8
8
|
|
9
9
|
# Parse page.
|
10
|
-
# @param lang [String,
|
10
|
+
# @param lang [String, nil]
|
11
11
|
# @return [RelatonIso::IsoBibliographicItem]
|
12
|
-
def fetch(lang = nil
|
13
|
-
@fetch ||= Scrapper.parse_page
|
12
|
+
def fetch(lang = nil)
|
13
|
+
@fetch ||= Scrapper.parse_page self, lang
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Integer]
|
@@ -26,7 +26,7 @@ module RelatonIso
|
|
26
26
|
|
27
27
|
# @return [Pubid::Iso::Identifier]
|
28
28
|
def pubid
|
29
|
-
Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
29
|
+
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
@@ -21,7 +21,7 @@ module RelatonIso
|
|
21
21
|
hit = @array.min_by { |h| h.pubid.part }
|
22
22
|
return @array.first.fetch lang unless hit
|
23
23
|
|
24
|
-
bibitem = hit.fetch(lang
|
24
|
+
bibitem = hit.fetch(lang)
|
25
25
|
all_parts_item = bibitem.to_all_parts
|
26
26
|
@array.reject { |h| h.hit[:uuid] == hit.hit[:uuid] }.each do |hi|
|
27
27
|
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
@@ -73,26 +73,32 @@ module RelatonIso
|
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
-
def matches_base?(query_pubid, pubid, any_types_stages: false)
|
76
|
+
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
77
77
|
query_pubid.publisher == pubid.publisher &&
|
78
78
|
query_pubid.number == pubid.number &&
|
79
79
|
query_pubid.copublisher == pubid.copublisher &&
|
80
|
-
(any_types_stages && query_pubid.stage.nil? || query_pubid.stage == pubid.stage) &&
|
81
|
-
(any_types_stages && query_pubid.type.nil? || query_pubid.type == pubid.type)
|
80
|
+
((any_types_stages && query_pubid.stage.nil?) || query_pubid.stage == pubid.stage) &&
|
81
|
+
((any_types_stages && query_pubid.type.nil?) || query_pubid.type == pubid.type)
|
82
82
|
end
|
83
83
|
|
84
84
|
# @param hit_collection [RelatonIso::HitCollection]
|
85
85
|
# @param year [String]
|
86
86
|
# @return [RelatonIso::HitCollection]
|
87
|
-
def filter_hits_by_year(hit_collection, year)
|
87
|
+
def filter_hits_by_year(hit_collection, year) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
88
88
|
missed_years = []
|
89
89
|
|
90
90
|
# filter by year
|
91
91
|
hits = hit_collection.select do |hit|
|
92
92
|
if hit.pubid.year == year
|
93
93
|
true
|
94
|
+
elsif hit.pubid.year.nil? && hit.hit[:year].to_s == year
|
95
|
+
hit.pubid.year = year
|
96
|
+
true
|
94
97
|
else
|
95
|
-
|
98
|
+
missed_year = hit.pubid.year || hit.hit[:year].to_s
|
99
|
+
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
100
|
+
missed_years << missed_year
|
101
|
+
end
|
96
102
|
false
|
97
103
|
end
|
98
104
|
end
|
@@ -106,9 +112,7 @@ module RelatonIso
|
|
106
112
|
|
107
113
|
private
|
108
114
|
|
109
|
-
# rubocop:disable Metrics/MethodLength
|
110
|
-
|
111
|
-
def fetch_ref_err(query_pubid, year)
|
115
|
+
def fetch_ref_err(query_pubid, year) # rubocop:disable Metrics/MethodLength
|
112
116
|
id = year ? "#{query_pubid}:#{year}" : query_pubid
|
113
117
|
warn "[relaton-iso] WARNING: no match found online for #{id}. "\
|
114
118
|
"The code must be exactly like it is on the standards website."
|
@@ -124,14 +128,12 @@ module RelatonIso
|
|
124
128
|
nil
|
125
129
|
end
|
126
130
|
|
127
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
128
|
-
|
129
131
|
# Search for hits. If no found then trying missed stages and ISO/IEC.
|
130
132
|
#
|
131
133
|
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
132
134
|
# @param opts [Hash]
|
133
135
|
# @return [Array<RelatonIso::Hit>]
|
134
|
-
def isobib_search_filter(query_pubid, opts)
|
136
|
+
def isobib_search_filter(query_pubid, opts) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
135
137
|
query_pubid.part = nil if opts[:all_parts]
|
136
138
|
warn "[relaton-iso] (\"#{query_pubid}\") fetching..."
|
137
139
|
# fetch hits collection
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -50,52 +50,55 @@ module RelatonIso
|
|
50
50
|
|
51
51
|
class << self
|
52
52
|
# Parse page.
|
53
|
-
# @param
|
53
|
+
# @param hit [RelatonIso::Hit]
|
54
54
|
# @param lang [String, NilClass]
|
55
55
|
# @return [RelatonIsoBib::IsoBibliographicItem]
|
56
|
-
def parse_page(
|
56
|
+
def parse_page(hit, lang = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
57
57
|
# path = "/contents/data/standard#{hit_data['splitPath']}/"\
|
58
58
|
# "#{hit_data['csnumber']}.html"
|
59
59
|
|
60
|
-
doc, url = get_page "#{
|
60
|
+
doc, url = get_page "#{hit.hit[:path].sub '/sites/isoorg', ''}.html"
|
61
61
|
|
62
62
|
# Fetch edition.
|
63
63
|
edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")
|
64
64
|
&.children&.last&.text&.match(/\d+/)&.to_s
|
65
|
+
hit.pubid.edition = edition if edition
|
65
66
|
|
66
67
|
titles, abstract, langs = fetch_titles_abstract(doc, lang)
|
67
68
|
|
68
69
|
RelatonIsoBib::IsoBibliographicItem.new(
|
69
70
|
fetched: Date.today.to_s,
|
70
|
-
docid: fetch_relaton_docids(
|
71
|
-
docnumber: fetch_docnumber(
|
71
|
+
docid: fetch_relaton_docids(doc, hit.pubid),
|
72
|
+
docnumber: fetch_docnumber(hit.pubid),
|
72
73
|
edition: edition,
|
73
74
|
language: langs.map { |l| l[:lang] },
|
74
75
|
script: langs.map { |l| script(l[:lang]) }.uniq,
|
75
76
|
title: titles,
|
76
|
-
doctype: fetch_type(
|
77
|
+
doctype: fetch_type(hit.hit[:title]),
|
77
78
|
docstatus: fetch_status(doc),
|
78
79
|
ics: fetch_ics(doc),
|
79
|
-
date: fetch_dates(doc,
|
80
|
-
contributor: fetch_contributors(
|
80
|
+
date: fetch_dates(doc, hit.hit[:title]),
|
81
|
+
contributor: fetch_contributors(hit.hit[:title]),
|
81
82
|
editorialgroup: fetch_workgroup(doc),
|
82
83
|
abstract: abstract,
|
83
84
|
copyright: fetch_copyright(doc),
|
84
85
|
link: fetch_link(doc, url),
|
85
86
|
relation: fetch_relations(doc),
|
86
87
|
place: ["Geneva"],
|
87
|
-
structuredidentifier: fetch_structuredidentifier(
|
88
|
+
structuredidentifier: fetch_structuredidentifier(hit.pubid),
|
88
89
|
)
|
89
90
|
end
|
90
91
|
|
91
|
-
#
|
92
|
-
#
|
93
|
-
#
|
94
|
-
# @param
|
95
|
-
# @param
|
92
|
+
#
|
93
|
+
# Create document ids.
|
94
|
+
#
|
95
|
+
# @param doc [Nokogiri::HTML::Document] document
|
96
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
97
|
+
#
|
96
98
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
97
|
-
|
98
|
-
|
99
|
+
#
|
100
|
+
def fetch_relaton_docids(doc, pubid)
|
101
|
+
pubid.urn_stage = stage_code(doc).to_f
|
99
102
|
[
|
100
103
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "ISO", primary: true),
|
101
104
|
RelatonIso::DocumentIdentifier.new(id: pubid, type: "URN"),
|
@@ -183,22 +186,29 @@ module RelatonIso
|
|
183
186
|
end
|
184
187
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
185
188
|
|
186
|
-
|
187
|
-
|
189
|
+
#
|
190
|
+
# Generate docnumber.
|
191
|
+
#
|
192
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
193
|
+
#
|
194
|
+
# @return [String] docnumber
|
195
|
+
#
|
196
|
+
def fetch_docnumber(pubid)
|
197
|
+
pubid.to_s.match(/\d+/)&.to_s
|
188
198
|
end
|
189
199
|
|
190
|
-
#
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
m = ref.match(/^(.*?\d+)-?((?<=-)\d+|)/)
|
200
|
+
#
|
201
|
+
# Parse structuredidentifier.
|
202
|
+
#
|
203
|
+
# @param pubid [Pubid::Iso::Identifier] pubid
|
204
|
+
#
|
205
|
+
# @return [RelatonBib::StructuredIdentifier] structured identifier
|
206
|
+
#
|
207
|
+
def fetch_structuredidentifier(pubid) # rubocop:disable Metrics/MethodLength
|
200
208
|
RelatonIsoBib::StructuredIdentifier.new(
|
201
|
-
project_number:
|
209
|
+
project_number: "#{pubid.publisher} #{pubid.number}",
|
210
|
+
part: pubid&.part&.sub(/^-/, ""),
|
211
|
+
type: pubid.publisher,
|
202
212
|
)
|
203
213
|
end
|
204
214
|
|
@@ -228,7 +238,7 @@ module RelatonIso
|
|
228
238
|
# Fetch workgroup.
|
229
239
|
# @param doc [Nokogiri::HTML::Document]
|
230
240
|
# @return [Hash]
|
231
|
-
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength
|
241
|
+
def fetch_workgroup(doc) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
|
232
242
|
wg_link = doc.css("div.entry-name.entry-block a")[0]
|
233
243
|
# wg_url = DOMAIN + wg_link['href']
|
234
244
|
workgroup = wg_link.text.split "/"
|
@@ -252,6 +262,7 @@ module RelatonIso
|
|
252
262
|
# @param doc [Nokogiri::HTML::Document]
|
253
263
|
# @return [Array<Hash>]
|
254
264
|
def fetch_relations(doc) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
265
|
+
types = ["Now", "Now under review"]
|
255
266
|
doc.xpath("//ul[@class='steps']/li", "//div[@class='sub-step']").reduce([]) do |a, r|
|
256
267
|
r_type = r.at("h4", "h5").text
|
257
268
|
date = []
|
@@ -263,14 +274,13 @@ module RelatonIso
|
|
263
274
|
"updates"
|
264
275
|
else r_type
|
265
276
|
end
|
266
|
-
if
|
277
|
+
if types.include?(type) then a
|
267
278
|
else
|
268
279
|
a + r.css("a").map do |id|
|
269
|
-
|
270
|
-
|
271
|
-
)
|
280
|
+
docid = RelatonBib::DocumentIdentifier.new(type: "ISO", id: id.text, primary: true)
|
281
|
+
fref = RelatonBib::FormattedRef.new(content: id.text, format: "text/plain")
|
272
282
|
bibitem = RelatonIsoBib::IsoBibliographicItem.new(
|
273
|
-
formattedref: fref, date: date,
|
283
|
+
docid: [docid], formattedref: fref, date: date,
|
274
284
|
)
|
275
285
|
{ type: type, bibitem: bibitem }
|
276
286
|
end
|
@@ -285,7 +295,7 @@ module RelatonIso
|
|
285
295
|
def fetch_type(ref)
|
286
296
|
%r{
|
287
297
|
^(?<prefix>ISO|IWA|IEC)
|
288
|
-
(?:(
|
298
|
+
(?:(?:/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
|
289
299
|
(?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
|
290
300
|
}x =~ ref
|
291
301
|
# return "international-standard" if type_match.nil?
|
data/lib/relaton_iso/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.12.0
|
4
|
+
version: 1.12.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -244,6 +244,7 @@ files:
|
|
244
244
|
- bin/ruby-rewrite
|
245
245
|
- bin/safe_yaml
|
246
246
|
- bin/setup
|
247
|
+
- bin/thor
|
247
248
|
- lib/relaton_iso.rb
|
248
249
|
- lib/relaton_iso/document_identifier.rb
|
249
250
|
- lib/relaton_iso/hit.rb
|