relaton-iec 1.7.0 → 1.7.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_iec/hit.rb +4 -0
- data/lib/relaton_iec/hit_collection.rb +36 -12
- data/lib/relaton_iec/iec_bibliography.rb +83 -45
- data/lib/relaton_iec/scrapper.rb +3 -1
- data/lib/relaton_iec/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b87aa92de24cc0120f04f0325e2ec419c366b92d775beb1b37ab2cc429c96c1d
|
4
|
+
data.tar.gz: 28aad2f843cd822bca23dc56e0de8526da1993a5e51cab74a80e957d3a125710
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 261ffff809b27f6b6ff84fd936959819fe315cfe187bb0ecc97b5ae48295fcaa515668d66cb229d3cfbb3f31bcb5127112c5da9064f8699671ce50f40fe5df2b
|
7
|
+
data.tar.gz: a7fc694951f0b4b1a0f57c5d3ea9fcb89c4729d4b28dfe945c6d6c3c7add1be498e961bbf2bc71760b7c88034a0a2ad57295c6cd432ee365c9cb2beb0dcf0950
|
data/lib/relaton_iec/hit.rb
CHANGED
@@ -6,6 +6,10 @@ require "addressable/uri"
|
|
6
6
|
module RelatonIec
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
+
def_delegators :@array, :detect
|
10
|
+
|
11
|
+
attr_reader :part
|
12
|
+
|
9
13
|
DOMAIN = "https://webstore.iec.ch"
|
10
14
|
|
11
15
|
# @param ref_nbr [String]
|
@@ -13,53 +17,73 @@ module RelatonIec
|
|
13
17
|
# @param part [String, nil]
|
14
18
|
def initialize(ref_nbr, year = nil, part = nil)
|
15
19
|
super ref_nbr, year
|
16
|
-
@
|
20
|
+
@part = part
|
21
|
+
@array = hits ref_nbr, year
|
22
|
+
end
|
23
|
+
|
24
|
+
# @return [RelatonIec::IecBibliographicItem]
|
25
|
+
def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity
|
26
|
+
parts = @array.reject { |h| h.part.nil? }
|
27
|
+
hit = parts.min_by &:part
|
28
|
+
return @array.first.fetch lang unless hit
|
29
|
+
|
30
|
+
bibitem = hit.fetch
|
31
|
+
all_parts_item = bibitem.to_all_parts
|
32
|
+
parts.reject { |h| h.hit[:code] == hit.hit[:code] }.each do |hi|
|
33
|
+
isobib = RelatonIec::IecBibliographicItem.new(
|
34
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.hit[:code])
|
35
|
+
)
|
36
|
+
all_parts_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: isobib)
|
37
|
+
end
|
38
|
+
all_parts_item
|
17
39
|
end
|
18
40
|
|
19
41
|
private
|
20
42
|
|
21
43
|
# @param ref [String]
|
22
44
|
# @param year [String, nil]
|
23
|
-
# @param part [String, nil]
|
24
45
|
# @return [Array<RelatonIec::Hit>]
|
25
|
-
def hits(ref, year
|
46
|
+
def hits(ref, year)
|
26
47
|
from, to = nil
|
27
48
|
if year
|
28
49
|
from = Date.strptime year, "%Y"
|
29
50
|
to = from.next_year.prev_day
|
30
51
|
end
|
31
|
-
get_results ref, from, to
|
52
|
+
get_results ref, from, to
|
32
53
|
end
|
33
54
|
|
34
55
|
# @param ref [String]
|
35
56
|
# @param from [Date, nil]
|
36
57
|
# @param to [Date, nil]
|
37
|
-
# @param part [String, nil]
|
38
58
|
# @return [Array<RelatonIec::Hit>]
|
39
|
-
def get_results(ref, from, to
|
59
|
+
def get_results(ref, from, to)
|
40
60
|
code = part ? ref.sub(/(?<=-\d)\d+/, "*") : ref
|
41
61
|
[nil, "trf", "wr"].reduce([]) do |m, t|
|
42
62
|
url = "#{DOMAIN}/searchkey"
|
43
63
|
url += "&type=#{t}" if t
|
44
64
|
url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
|
45
|
-
m + results(Addressable::URI.parse(url).normalize
|
65
|
+
m + results(Addressable::URI.parse(url).normalize)
|
46
66
|
end
|
47
67
|
end
|
48
68
|
|
49
69
|
# @param uri [Addressable::URI]
|
50
|
-
# @param part [String, nil]
|
51
70
|
# @return [Array<RelatonIec::Hit>]
|
52
|
-
def results(uri
|
71
|
+
def results(uri)
|
53
72
|
contains = "[contains(.,'Part #{part}:')]" if part
|
54
|
-
|
73
|
+
resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
|
74
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
|
75
|
+
doc = Nokogiri::HTML(resp)
|
76
|
+
doc.xpath(
|
55
77
|
"//body/li#{contains}",
|
56
78
|
"//ul[contains(@class,'search-results')]/li#{contains}",
|
57
79
|
"//ul[contains(@class,'morethesame')]/li#{contains}"
|
58
|
-
).map { |h| make_hit h }
|
80
|
+
).map { |h| make_hit h }.compact
|
59
81
|
end
|
60
82
|
|
61
83
|
def make_hit(hit)
|
62
|
-
link
|
84
|
+
link = hit.at('a[@href!="#"]')
|
85
|
+
return unless link
|
86
|
+
|
63
87
|
code = link.text.tr [194, 160].pack("c*").force_encoding("UTF-8"), ""
|
64
88
|
title = hit.xpath("text()").text.gsub(/[\r\n]/, "")
|
65
89
|
Hit.new({ code: code, title: title, url: DOMAIN + link[:href] }, self)
|
@@ -21,7 +21,7 @@ module RelatonIec
|
|
21
21
|
# @param part [String, nil] search for packaged standard if not nil
|
22
22
|
# @return [RelatonIec::HitCollection]
|
23
23
|
def search(text, year = nil, part = nil)
|
24
|
-
HitCollection.new text, year, part
|
24
|
+
HitCollection.new text, year&.strip, part
|
25
25
|
rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError
|
26
26
|
raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
|
27
27
|
end
|
@@ -32,23 +32,21 @@ module RelatonIec
|
|
32
32
|
# reference is required
|
33
33
|
# @return [String] Relaton XML serialisation of reference
|
34
34
|
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
35
|
+
opts[:all_parts] ||= code.match? /\s\(all parts\)/
|
36
|
+
ref = code.sub /\s\(all parts\)/, ""
|
35
37
|
if year.nil?
|
36
|
-
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~
|
38
|
+
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ ref
|
37
39
|
unless code1.nil?
|
38
|
-
|
40
|
+
ref = code1
|
39
41
|
year = year1
|
40
42
|
end
|
41
43
|
end
|
44
|
+
return iev if ref.casecmp("IEV").zero?
|
42
45
|
|
43
|
-
|
44
|
-
|
45
|
-
opts[:all_parts] ||= !(code =~ / \(all parts\)/).nil?
|
46
|
-
code = code.sub(/ \(all parts\)/, "")
|
47
|
-
ret = iecbib_get1(code, year, opts)
|
46
|
+
ret = iecbib_get(ref, year, opts)
|
48
47
|
return nil if ret.nil?
|
49
48
|
|
50
49
|
ret = ret.to_most_recent_reference unless year || opts[:keep_year]
|
51
|
-
ret = ret.to_all_parts if opts[:all_parts]
|
52
50
|
ret
|
53
51
|
end
|
54
52
|
|
@@ -77,29 +75,43 @@ module RelatonIec
|
|
77
75
|
# @param hits [Array<RelatonIec::Hit>]
|
78
76
|
# @param threads [Integer]
|
79
77
|
# @return [Array<RelatonIec::Hit>]
|
80
|
-
def fetch_pages(hits, threads)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
|
-
def
|
89
|
-
|
90
|
-
|
91
|
-
|
78
|
+
# def fetch_pages(hits, threads)
|
79
|
+
# workers = RelatonBib::WorkersPool.new threads
|
80
|
+
# workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
81
|
+
# hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
82
|
+
# workers.end
|
83
|
+
# workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
84
|
+
# end
|
85
|
+
|
86
|
+
def search_filter(reference, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
87
|
+
%r{
|
88
|
+
^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
|
89
|
+
(:(?<year1>\d{4}))?
|
90
|
+
(?<bundle>\+[^\s\/]+)?
|
91
|
+
(\/(?<corr>AMD\s\d+))?
|
92
|
+
}x =~ reference.upcase
|
93
|
+
year ||= year1
|
94
|
+
corr&.sub! " ", ""
|
95
|
+
warn "[relaton-iec] (\"#{reference}\") fetching..."
|
92
96
|
result = search(code, year)
|
93
|
-
if result.empty? && /(?<=-)(?<part
|
97
|
+
if result.empty? && /(?<=-)(?<part>[\w-]+)/ =~ code
|
94
98
|
# try to search packaged standard
|
95
99
|
result = search code, year, part
|
96
|
-
ref = code.sub /(?<=-\d)\
|
97
|
-
|
100
|
+
# ref = code.sub /(?<=-\d)\w+/, ""
|
101
|
+
# else ref = code
|
98
102
|
end
|
103
|
+
result = search code if result.empty?
|
104
|
+
code.sub! /((?:-\w+)+)/, ""
|
99
105
|
result.select do |i|
|
100
|
-
|
101
|
-
|
102
|
-
|
106
|
+
%r{
|
107
|
+
^(?<code2>(?:ISO|IEC)[^\d]*\s\d+)((?:-\w+)+)?
|
108
|
+
(:(?<year2>\d{4}))?
|
109
|
+
(?<bundle2>\+[^\s\/]+)?
|
110
|
+
(\/(?<corr2>AMD\d+))?
|
111
|
+
}x =~ i.hit[:code]
|
112
|
+
# code2.sub! /(?<=-\d)\w*/, "" if part
|
113
|
+
# code2.sub! /((?:-\w+)+)/, "" if opts[:all_parts]
|
114
|
+
code == code2 && bundle == bundle2 && corr == corr2 # (year.nil? || year == year2) &&
|
103
115
|
end
|
104
116
|
end
|
105
117
|
|
@@ -144,30 +156,56 @@ module RelatonIec
|
|
144
156
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
145
157
|
# If no match, returns any years which caused mismatch, for error
|
146
158
|
# reporting
|
147
|
-
def
|
159
|
+
def results_filter(result, ref, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
160
|
+
r_code, r_year = code_year ref, result.part
|
161
|
+
r_year ||= year
|
148
162
|
missed_years = []
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
163
|
+
missed_parts = false
|
164
|
+
# result.each_slice(3) do |s| # ISO website only allows 3 connections
|
165
|
+
ret = if opts[:all_parts]
|
166
|
+
result.to_all_parts
|
167
|
+
else
|
168
|
+
result.detect do |h|
|
169
|
+
h_code, h_year = code_year h.hit[:code], result.part
|
170
|
+
missed_parts ||= !opts[:all_parts] && r_code != h_code
|
171
|
+
missed_years << h_year unless !r_year || h_year == r_year
|
172
|
+
r_code == h_code && (!year || h_year == r_year)
|
173
|
+
# fetch_pages(s, 3).each_with_index do |r, _i|
|
174
|
+
# return { ret: r } if !year
|
175
|
+
|
176
|
+
# r.date.select { |d| d.type == "published" }.each do |d|
|
177
|
+
# return { ret: r } if year.to_i == d.on(:year)
|
178
|
+
|
179
|
+
# missed_years << d.on(:year)
|
180
|
+
# end
|
181
|
+
# end
|
182
|
+
end&.fetch
|
183
|
+
end
|
184
|
+
{ ret: ret, years: missed_years, missed_parts: missed_parts }
|
185
|
+
end
|
155
186
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
187
|
+
def code_year(ref, part)
|
188
|
+
%r{
|
189
|
+
^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
|
190
|
+
(:(?<year>\d{4}))?
|
191
|
+
}x =~ ref
|
192
|
+
code.sub!(/-\d+/, "") if part
|
193
|
+
[code, year]
|
161
194
|
end
|
162
195
|
|
163
|
-
def
|
164
|
-
return iev if code.casecmp("IEV").zero?
|
196
|
+
def iecbib_get(code, year, opts)
|
197
|
+
# return iev if code.casecmp("IEV").zero?
|
165
198
|
|
166
|
-
result =
|
167
|
-
ret =
|
199
|
+
result = search_filter(code, year, opts) || return
|
200
|
+
ret = results_filter(result, code, year, opts)
|
168
201
|
if ret[:ret]
|
169
|
-
|
170
|
-
|
202
|
+
if ret[:missed_parts]
|
203
|
+
warn "[relaton-iec] WARNING: #{code} found as #{ret[:ret].docidentifier.first.id} "\
|
204
|
+
"but also contain parts. If you wanted to cite all document parts for the reference, use "\
|
205
|
+
"\"#{code} (all parts)\""
|
206
|
+
else
|
207
|
+
warn "[relaton-iec] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
208
|
+
end
|
171
209
|
ret[:ret]
|
172
210
|
else
|
173
211
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -170,8 +170,10 @@ module RelatonIec
|
|
170
170
|
def fetch_status(doc)
|
171
171
|
wip = doc.at('//ROW[STATUS[.="PREPARING"]]')
|
172
172
|
if wip
|
173
|
-
statuses = YAML.load_file "
|
173
|
+
statuses = YAML.load_file File.join __dir__, "statuses.yml"
|
174
174
|
s = wip.at("STAGE").text
|
175
|
+
return unless statuses[s]
|
176
|
+
|
175
177
|
stage, substage = statuses[s]["stage"].split "."
|
176
178
|
else
|
177
179
|
stage = "60"
|
data/lib/relaton_iec/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.
|
4
|
+
version: 1.7.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|