relaton-iec 1.7.0 → 1.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_iec/hit.rb +4 -0
- data/lib/relaton_iec/hit_collection.rb +36 -12
- data/lib/relaton_iec/iec_bibliography.rb +83 -45
- data/lib/relaton_iec/scrapper.rb +3 -1
- data/lib/relaton_iec/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b87aa92de24cc0120f04f0325e2ec419c366b92d775beb1b37ab2cc429c96c1d
|
4
|
+
data.tar.gz: 28aad2f843cd822bca23dc56e0de8526da1993a5e51cab74a80e957d3a125710
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 261ffff809b27f6b6ff84fd936959819fe315cfe187bb0ecc97b5ae48295fcaa515668d66cb229d3cfbb3f31bcb5127112c5da9064f8699671ce50f40fe5df2b
|
7
|
+
data.tar.gz: a7fc694951f0b4b1a0f57c5d3ea9fcb89c4729d4b28dfe945c6d6c3c7add1be498e961bbf2bc71760b7c88034a0a2ad57295c6cd432ee365c9cb2beb0dcf0950
|
data/lib/relaton_iec/hit.rb
CHANGED
@@ -6,6 +6,10 @@ require "addressable/uri"
|
|
6
6
|
module RelatonIec
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
+
def_delegators :@array, :detect
|
10
|
+
|
11
|
+
attr_reader :part
|
12
|
+
|
9
13
|
DOMAIN = "https://webstore.iec.ch"
|
10
14
|
|
11
15
|
# @param ref_nbr [String]
|
@@ -13,53 +17,73 @@ module RelatonIec
|
|
13
17
|
# @param part [String, nil]
|
14
18
|
def initialize(ref_nbr, year = nil, part = nil)
|
15
19
|
super ref_nbr, year
|
16
|
-
@
|
20
|
+
@part = part
|
21
|
+
@array = hits ref_nbr, year
|
22
|
+
end
|
23
|
+
|
24
|
+
# @return [RelatonIec::IecBibliographicItem]
|
25
|
+
def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity
|
26
|
+
parts = @array.reject { |h| h.part.nil? }
|
27
|
+
hit = parts.min_by &:part
|
28
|
+
return @array.first.fetch lang unless hit
|
29
|
+
|
30
|
+
bibitem = hit.fetch
|
31
|
+
all_parts_item = bibitem.to_all_parts
|
32
|
+
parts.reject { |h| h.hit[:code] == hit.hit[:code] }.each do |hi|
|
33
|
+
isobib = RelatonIec::IecBibliographicItem.new(
|
34
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.hit[:code])
|
35
|
+
)
|
36
|
+
all_parts_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: isobib)
|
37
|
+
end
|
38
|
+
all_parts_item
|
17
39
|
end
|
18
40
|
|
19
41
|
private
|
20
42
|
|
21
43
|
# @param ref [String]
|
22
44
|
# @param year [String, nil]
|
23
|
-
# @param part [String, nil]
|
24
45
|
# @return [Array<RelatonIec::Hit>]
|
25
|
-
def hits(ref, year
|
46
|
+
def hits(ref, year)
|
26
47
|
from, to = nil
|
27
48
|
if year
|
28
49
|
from = Date.strptime year, "%Y"
|
29
50
|
to = from.next_year.prev_day
|
30
51
|
end
|
31
|
-
get_results ref, from, to
|
52
|
+
get_results ref, from, to
|
32
53
|
end
|
33
54
|
|
34
55
|
# @param ref [String]
|
35
56
|
# @param from [Date, nil]
|
36
57
|
# @param to [Date, nil]
|
37
|
-
# @param part [String, nil]
|
38
58
|
# @return [Array<RelatonIec::Hit>]
|
39
|
-
def get_results(ref, from, to
|
59
|
+
def get_results(ref, from, to)
|
40
60
|
code = part ? ref.sub(/(?<=-\d)\d+/, "*") : ref
|
41
61
|
[nil, "trf", "wr"].reduce([]) do |m, t|
|
42
62
|
url = "#{DOMAIN}/searchkey"
|
43
63
|
url += "&type=#{t}" if t
|
44
64
|
url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
|
45
|
-
m + results(Addressable::URI.parse(url).normalize
|
65
|
+
m + results(Addressable::URI.parse(url).normalize)
|
46
66
|
end
|
47
67
|
end
|
48
68
|
|
49
69
|
# @param url [String]
|
50
|
-
# @param part [String, nil]
|
51
70
|
# @return [Array<RelatonIec::Hit>]
|
52
|
-
def results(uri
|
71
|
+
def results(uri)
|
53
72
|
contains = "[contains(.,'Part #{part}:')]" if part
|
54
|
-
|
73
|
+
resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
|
74
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
|
75
|
+
doc = Nokogiri::HTML(resp)
|
76
|
+
doc.xpath(
|
55
77
|
"//body/li#{contains}",
|
56
78
|
"//ul[contains(@class,'search-results')]/li#{contains}",
|
57
79
|
"//ul[contains(@class,'morethesame')]/li#{contains}"
|
58
|
-
).map { |h| make_hit h }
|
80
|
+
).map { |h| make_hit h }.compact
|
59
81
|
end
|
60
82
|
|
61
83
|
def make_hit(hit)
|
62
|
-
link
|
84
|
+
link = hit.at('a[@href!="#"]')
|
85
|
+
return unless link
|
86
|
+
|
63
87
|
code = link.text.tr [194, 160].pack("c*").force_encoding("UTF-8"), ""
|
64
88
|
title = hit.xpath("text()").text.gsub(/[\r\n]/, "")
|
65
89
|
Hit.new({ code: code, title: title, url: DOMAIN + link[:href] }, self)
|
@@ -21,7 +21,7 @@ module RelatonIec
|
|
21
21
|
# @param part [String, nil] search for packaged stndard if not nil
|
22
22
|
# @return [RelatonIec::HitCollection]
|
23
23
|
def search(text, year = nil, part = nil)
|
24
|
-
HitCollection.new text, year, part
|
24
|
+
HitCollection.new text, year&.strip, part
|
25
25
|
rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError
|
26
26
|
raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
|
27
27
|
end
|
@@ -32,23 +32,21 @@ module RelatonIec
|
|
32
32
|
# reference is required
|
33
33
|
# @return [String] Relaton XML serialisation of reference
|
34
34
|
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
35
|
+
opts[:all_parts] ||= code.match? /\s\(all parts\)/
|
36
|
+
ref = code.sub /\s\(all parts\)/, ""
|
35
37
|
if year.nil?
|
36
|
-
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~
|
38
|
+
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ ref
|
37
39
|
unless code1.nil?
|
38
|
-
|
40
|
+
ref = code1
|
39
41
|
year = year1
|
40
42
|
end
|
41
43
|
end
|
44
|
+
return iev if ref.casecmp("IEV").zero?
|
42
45
|
|
43
|
-
|
44
|
-
|
45
|
-
opts[:all_parts] ||= !(code =~ / \(all parts\)/).nil?
|
46
|
-
code = code.sub(/ \(all parts\)/, "")
|
47
|
-
ret = iecbib_get1(code, year, opts)
|
46
|
+
ret = iecbib_get(ref, year, opts)
|
48
47
|
return nil if ret.nil?
|
49
48
|
|
50
49
|
ret = ret.to_most_recent_reference unless year || opts[:keep_year]
|
51
|
-
ret = ret.to_all_parts if opts[:all_parts]
|
52
50
|
ret
|
53
51
|
end
|
54
52
|
|
@@ -77,29 +75,43 @@ module RelatonIec
|
|
77
75
|
# @param hits [Array<RelatonIec::Hit>]
|
78
76
|
# @param threads [Integer]
|
79
77
|
# @return [Array<RelatonIec::Hit>]
|
80
|
-
def fetch_pages(hits, threads)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
|
-
def
|
89
|
-
|
90
|
-
|
91
|
-
|
78
|
+
# def fetch_pages(hits, threads)
|
79
|
+
# workers = RelatonBib::WorkersPool.new threads
|
80
|
+
# workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
81
|
+
# hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
82
|
+
# workers.end
|
83
|
+
# workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
84
|
+
# end
|
85
|
+
|
86
|
+
def search_filter(reference, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
87
|
+
%r{
|
88
|
+
^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
|
89
|
+
(:(?<year1>\d{4}))?
|
90
|
+
(?<bundle>\+[^\s\/]+)?
|
91
|
+
(\/(?<corr>AMD\s\d+))?
|
92
|
+
}x =~ reference.upcase
|
93
|
+
year ||= year1
|
94
|
+
corr&.sub! " ", ""
|
95
|
+
warn "[relaton-iec] (\"#{reference}\") fetching..."
|
92
96
|
result = search(code, year)
|
93
|
-
if result.empty? && /(?<=-)(?<part
|
97
|
+
if result.empty? && /(?<=-)(?<part>[\w-]+)/ =~ code
|
94
98
|
# try to search packaged standard
|
95
99
|
result = search code, year, part
|
96
|
-
ref = code.sub /(?<=-\d)\
|
97
|
-
|
100
|
+
# ref = code.sub /(?<=-\d)\w+/, ""
|
101
|
+
# else ref = code
|
98
102
|
end
|
103
|
+
result = search code if result.empty?
|
104
|
+
code.sub! /((?:-\w+)+)/, ""
|
99
105
|
result.select do |i|
|
100
|
-
|
101
|
-
|
102
|
-
|
106
|
+
%r{
|
107
|
+
^(?<code2>(?:ISO|IEC)[^\d]*\s\d+)((?:-\w+)+)?
|
108
|
+
(:(?<year2>\d{4}))?
|
109
|
+
(?<bundle2>\+[^\s\/]+)?
|
110
|
+
(\/(?<corr2>AMD\d+))?
|
111
|
+
}x =~ i.hit[:code]
|
112
|
+
# code2.sub! /(?<=-\d)\w*/, "" if part
|
113
|
+
# code2.sub! /((?:-\w+)+)/, "" if opts[:all_parts]
|
114
|
+
code == code2 && bundle == bundle2 && corr == corr2 # (year.nil? || year == year2) &&
|
103
115
|
end
|
104
116
|
end
|
105
117
|
|
@@ -144,30 +156,56 @@ module RelatonIec
|
|
144
156
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
145
157
|
# If no match, returns any years which caused mismatch, for error
|
146
158
|
# reporting
|
147
|
-
def
|
159
|
+
def results_filter(result, ref, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
160
|
+
r_code, r_year = code_year ref, result.part
|
161
|
+
r_year ||= year
|
148
162
|
missed_years = []
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
163
|
+
missed_parts = false
|
164
|
+
# result.each_slice(3) do |s| # ISO website only allows 3 connections
|
165
|
+
ret = if opts[:all_parts]
|
166
|
+
result.to_all_parts
|
167
|
+
else
|
168
|
+
result.detect do |h|
|
169
|
+
h_code, h_year = code_year h.hit[:code], result.part
|
170
|
+
missed_parts ||= !opts[:all_parts] && r_code != h_code
|
171
|
+
missed_years << h_year unless !r_year || h_year == r_year
|
172
|
+
r_code == h_code && (!year || h_year == r_year)
|
173
|
+
# fetch_pages(s, 3).each_with_index do |r, _i|
|
174
|
+
# return { ret: r } if !year
|
175
|
+
|
176
|
+
# r.date.select { |d| d.type == "published" }.each do |d|
|
177
|
+
# return { ret: r } if year.to_i == d.on(:year)
|
178
|
+
|
179
|
+
# missed_years << d.on(:year)
|
180
|
+
# end
|
181
|
+
# end
|
182
|
+
end&.fetch
|
183
|
+
end
|
184
|
+
{ ret: ret, years: missed_years, missed_parts: missed_parts }
|
185
|
+
end
|
155
186
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
187
|
+
def code_year(ref, part)
|
188
|
+
%r{
|
189
|
+
^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
|
190
|
+
(:(?<year>\d{4}))?
|
191
|
+
}x =~ ref
|
192
|
+
code.sub!(/-\d+/, "") if part
|
193
|
+
[code, year]
|
161
194
|
end
|
162
195
|
|
163
|
-
def
|
164
|
-
return iev if code.casecmp("IEV").zero?
|
196
|
+
def iecbib_get(code, year, opts)
|
197
|
+
# return iev if code.casecmp("IEV").zero?
|
165
198
|
|
166
|
-
result =
|
167
|
-
ret =
|
199
|
+
result = search_filter(code, year, opts) || return
|
200
|
+
ret = results_filter(result, code, year, opts)
|
168
201
|
if ret[:ret]
|
169
|
-
|
170
|
-
|
202
|
+
if ret[:missed_parts]
|
203
|
+
warn "[relaton-iec] WARNING: #{code} found as #{ret[:ret].docidentifier.first.id} "\
|
204
|
+
"but also contain parts. If you wanted to cite all document parts for the reference, use "\
|
205
|
+
"\"#{code} (all parts)\""
|
206
|
+
else
|
207
|
+
warn "[relaton-iec] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
208
|
+
end
|
171
209
|
ret[:ret]
|
172
210
|
else
|
173
211
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -170,8 +170,10 @@ module RelatonIec
|
|
170
170
|
def fetch_status(doc)
|
171
171
|
wip = doc.at('//ROW[STATUS[.="PREPARING"]]')
|
172
172
|
if wip
|
173
|
-
statuses = YAML.load_file "
|
173
|
+
statuses = YAML.load_file File.join __dir__, "statuses.yml"
|
174
174
|
s = wip.at("STAGE").text
|
175
|
+
return unless statuses[s]
|
176
|
+
|
175
177
|
stage, substage = statuses[s]["stage"].split "."
|
176
178
|
else
|
177
179
|
stage = "60"
|
data/lib/relaton_iec/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.
|
4
|
+
version: 1.7.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|