relaton-iec 1.6.0 → 1.7.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/grammars/isodoc.rng +14 -43
- data/lib/relaton_iec/hit_collection.rb +36 -14
- data/lib/relaton_iec/iec_bibliography.rb +40 -12
- data/lib/relaton_iec/scrapper.rb +3 -1
- data/lib/relaton_iec/version.rb +1 -1
- data/relaton_iec.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a149738910c8542c434a360acc80e28b47d0a506610644c46c59ac0b5275bd4
|
4
|
+
data.tar.gz: cef0a735b8ee7049e2f7efe845f60773c9bba620a236422ea52d4dee9b63e83a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf5dced7765bf8060ed5b0b1f73a404f930d2ce3bad4e4b7f500b66c74b24f9d255495e0a049ebd1a3cae99b9c0af4d5f52710f42d5f8eb8e07de2b32a2abf3b
|
7
|
+
data.tar.gz: 7367a1efe7c9ed2e8d976051286c7d2672da7253c0943a00c9e9c100637aff3ece058abb7e69cb8ee74fddb91c908c9df2f1d58741c7a27f1afa513102068c85
|
data/grammars/isodoc.rng
CHANGED
@@ -24,6 +24,14 @@
|
|
24
24
|
<start>
|
25
25
|
<ref name="standard-document"/>
|
26
26
|
</start>
|
27
|
+
<define name="doctype">
|
28
|
+
<element name="doctype">
|
29
|
+
<optional>
|
30
|
+
<attribute name="abbreviation"/>
|
31
|
+
</optional>
|
32
|
+
<ref name="DocumentType"/>
|
33
|
+
</element>
|
34
|
+
</define>
|
27
35
|
<define name="hyperlink">
|
28
36
|
<element name="link">
|
29
37
|
<attribute name="target">
|
@@ -141,6 +149,11 @@
|
|
141
149
|
<data type="boolean"/>
|
142
150
|
</attribute>
|
143
151
|
</optional>
|
152
|
+
<optional>
|
153
|
+
<attribute name="key">
|
154
|
+
<data type="boolean"/>
|
155
|
+
</attribute>
|
156
|
+
</optional>
|
144
157
|
<oneOrMore>
|
145
158
|
<ref name="dt"/>
|
146
159
|
<ref name="dd"/>
|
@@ -1164,49 +1177,7 @@
|
|
1164
1177
|
</define>
|
1165
1178
|
<define name="annex">
|
1166
1179
|
<element name="annex">
|
1167
|
-
<
|
1168
|
-
<attribute name="id">
|
1169
|
-
<data type="ID"/>
|
1170
|
-
</attribute>
|
1171
|
-
</optional>
|
1172
|
-
<optional>
|
1173
|
-
<attribute name="language"/>
|
1174
|
-
</optional>
|
1175
|
-
<optional>
|
1176
|
-
<attribute name="script"/>
|
1177
|
-
</optional>
|
1178
|
-
<optional>
|
1179
|
-
<attribute name="inline-header">
|
1180
|
-
<data type="boolean"/>
|
1181
|
-
</attribute>
|
1182
|
-
</optional>
|
1183
|
-
<attribute name="obligation">
|
1184
|
-
<choice>
|
1185
|
-
<value>normative</value>
|
1186
|
-
<value>informative</value>
|
1187
|
-
</choice>
|
1188
|
-
</attribute>
|
1189
|
-
<optional>
|
1190
|
-
<ref name="section-title"/>
|
1191
|
-
</optional>
|
1192
|
-
<group>
|
1193
|
-
<group>
|
1194
|
-
<zeroOrMore>
|
1195
|
-
<ref name="BasicBlock"/>
|
1196
|
-
</zeroOrMore>
|
1197
|
-
<zeroOrMore>
|
1198
|
-
<ref name="note"/>
|
1199
|
-
</zeroOrMore>
|
1200
|
-
</group>
|
1201
|
-
<zeroOrMore>
|
1202
|
-
<choice>
|
1203
|
-
<ref name="annex-subsection"/>
|
1204
|
-
<ref name="terms"/>
|
1205
|
-
<ref name="definitions"/>
|
1206
|
-
<ref name="references"/>
|
1207
|
-
</choice>
|
1208
|
-
</zeroOrMore>
|
1209
|
-
</group>
|
1180
|
+
<ref name="Annex-Section"/>
|
1210
1181
|
</element>
|
1211
1182
|
</define>
|
1212
1183
|
<define name="terms">
|
@@ -9,40 +9,62 @@ module RelatonIec
|
|
9
9
|
DOMAIN = "https://webstore.iec.ch"
|
10
10
|
|
11
11
|
# @param ref_nbr [String]
|
12
|
-
# @param year [String]
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
# @param year [String, nil]
|
13
|
+
# @param part [String, nil]
|
14
|
+
def initialize(ref_nbr, year = nil, part = nil)
|
15
|
+
super ref_nbr, year
|
16
|
+
@array = hits ref_nbr, year, part
|
16
17
|
end
|
17
18
|
|
18
19
|
private
|
19
20
|
|
20
|
-
|
21
|
+
# @param ref [String]
|
22
|
+
# @param year [String, nil]
|
23
|
+
# @param part [String, nil]
|
24
|
+
# @return [Array<RelatonIec::Hit>]
|
25
|
+
def hits(ref, year, part)
|
21
26
|
from, to = nil
|
22
27
|
if year
|
23
28
|
from = Date.strptime year, "%Y"
|
24
29
|
to = from.next_year.prev_day
|
25
30
|
end
|
26
|
-
get_results ref, from, to
|
31
|
+
get_results ref, from, to, part
|
27
32
|
end
|
28
33
|
|
29
|
-
|
34
|
+
# @param ref [String]
|
35
|
+
# @param from [Date, nil]
|
36
|
+
# @param to [Date, nil]
|
37
|
+
# @param part [String, nil]
|
38
|
+
# @return [Array<RelatonIec::Hit>]
|
39
|
+
def get_results(ref, from, to, part = nil)
|
40
|
+
code = part ? ref.sub(/(?<=-\d)\d+/, "*") : ref
|
30
41
|
[nil, "trf", "wr"].reduce([]) do |m, t|
|
31
42
|
url = "#{DOMAIN}/searchkey"
|
32
43
|
url += "&type=#{t}" if t
|
33
|
-
url += "&RefNbr=#{
|
34
|
-
m + results(Addressable::URI.parse(url).normalize)
|
44
|
+
url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
|
45
|
+
m + results(Addressable::URI.parse(url).normalize, part)
|
35
46
|
end
|
36
47
|
end
|
37
48
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
49
|
+
# @param uri [Addressable::URI]
|
50
|
+
# @param part [String, nil]
|
51
|
+
# @return [Array<RelatonIec::Hit>]
|
52
|
+
def results(uri, part)
|
53
|
+
contains = "[contains(.,'Part #{part}:')]" if part
|
54
|
+
resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
|
55
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
|
56
|
+
doc = Nokogiri::HTML(resp)
|
57
|
+
doc.xpath(
|
58
|
+
"//body/li#{contains}",
|
59
|
+
"//ul[contains(@class,'search-results')]/li#{contains}",
|
60
|
+
"//ul[contains(@class,'morethesame')]/li#{contains}"
|
61
|
+
).map { |h| make_hit h }.compact
|
42
62
|
end
|
43
63
|
|
44
64
|
def make_hit(hit)
|
45
|
-
link
|
65
|
+
link = hit.at('a[@href!="#"]')
|
66
|
+
return unless link
|
67
|
+
|
46
68
|
code = link.text.tr [194, 160].pack("c*").force_encoding("UTF-8"), ""
|
47
69
|
title = hit.xpath("text()").text.gsub(/[\r\n]/, "")
|
48
70
|
Hit.new({ code: code, title: title, url: DOMAIN + link[:href] }, self)
|
@@ -9,10 +9,19 @@ module RelatonIec
|
|
9
9
|
# Class methods for searching ISO standards.
|
10
10
|
class IecBibliography
|
11
11
|
class << self
|
12
|
+
##
|
13
|
+
# Search for standards entries. To search for a packaged document, pass
|
14
|
+
# the part parameter.
|
15
|
+
#
|
16
|
+
# @example Search for packaged standard
|
17
|
+
# RelatonIec::IecBibliography.search 'IEC 60050-311', nil, '311'
|
18
|
+
#
|
12
19
|
# @param text [String]
|
20
|
+
# @param year [String, nil]
|
21
|
+
# @param part [String, nil] search for packaged standard if not nil
|
13
22
|
# @return [RelatonIec::HitCollection]
|
14
|
-
def search(text, year = nil)
|
15
|
-
HitCollection.new text, year
|
23
|
+
def search(text, year = nil, part = nil)
|
24
|
+
HitCollection.new text, year&.strip, part
|
16
25
|
rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError
|
17
26
|
raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
|
18
27
|
end
|
@@ -76,15 +85,34 @@ module RelatonIec
|
|
76
85
|
workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
77
86
|
end
|
78
87
|
|
79
|
-
def isobib_search_filter(
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
88
|
+
def isobib_search_filter(reference, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
89
|
+
%r{
|
90
|
+
^(?<code>(?:ISO|IEC)[^\d]*\s[\d-]+\w?)
|
91
|
+
(:(?<year1>\d{4}))?
|
92
|
+
(?<bundle>\+[^\s\/]+)?
|
93
|
+
(\/(?<corr>AMD\s\d+))?
|
94
|
+
}x =~ reference.upcase
|
95
|
+
year ||= year1
|
96
|
+
corr&.sub! " ", ""
|
97
|
+
warn "[relaton-iec] (\"#{reference}\") fetching..."
|
98
|
+
result = search(code, year)
|
99
|
+
if result.empty? && /(?<=-)(?<part>\d+)/ =~ code
|
100
|
+
# try to search packaged standard
|
101
|
+
result = search code, year, part
|
102
|
+
ref = code.sub /(?<=-\d)\d+/, ""
|
103
|
+
else ref = code
|
104
|
+
end
|
84
105
|
result.select do |i|
|
85
|
-
|
86
|
-
|
87
|
-
|
106
|
+
%r{
|
107
|
+
^(?<code2>(?:ISO|IEC)[^\d]*\s\d+(-\w+)?)
|
108
|
+
(:(?<year2>\d{4}))?
|
109
|
+
(?<bundle2>\+[^\s\/]+)?
|
110
|
+
(\/(?<corr2>AMD\d+))?
|
111
|
+
}x =~ i.hit[:code]
|
112
|
+
code2.sub! /(?<=-\d)\w*/, "" if part
|
113
|
+
code2.sub! /-\d+\w*/, "" if opts[:all_parts]
|
114
|
+
ref == code2 && (year.nil? || year == year2) && bundle == bundle2 &&
|
115
|
+
corr == corr2
|
88
116
|
end
|
89
117
|
end
|
90
118
|
|
@@ -145,10 +173,10 @@ module RelatonIec
|
|
145
173
|
{ years: missed_years }
|
146
174
|
end
|
147
175
|
|
148
|
-
def iecbib_get1(code, year,
|
176
|
+
def iecbib_get1(code, year, opts)
|
149
177
|
return iev if code.casecmp("IEV").zero?
|
150
178
|
|
151
|
-
result = isobib_search_filter(code) || return
|
179
|
+
result = isobib_search_filter(code, year, opts) || return
|
152
180
|
ret = isobib_results_filter(result, year)
|
153
181
|
if ret[:ret]
|
154
182
|
warn "[relaton-iec] (\"#{code}\") found "\
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -170,8 +170,10 @@ module RelatonIec
|
|
170
170
|
def fetch_status(doc)
|
171
171
|
wip = doc.at('//ROW[STATUS[.="PREPARING"]]')
|
172
172
|
if wip
|
173
|
-
statuses = YAML.load_file "
|
173
|
+
statuses = YAML.load_file File.join __dir__, "statuses.yml"
|
174
174
|
s = wip.at("STAGE").text
|
175
|
+
return unless statuses[s]
|
176
|
+
|
175
177
|
stage, substage = statuses[s]["stage"].split "."
|
176
178
|
else
|
177
179
|
stage = "60"
|
data/lib/relaton_iec/version.rb
CHANGED
data/relaton_iec.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.7.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -170,14 +170,14 @@ dependencies:
|
|
170
170
|
requirements:
|
171
171
|
- - "~>"
|
172
172
|
- !ruby/object:Gem::Version
|
173
|
-
version: 1.
|
173
|
+
version: 1.7.0
|
174
174
|
type: :runtime
|
175
175
|
prerelease: false
|
176
176
|
version_requirements: !ruby/object:Gem::Requirement
|
177
177
|
requirements:
|
178
178
|
- - "~>"
|
179
179
|
- !ruby/object:Gem::Version
|
180
|
-
version: 1.
|
180
|
+
version: 1.7.0
|
181
181
|
description: 'RelatonIec: retrieve IEC Standards for bibliographic use using the IecBibliographicItem
|
182
182
|
model'
|
183
183
|
email:
|