relaton-iec 1.7.1 → 1.7.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +205 -1
- data/bin/rspec +29 -0
- data/lib/relaton_iec.rb +71 -7
- data/lib/relaton_iec/hit.rb +4 -0
- data/lib/relaton_iec/hit_collection.rb +35 -13
- data/lib/relaton_iec/iec_bibliography.rb +91 -46
- data/lib/relaton_iec/processor.rb +6 -0
- data/lib/relaton_iec/scrapper.rb +5 -28
- data/lib/relaton_iec/statuses.yml +84 -17
- data/lib/relaton_iec/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '08ac0e0f5df714485be57354b74b7e172758ffe218467667a63c670ea33ea068'
|
4
|
+
data.tar.gz: e2887cb6b9346fc7d1eb9941fe759dc2d3dd90c9c4826d977e1d9b191ea3ddf8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77bb3d3b8f2c829147434bec3dbcff6840ef1048c24613c9eca3b4b3c1013f6b58d100b987b36713a200de918b240ba8ee4dd461b8887ba5e234b2b7f56eef13
|
7
|
+
data.tar.gz: 93d012040298e5570b91730a260a50009c79889d905daac3fa274660a740b997c50f1914f243fa7c964207061e4d21b60a48ecc0a6b808a555a8688f6598ab4f
|
data/README.adoc
CHANGED
@@ -31,7 +31,211 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
== Usage
|
33
33
|
|
34
|
-
|
34
|
+
=== Search for a standard using keywords
|
35
|
+
|
36
|
+
The `RelatonIec::IecBibliography.search(ref, year)` method returns a hit collection. Each hit can be used to fetch a document.
|
37
|
+
|
38
|
+
- `ref` - reference to search document
|
39
|
+
- `year` - filter by year (optional)
|
40
|
+
|
41
|
+
[source,ruby]
|
42
|
+
----
|
43
|
+
require 'relaton_iec'
|
44
|
+
=> true
|
45
|
+
|
46
|
+
hit_collection = RelatonIec::IecBibliography.search("60050")
|
47
|
+
=> <RelatonIec::HitCollection:0x007fe0d7126f28 @ref=60050 @fetched=false>
|
48
|
+
|
49
|
+
hit_collection.first
|
50
|
+
=> <RelatonIec::Hit:0x007fe1068b4d10 @text="60050" @fetched="false" @fullIdentifier="" @title="IEC 60050-102:2007">
|
51
|
+
|
52
|
+
item = hit_collection[2].fetch
|
53
|
+
=> #<RelatonIec::IecBibliographicItem:0x007fe1171a06f8
|
54
|
+
...
|
55
|
+
|
56
|
+
item.docidentifier
|
57
|
+
=> [#<RelatonBib::DocumentIdentifier:0x007fe1171bb930 @id="IEC 60050-112:2010", @scope=nil, @type="IEC">,
|
58
|
+
#<RelatonBib::DocumentIdentifier:0x007fe1171bb728 @id="urn:iec:std:iec:60050-112:2010:::en", @scope=nil, @type="URN">]
|
59
|
+
|
60
|
+
item.docidentifier.detect { |di| di.type == "URN" }.id
|
61
|
+
=> "urn:iec:std:iec:60050-112:2010:::en"
|
62
|
+
----
|
63
|
+
|
64
|
+
=== Fetch document by keywords
|
65
|
+
|
66
|
+
The `RelatonIec::IecBibliography.get(ref, year, opts)` method returns a document.
|
67
|
+
|
68
|
+
- `ref` - reference to search document
|
69
|
+
- `year` - filter by year (optional)
|
70
|
+
- `opts` - hash of options (optional). Supported options are `:all_parts` (boolean), `:keep_year` (boolean).
|
71
|
+
|
72
|
+
[source,ruby]
|
73
|
+
----
|
74
|
+
item = RelatonIec::IecBibliography.get("IEC 60050-112:2010")
|
75
|
+
[relaton-iec] ("IEC 60050-112") fetching...
|
76
|
+
[relaton-iec] ("IEC 60050-112") found IEC 60050-112:2010
|
77
|
+
=> #<RelatonIec::IecBibliographicItem:0x007fa64e874f78
|
78
|
+
|
79
|
+
item = RelatonIec::IecBibliography.get("IEC 60050-112", "2010", all_parts: true)
|
80
|
+
[relaton-iec] ("IEC 60050-112") fetching...
|
81
|
+
[relaton-iec] ("IEC 60050-112") found IEC 60050 (all parts)
|
82
|
+
=> #<RelatonIec::IecBibliographicItem:0x007fa69e9b3948
|
83
|
+
|
84
|
+
item.docidentifier.first
|
85
|
+
=> #<RelatonBib::DocumentIdentifier:0x007fa69e9abd10 @id="IEC 60050 (all parts)", @scope=nil, @type="IEC">
|
86
|
+
|
87
|
+
item = RelatonIec::IecBibliography.get("IEC 60050-112", "2010")
|
88
|
+
[relaton-iec] ("IEC 60050-112") fetching...
|
89
|
+
[relaton-iec] ("IEC 60050-112") found IEC 60050-112:2010
|
90
|
+
=> #<RelatonIec::IecBibliographicItem:0x007fa69f296da8
|
91
|
+
|
92
|
+
item.docidentifier.first
|
93
|
+
=> #<RelatonBib::DocumentIdentifier:0x007fa69f2a65f0 @id="IEC 60050-112:2010", @scope=nil, @type="IEC">
|
94
|
+
----
|
95
|
+
|
96
|
+
=== XML serialization
|
97
|
+
|
98
|
+
Possible options:
|
99
|
+
|
100
|
+
- *bibdata* - If true, wrap the item in a _bibdata_ element and add an _ext_ element.
|
101
|
+
- *note* - Array of hashes `{ text: "Note", type: "note" }`. These notes will be added to XML.
|
102
|
+
|
103
|
+
[source,ruby]
|
104
|
+
----
|
105
|
+
item.to_xml
|
106
|
+
=> "<bibitem id="IEC60050-112-2010" type="standard">
|
107
|
+
<fetched>2021-01-28</fetched>
|
108
|
+
<title type="title-main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV)</title>
|
109
|
+
<title type="title-part" format="text/plain" language="en" script="Latn">Part 112: Quantities and units</title>
|
110
|
+
<title type="main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV) - Part 112: Quantities and units</title>
|
111
|
+
<uri type="src">https://webstore.iec.ch/publication/162</uri>
|
112
|
+
<uri type="obp">/preview/info_iec60050-112%7Bed1.0%7Db.pdf</uri>
|
113
|
+
<docidentifier type="IEC">IEC 60050-112:2010</docidentifier>
|
114
|
+
<docidentifier type="URN">urn:iec:std:iec:60050-112:2010:::en</docidentifier>
|
115
|
+
<date type="published">
|
116
|
+
<on>2010-01-27</on>
|
117
|
+
</date>
|
118
|
+
<contributor>
|
119
|
+
<role type="publisher"/>
|
120
|
+
<organization>
|
121
|
+
<name>International Electrotechnical Commission</name>
|
122
|
+
<abbreviation>IEC</abbreviation>
|
123
|
+
<uri>www.iec.ch</uri>
|
124
|
+
</organization>
|
125
|
+
</contributor>
|
126
|
+
<edition>1.0</edition>
|
127
|
+
<language>en</language>
|
128
|
+
<script>Latn</script>
|
129
|
+
<abstract format="text/plain" language="en" script="Latn">
|
130
|
+
IEC 60050-112:2010 gives the general terminology concerning quantities and units, the terminology of SI, terms used in names and definitions of quantities, and some basic concepts in metrology. It cancels and replaces Sections 111-11 and 111-12 of International Standard IEC 60050-111:1996. It has the status of a horizontal standard in accordance with IEC Guide 108.
|
131
|
+
</abstract>
|
132
|
+
<status>
|
133
|
+
<stage>60</stage>
|
134
|
+
<substage>60</substage>
|
135
|
+
</status>
|
136
|
+
<copyright>
|
137
|
+
<from>2010</from>
|
138
|
+
<owner>
|
139
|
+
<organization>
|
140
|
+
<name>International Electrotechnical Commission</name>
|
141
|
+
<abbreviation>IEC</abbreviation>
|
142
|
+
<uri>www.iec.ch</uri>
|
143
|
+
</organization>
|
144
|
+
</owner>
|
145
|
+
</copyright>
|
146
|
+
<place>Geneva</place>
|
147
|
+
</bibitem>"
|
148
|
+
|
149
|
+
item.to_xml bibdata: true
|
150
|
+
=> "<bibdata type="standard">
|
151
|
+
<fetched>2021-01-28</fetched>
|
152
|
+
<title type="title-main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV)</title>
|
153
|
+
<title type="title-part" format="text/plain" language="en" script="Latn">Part 112: Quantities and units</title>
|
154
|
+
<title type="main" format="text/plain" language="en" script="Latn">International Electrotechnical Vocabulary (IEV) - Part 112: Quantities and units</title>
|
155
|
+
<uri type="src">https://webstore.iec.ch/publication/162</uri>
|
156
|
+
<uri type="obp">/preview/info_iec60050-112%7Bed1.0%7Db.pdf</uri>
|
157
|
+
<docidentifier type="IEC">IEC 60050-112:2010</docidentifier>
|
158
|
+
<docidentifier type="URN">urn:iec:std:iec:60050-112:2010:::en</docidentifier>
|
159
|
+
...
|
160
|
+
<ext>
|
161
|
+
<doctype>international-standard</doctype>
|
162
|
+
<editorialgroup>
|
163
|
+
<technical-committee number="1" type="technicalCommittee">TC 1 - Terminology</technical-committee>
|
164
|
+
</editorialgroup>
|
165
|
+
<ics>
|
166
|
+
<code>01.040.01</code>
|
167
|
+
<text>Generalities. Terminology. Standardization. Documentation (Vocabularies)</text>
|
168
|
+
</ics>
|
169
|
+
<ics>
|
170
|
+
<code>01.060</code>
|
171
|
+
<text>Quantities and units</text>
|
172
|
+
</ics>
|
173
|
+
<structuredidentifier type="IEC">
|
174
|
+
<project-number>60050</project-number>
|
175
|
+
</structuredidentifier>
|
176
|
+
</ext>
|
177
|
+
</bibdata>"
|
178
|
+
|
179
|
+
item.to_xml note: [{ text: "Note", type: "note" }]
|
180
|
+
=> "<bibitem id="IEC60050-112-2010" type="standard">
|
181
|
+
...
|
182
|
+
<note format="text/plain" type="note">Note</note>
|
183
|
+
...
|
184
|
+
</bibitem>"
|
185
|
+
----
|
186
|
+
|
187
|
+
=== Converting reference to URN
|
188
|
+
|
189
|
+
URN is a document identifier format. It has fields delimited by colons. If any field is absent then its place is left empty. All values are in lower case.
|
190
|
+
|
191
|
+
URN structure: +
|
192
|
+
`urn:sdo_namespace:content_type_namespace:header:project_number:date:type:deliverable:language:relation:adjunct_type:adjunct_number:date[#/=]component_or_related_asset`
|
193
|
+
|
194
|
+
* prefix
|
195
|
+
- `urn` - value: urn
|
196
|
+
- `sdo_namespace` - value: iec
|
197
|
+
- `content_type_namespace` - value: std
|
198
|
+
* base document information
|
199
|
+
- `header` - possible values are: iec, iso, iec-iso, iec-ieee, iec-itu, iec-astm
|
200
|
+
- `project_number` - number and partnumber. For example 67654, 60601-1, 61076-7-101
|
201
|
+
- `date` - document date (optional). Examples: 2010-03, 2010, 2010-10-11 etc.
|
202
|
+
- `type` - document type (optional). Possible values: ts, tr, pas, guide, is, ser.
|
203
|
+
- `deliverable` - (optional) possible values: prv, csv, exv, rlv, cmv
|
204
|
+
- `language` - (optional) examples: en, fr, ru, en-fr, en-fr-ru etc.
|
205
|
+
* adjunct document information (optional)
|
206
|
+
- `relation` - "plus" for consolidations of adjuncts with a base document or "/" for an adjunct itself
|
207
|
+
- `adjunct_type` - possible values: amd, cor, ish
|
208
|
+
- `adjunct_number` - adjunct number. Examples: 1, 2, 3, etc.
|
209
|
+
- `date` - adjunct date. Example: 2009
|
210
|
+
* component or related asset information (optional)
|
211
|
+
- `component_or_related_asset` - # or = followed by component id or related asset. Examples: #fig-1, #sec-1, =forum
|
212
|
+
|
213
|
+
For more information see https://github.com/relaton/relaton-iec/issues/22
|
214
|
+
|
215
|
+
The method `RelatonIec.code_to_urn(code, lang)` converts document identifier to URN.
|
216
|
+
|
217
|
+
* `code` is a document identifier
|
218
|
+
* `lang` is a language code (optional). Examples: en, fr, en-fr etc.
|
219
|
+
|
220
|
+
[source,ruby]
|
221
|
+
----
|
222
|
+
RelatonIec.code_to_urn "IEC 60050-102:2007/AMD1:2017"
|
223
|
+
=> "urn:iec:std:iec:60050-102:2007:::::amd:1:2017"
|
224
|
+
|
225
|
+
RelatonIec.code_to_urn "IEC 60034-1:1969+AMD1:1977+AMD2:1979+AMD3:1980 CSV", "en-fr"
|
226
|
+
=> "urn:iec:std:iec:60034-1:1969::csv:en-fr:plus:amd:1:1977:plus:amd:2:1979:plus:amd:3:1980"
|
227
|
+
----
|
228
|
+
|
229
|
+
The method `RelatonIec.urn_to_code(urn)` converts URN to document identifier.
|
230
|
+
|
231
|
+
[source,ruby]
|
232
|
+
----
|
233
|
+
RelatonIec.urn_to_code "urn:iec:std:iec:60050-102:2007:::::amd:1:2017"
|
234
|
+
=> ["IEC 60050-102:2007/AMD1:2017", ""]
|
235
|
+
|
236
|
+
RelatonIec.urn_to_code "urn:iec:std:iec:60034-1:1969::csv:en-fr:plus:amd:1:1977:plus:amd:2:1979:plus:amd:3:1980"
|
237
|
+
=> ["IEC 60034-1:1969+AMD1:1977+AMD2:1979+AMD3:1980 CSV", "en-fr"]
|
238
|
+
----
|
35
239
|
|
36
240
|
== Development
|
37
241
|
|
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
data/lib/relaton_iec.rb
CHANGED
@@ -10,12 +10,76 @@ require "relaton_iec/hash_converter"
|
|
10
10
|
require "digest/md5"
|
11
11
|
|
12
12
|
module RelatonIec
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
13
|
+
class << self
|
14
|
+
# Returns an MD5 hash of the XML grammar files
|
15
|
+
# @return [String]
|
16
|
+
def grammar_hash
|
17
|
+
gem_path = File.expand_path "..", __dir__
|
18
|
+
grammars_path = File.join gem_path, "grammars", "*"
|
19
|
+
grammars = Dir[grammars_path].sort.map { |gp| File.read gp }.join
|
20
|
+
Digest::MD5.hexdigest grammars
|
21
|
+
end
|
22
|
+
|
23
|
+
# @param code [String]
|
24
|
+
# @param lang [String]
|
25
|
+
# @return [String, nil]
|
26
|
+
def code_to_urn(code, lang = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
27
|
+
rest = code.downcase.sub(%r{
|
28
|
+
(?<head>[^\s]+)\s
|
29
|
+
(?<type>is|ts|tr|pas|srd|guide|tec|wp)?(?(<type>)\s)
|
30
|
+
(?<pnum>[\d-]+)\s?
|
31
|
+
(?<_dd>:)?(?(<_dd>)(?<date>[\d-]+)\s?)
|
32
|
+
}x, "")
|
33
|
+
m = $~
|
34
|
+
return unless m[:head] && m[:pnum]
|
35
|
+
|
36
|
+
deliv = /cmv|csv|exv|prv|rlv|ser/.match(code.downcase).to_s
|
37
|
+
urn = ["urn", "iec", "std", m[:head].split("/").join("-"), m[:pnum], m[:date], m[:type], deliv, lang]
|
38
|
+
(urn + ajunct_to_urn(rest)).join ":"
|
39
|
+
end
|
40
|
+
|
41
|
+
# @param urn [String]
|
42
|
+
# @return [Array<String>, nil] code & language
|
43
|
+
def urn_to_code(urn) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
44
|
+
fields = urn.upcase.split ":"
|
45
|
+
return if fields.size < 5
|
46
|
+
|
47
|
+
head, num, date, type, deliv, lang = fields[3, 8]
|
48
|
+
code = head.gsub("-", "/")
|
49
|
+
code += " #{type}" unless type.nil? || type.empty?
|
50
|
+
code += " #{num}"
|
51
|
+
code += ":#{date}" unless date.nil? || date.empty?
|
52
|
+
code += ajanct_to_code(fields[9..-1])
|
53
|
+
code += " #{deliv}" unless deliv.nil? || deliv.empty?
|
54
|
+
[code, lang&.downcase]
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
# @param fields [Array<String>]
|
60
|
+
# @return [String]
|
61
|
+
def ajanct_to_code(fields)
|
62
|
+
return "" if fields.nil? || fields.empty?
|
63
|
+
|
64
|
+
rel, type, num, date = fields[0..3]
|
65
|
+
code = (rel.empty? ? "/" : "+") + type + num
|
66
|
+
code += ":#{date}" unless date.empty?
|
67
|
+
code + ajanct_to_code(fields[4..-1])
|
68
|
+
end
|
69
|
+
|
70
|
+
# @param rest [String]
|
71
|
+
# @return [Array<String, nil>]
|
72
|
+
def ajunct_to_urn(rest)
|
73
|
+
r = rest.sub(%r{
|
74
|
+
(?<pl>\+|\/)(?(<pl>)(?<adjunct>(amd|cor|ish))(?<adjnum>\d+)\s?)
|
75
|
+
(?<_d2>:)?(?(<_d2>)(?<adjdt>[\d-]+)\s?)
|
76
|
+
}x, "")
|
77
|
+
m = $~ || {}
|
78
|
+
return [] unless m[:adjunct]
|
79
|
+
|
80
|
+
plus = "plus" if m[:pl] == "+"
|
81
|
+
urn = [plus, m[:adjunct], m[:adjnum], m[:adjdt]]
|
82
|
+
urn + ajunct_to_urn(r)
|
83
|
+
end
|
20
84
|
end
|
21
85
|
end
|
data/lib/relaton_iec/hit.rb
CHANGED
@@ -6,52 +6,74 @@ require "addressable/uri"
|
|
6
6
|
module RelatonIec
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
+
def_delegators :@array, :detect
|
10
|
+
|
11
|
+
attr_reader :part
|
12
|
+
|
9
13
|
DOMAIN = "https://webstore.iec.ch"
|
10
14
|
|
11
|
-
# @param
|
15
|
+
# @param ref [String]
|
12
16
|
# @param year [String, nil]
|
13
17
|
# @param part [String, nil]
|
14
|
-
def initialize(
|
15
|
-
super
|
16
|
-
@
|
18
|
+
def initialize(ref, year = nil, part = nil)
|
19
|
+
super ref, year
|
20
|
+
@part = part
|
21
|
+
@array = ref ? hits(ref, year) : []
|
22
|
+
end
|
23
|
+
|
24
|
+
# @return [RelatonIec::IecBibliographicItem]
|
25
|
+
def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity
|
26
|
+
parts = @array.reject { |h| h.part.nil? }
|
27
|
+
hit = parts.min_by &:part
|
28
|
+
return @array.first.fetch lang unless hit
|
29
|
+
|
30
|
+
bibitem = hit.fetch
|
31
|
+
all_parts_item = bibitem.to_all_parts
|
32
|
+
parts.reject { |h| h.hit[:code] == hit.hit[:code] }.each do |hi|
|
33
|
+
isobib = RelatonIec::IecBibliographicItem.new(
|
34
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.hit[:code])
|
35
|
+
)
|
36
|
+
all_parts_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: isobib)
|
37
|
+
end
|
38
|
+
all_parts_item
|
17
39
|
end
|
18
40
|
|
19
41
|
private
|
20
42
|
|
21
43
|
# @param ref [String]
|
22
44
|
# @param year [String, nil]
|
23
|
-
# @param part [String, nil]
|
24
45
|
# @return [Array<RelatonIec::Hit>]
|
25
|
-
def hits(ref, year
|
46
|
+
def hits(ref, year)
|
26
47
|
from, to = nil
|
27
48
|
if year
|
28
49
|
from = Date.strptime year, "%Y"
|
29
50
|
to = from.next_year.prev_day
|
30
51
|
end
|
31
|
-
get_results ref, from, to
|
52
|
+
get_results ref, from, to
|
32
53
|
end
|
33
54
|
|
34
55
|
# @param ref [String]
|
35
56
|
# @param from [Date, nil]
|
36
57
|
# @param to [Date, nil]
|
37
|
-
# @param part [String, nil]
|
38
58
|
# @return [Array<RelatonIec::Hit>]
|
39
|
-
def get_results(ref, from, to
|
59
|
+
def get_results(ref, from, to)
|
40
60
|
code = part ? ref.sub(/(?<=-\d)\d+/, "*") : ref
|
41
61
|
[nil, "trf", "wr"].reduce([]) do |m, t|
|
42
62
|
url = "#{DOMAIN}/searchkey"
|
43
63
|
url += "&type=#{t}" if t
|
44
64
|
url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
|
45
|
-
m + results(Addressable::URI.parse(url).normalize
|
65
|
+
m + results(Addressable::URI.parse(url).normalize)
|
46
66
|
end
|
47
67
|
end
|
48
68
|
|
49
69
|
# @param url [String]
|
50
|
-
# @param part [String, nil]
|
51
70
|
# @return [Array<RelatonIec::Hit>]
|
52
|
-
def results(uri
|
71
|
+
def results(uri)
|
53
72
|
contains = "[contains(.,'Part #{part}:')]" if part
|
54
|
-
|
73
|
+
resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
|
74
|
+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
|
75
|
+
doc = Nokogiri::HTML(resp)
|
76
|
+
doc.xpath(
|
55
77
|
"//body/li#{contains}",
|
56
78
|
"//ul[contains(@class,'search-results')]/li#{contains}",
|
57
79
|
"//ul[contains(@class,'morethesame')]/li#{contains}"
|
@@ -21,7 +21,7 @@ module RelatonIec
|
|
21
21
|
# @param part [String, nil] search for packaged standard if not nil
|
22
22
|
# @return [RelatonIec::HitCollection]
|
23
23
|
def search(text, year = nil, part = nil)
|
24
|
-
HitCollection.new text, year, part
|
24
|
+
HitCollection.new text, year&.strip, part
|
25
25
|
rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError
|
26
26
|
raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
|
27
27
|
end
|
@@ -32,28 +32,29 @@ module RelatonIec
|
|
32
32
|
# reference is required
|
33
33
|
# @return [String] Relaton XML serialisation of reference
|
34
34
|
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
35
|
+
opts[:all_parts] ||= code.match? /\s\(all parts\)/
|
36
|
+
ref = code.sub /\s\(all parts\)/, ""
|
35
37
|
if year.nil?
|
36
|
-
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~
|
38
|
+
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ ref
|
37
39
|
unless code1.nil?
|
38
|
-
|
40
|
+
ref = code1
|
39
41
|
year = year1
|
40
42
|
end
|
41
43
|
end
|
44
|
+
return iev if ref.casecmp("IEV").zero?
|
42
45
|
|
43
|
-
|
44
|
-
|
45
|
-
opts[:all_parts] ||= !(code =~ / \(all parts\)/).nil?
|
46
|
-
code = code.sub(/ \(all parts\)/, "")
|
47
|
-
ret = iecbib_get1(code, year, opts)
|
46
|
+
ret = iecbib_get(ref, year, opts)
|
48
47
|
return nil if ret.nil?
|
49
48
|
|
50
49
|
ret = ret.to_most_recent_reference unless year || opts[:keep_year]
|
51
|
-
ret = ret.to_all_parts if opts[:all_parts]
|
52
50
|
ret
|
53
51
|
end
|
54
52
|
|
55
53
|
private
|
56
54
|
|
55
|
+
# @param code [String]
|
56
|
+
# @param year [String]
|
57
|
+
# @param missed_years [Array<String>]
|
57
58
|
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
58
59
|
id = year ? "#{code}:#{year}" : code
|
59
60
|
warn "[relaton-iec] WARNING: no match found online for #{id}. "\
|
@@ -77,29 +78,42 @@ module RelatonIec
|
|
77
78
|
# @param hits [Array<RelatonIec::Hit>]
|
78
79
|
# @param threads [Integer]
|
79
80
|
# @return [Array<RelatonIec::Hit>]
|
80
|
-
def fetch_pages(hits, threads)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
81
|
+
# def fetch_pages(hits, threads)
|
82
|
+
# workers = RelatonBib::WorkersPool.new threads
|
83
|
+
# workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
84
|
+
# hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
85
|
+
# workers.end
|
86
|
+
# workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
87
|
+
# end
|
88
|
+
|
89
|
+
# @param ref [String]
|
90
|
+
# @param year [String, nil]
|
91
|
+
# @return [RelatonIec::HitCollection]
|
92
|
+
def search_filter(ref, year) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
93
|
+
%r{
|
94
|
+
^(?<code>\S+[^\d]*\s\d+((?:-\w+)+)?)
|
95
|
+
(:(?<year1>\d{4}))?
|
96
|
+
(?<bundle>\+[^\s\/]+)?
|
97
|
+
(\/(?<corr>AMD\s\d+))?
|
98
|
+
}x =~ ref.upcase
|
99
|
+
year ||= year1
|
100
|
+
corr&.sub! " ", ""
|
101
|
+
warn "[relaton-iec] (\"#{ref}\") fetching..."
|
92
102
|
result = search(code, year)
|
93
|
-
if result.empty? && /(?<=-)(?<part
|
103
|
+
if result.empty? && /(?<=-)(?<part>[\w-]+)/ =~ code
|
94
104
|
# try to search packaged standard
|
95
105
|
result = search code, year, part
|
96
|
-
ref = code.sub /(?<=-\d)\d+/, ""
|
97
|
-
else ref = code
|
98
106
|
end
|
107
|
+
result = search code if result.empty?
|
108
|
+
code&.sub! /((?:-\w+)+)/, ""
|
99
109
|
result.select do |i|
|
100
|
-
|
101
|
-
|
102
|
-
|
110
|
+
%r{
|
111
|
+
^(?<code2>\S+[^\d]*\s\d+)((?:-\w+)+)?
|
112
|
+
(:\d{4})?
|
113
|
+
(?<bundle2>\+[^\s\/]+)?
|
114
|
+
(\/(?<corr2>AMD\d+))?
|
115
|
+
}x =~ i.hit[:code]
|
116
|
+
code == code2 && bundle == bundle2 && corr == corr2
|
103
117
|
end
|
104
118
|
end
|
105
119
|
|
@@ -144,30 +158,61 @@ module RelatonIec
|
|
144
158
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
145
159
|
# If no match, returns any years which caused mismatch, for error
|
146
160
|
# reporting
|
147
|
-
def
|
161
|
+
def results_filter(result, ref, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
162
|
+
r_code, r_year = code_year ref, result.part
|
163
|
+
r_year ||= year
|
148
164
|
missed_years = []
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
165
|
+
missed_parts = false
|
166
|
+
# result.each_slice(3) do |s| # ISO website only allows 3 connections
|
167
|
+
ret = if opts[:all_parts]
|
168
|
+
result.to_all_parts
|
169
|
+
else
|
170
|
+
result.detect do |h|
|
171
|
+
h_code, h_year = code_year h.hit[:code], result.part
|
172
|
+
missed_parts ||= !opts[:all_parts] && r_code != h_code
|
173
|
+
missed_years << h_year unless !r_year || h_year == r_year
|
174
|
+
r_code == h_code && (!year || h_year == r_year)
|
175
|
+
# fetch_pages(s, 3).each_with_index do |r, _i|
|
176
|
+
# return { ret: r } if !year
|
177
|
+
|
178
|
+
# r.date.select { |d| d.type == "published" }.each do |d|
|
179
|
+
# return { ret: r } if year.to_i == d.on(:year)
|
180
|
+
|
181
|
+
# missed_years << d.on(:year)
|
182
|
+
# end
|
183
|
+
# end
|
184
|
+
end&.fetch
|
185
|
+
end
|
186
|
+
{ ret: ret, years: missed_years, missed_parts: missed_parts }
|
161
187
|
end
|
162
188
|
|
163
|
-
|
164
|
-
|
189
|
+
# @param ref [String]
|
190
|
+
# @param part [String, nil]
|
191
|
+
# @return [Array<String, nil>]
|
192
|
+
def code_year(ref, part)
|
193
|
+
%r{
|
194
|
+
^(?<code>\S+[^\d]*\s\d+((?:-\w+)+)?)
|
195
|
+
(:(?<year>\d{4}))?
|
196
|
+
}x =~ ref
|
197
|
+
code.sub!(/-\d+/, "") if part
|
198
|
+
[code, year]
|
199
|
+
end
|
165
200
|
|
166
|
-
|
167
|
-
|
201
|
+
# @param code [String]
|
202
|
+
# @param year [String, nil]
|
203
|
+
# @param opts [Hash]
|
204
|
+
# @return [RelatonIec::IecBibliographicItem, nil]
|
205
|
+
def iecbib_get(code, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
206
|
+
result = search_filter(code, year) || return
|
207
|
+
ret = results_filter(result, code, year, opts)
|
168
208
|
if ret[:ret]
|
169
|
-
|
170
|
-
|
209
|
+
if ret[:missed_parts]
|
210
|
+
warn "[relaton-iec] WARNING: #{code} found as #{ret[:ret].docidentifier.first.id} "\
|
211
|
+
"but also contain parts. If you wanted to cite all document parts for the reference, use "\
|
212
|
+
"\"#{code} (all parts)\""
|
213
|
+
else
|
214
|
+
warn "[relaton-iec] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
|
215
|
+
end
|
171
216
|
ret[:ret]
|
172
217
|
else
|
173
218
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iec/scrapper.rb
CHANGED
@@ -69,38 +69,13 @@ module RelatonIec
|
|
69
69
|
# @param hit [Hash]
|
70
70
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
71
71
|
def fetch_docid(hit)
|
72
|
-
|
73
|
-
(?<head>[^\s]+)\s
|
74
|
-
(?<type>is|ts|tr|pas|srd|guide|tec|wp)?(?(<type>)\s)
|
75
|
-
(?<pnum>[\d-]+)\s?
|
76
|
-
(?<_dd>:)?(?(<_dd>)(?<date>[\d-]+)\s?)
|
77
|
-
}x, "")
|
78
|
-
m = $~
|
79
|
-
deliv = /cmv|csv|exv|prv|rlv|ser/.match(hit[:code].downcase).to_s
|
80
|
-
urn = ["urn", "iec", "std", m[:head].split("/").join("-"), m[:pnum],
|
81
|
-
m[:date], m[:type], deliv, "en"]
|
82
|
-
urn += fetch_ajunct(rest)
|
72
|
+
urn = RelatonIec.code_to_urn hit[:code], "en"
|
83
73
|
[
|
84
74
|
RelatonBib::DocumentIdentifier.new(id: hit[:code], type: "IEC"),
|
85
|
-
RelatonBib::DocumentIdentifier.new(id: urn
|
75
|
+
RelatonBib::DocumentIdentifier.new(id: urn, type: "URN"),
|
86
76
|
]
|
87
77
|
end
|
88
78
|
|
89
|
-
# @param rest [String]
|
90
|
-
# @return [Array<String, nil>]
|
91
|
-
def fetch_ajunct(rest)
|
92
|
-
r = rest.sub(%r{
|
93
|
-
(?<_pl>\+)(?(<_pl>)(?<adjunct>amd)(?<adjnum>\d+)\s?)
|
94
|
-
(?<_d2>:)?(?(<_d2>)(?<adjdt>[\d-]+)\s?)
|
95
|
-
}x, "")
|
96
|
-
m = $~ || {}
|
97
|
-
return [] unless m[:adjunct]
|
98
|
-
|
99
|
-
plus = m[:adjunct] && "plus"
|
100
|
-
urn = [plus, m[:adjunct], m[:adjnum], m[:adjdt]]
|
101
|
-
urn + fetch_ajunct(r)
|
102
|
-
end
|
103
|
-
|
104
79
|
# Fetch abstracts.
|
105
80
|
# @param doc [Nokigiri::HTML::Document]
|
106
81
|
# @return [Array<Array>]
|
@@ -170,8 +145,10 @@ module RelatonIec
|
|
170
145
|
def fetch_status(doc)
|
171
146
|
wip = doc.at('//ROW[STATUS[.="PREPARING"]]')
|
172
147
|
if wip
|
173
|
-
statuses = YAML.load_file "
|
148
|
+
statuses = YAML.load_file File.join __dir__, "statuses.yml"
|
174
149
|
s = wip.at("STAGE").text
|
150
|
+
return unless statuses[s]
|
151
|
+
|
175
152
|
stage, substage = statuses[s]["stage"].split "."
|
176
153
|
else
|
177
154
|
stage = "60"
|
@@ -4,6 +4,9 @@ ACD:
|
|
4
4
|
ACDV:
|
5
5
|
status: Approved for CDV
|
6
6
|
stage: '30.99'
|
7
|
+
ADISSB:
|
8
|
+
status: Preparation of text subcontracted to CO
|
9
|
+
stage: '40.95'
|
7
10
|
ADTR:
|
8
11
|
status: Approved for DTR
|
9
12
|
stage: '40.99'
|
@@ -13,30 +16,39 @@ ADTS:
|
|
13
16
|
AFDIS:
|
14
17
|
status: Approved for FDIS
|
15
18
|
stage: '40.99'
|
19
|
+
AMW:
|
20
|
+
status: Document under revision
|
21
|
+
stage: '92.20'
|
22
|
+
ANW:
|
23
|
+
status: Registration of new project
|
24
|
+
stage: '20.00'
|
16
25
|
APUB:
|
17
26
|
status: Approved for publication
|
18
27
|
stage: '50.99'
|
28
|
+
APUBSB:
|
29
|
+
status: Preparation of text subcontracted to CO
|
30
|
+
stage: '50.95'
|
19
31
|
BPUB:
|
20
32
|
status: Being published
|
21
33
|
stage: '60.00'
|
34
|
+
BWG:
|
35
|
+
status: Return to drafting phase or redefine project
|
36
|
+
stage: '30.92'
|
22
37
|
CAN:
|
23
38
|
status: Draft cancelled
|
24
39
|
stage: '20.98'
|
40
|
+
CCDV:
|
41
|
+
status: Draft circulated as CDV
|
42
|
+
stage: '40.00'
|
25
43
|
CD:
|
26
44
|
status: Draft circulated as CD
|
27
45
|
stage: '30.00'
|
46
|
+
CDISH:
|
47
|
+
status: Draft circulated as DISH
|
48
|
+
stage: '50.20'
|
28
49
|
CDM:
|
29
50
|
status: CD to be discussed at meeting
|
30
51
|
stage: '30.20'
|
31
|
-
CCDV:
|
32
|
-
status: Draft circulated as CDV
|
33
|
-
stage: '40.00'
|
34
|
-
CDVM:
|
35
|
-
status: Rejected CDV to be discussed at a meeting
|
36
|
-
stage: '40.93'
|
37
|
-
CFDIS:
|
38
|
-
status: Draft circulated as FDIS
|
39
|
-
stage: '50.20'
|
40
52
|
CDPAS:
|
41
53
|
status: Draft circulated as DPAS
|
42
54
|
stage: '50.20'
|
@@ -46,12 +58,15 @@ CDTR:
|
|
46
58
|
CDTS:
|
47
59
|
status: Draft circulated as DTS
|
48
60
|
stage: '50.20'
|
49
|
-
|
50
|
-
status: Rejected
|
51
|
-
stage: '
|
52
|
-
|
53
|
-
status:
|
54
|
-
stage: '50.
|
61
|
+
CDVM:
|
62
|
+
status: Rejected CDV to be discussed at a meeting
|
63
|
+
stage: '40.93'
|
64
|
+
CFDIS:
|
65
|
+
status: Draft circulated as FDIS
|
66
|
+
stage: '50.20'
|
67
|
+
DECDISH:
|
68
|
+
status: DISH at editing check
|
69
|
+
stage: '40.99'
|
55
70
|
DECFDIS:
|
56
71
|
status: FDIS at editing check
|
57
72
|
stage: '50.60'
|
@@ -64,6 +79,21 @@ DEL:
|
|
64
79
|
DELPUB:
|
65
80
|
status: Deleted publication
|
66
81
|
stage: '90.99'
|
82
|
+
DREJ:
|
83
|
+
status: Abandon
|
84
|
+
stage: '30.98'
|
85
|
+
DTRM:
|
86
|
+
status: Rejected DTR to be discussed at meeting
|
87
|
+
stage: '50.92'
|
88
|
+
DTSM:
|
89
|
+
status: Rejected DTS to be discussed at meeting
|
90
|
+
stage: '50.92'
|
91
|
+
MERGED:
|
92
|
+
status: Fragment merged
|
93
|
+
stage: '30.97'
|
94
|
+
NADIS:
|
95
|
+
status: Repeat enquiry
|
96
|
+
stage: '40.93'
|
67
97
|
NCDV:
|
68
98
|
status: CDV rejected
|
69
99
|
stage: '40.98'
|
@@ -84,13 +114,16 @@ PNW:
|
|
84
114
|
stage: '10.00'
|
85
115
|
PPUB:
|
86
116
|
status: Publication issued
|
87
|
-
stage: '60.60'
|
117
|
+
stage: '60.60'
|
88
118
|
PRVC:
|
89
119
|
status: Preparation of RVC
|
90
120
|
stage: '40.92'
|
91
121
|
PRVD:
|
92
122
|
status: Preparation of RVD
|
93
123
|
stage: '40.92'
|
124
|
+
PRVDISH:
|
125
|
+
status: Preparation of RVDISH
|
126
|
+
stage: '40.92' # ?
|
94
127
|
PRVDPAS:
|
95
128
|
status: Preparation of RVDPAS
|
96
129
|
stage: '40.92'
|
@@ -106,15 +139,34 @@ PRVN:
|
|
106
139
|
PWI:
|
107
140
|
status: Preliminary work item
|
108
141
|
stage: '00.00'
|
142
|
+
RDIS:
|
143
|
+
status: Registration for formal approval
|
144
|
+
stage: '50.00'
|
145
|
+
RDISH:
|
146
|
+
status: DISH received and registered
|
147
|
+
stage: '50.00'
|
109
148
|
RFDIS:
|
110
149
|
status: FDIS received and registered
|
111
150
|
stage: '50.00'
|
112
151
|
RPUB:
|
113
152
|
status: Publication received and registered
|
114
153
|
stage: '60.60'
|
154
|
+
SPE:
|
155
|
+
stage: SPE # ?
|
156
|
+
SPLIT:
|
157
|
+
status: Project Fragmented
|
158
|
+
stage: SPLIT # ?
|
159
|
+
SRP:
|
160
|
+
stage: SRP
|
161
|
+
SUSPENDED:
|
162
|
+
status: Project Suspended
|
163
|
+
stage: SUSPENDED # ?
|
115
164
|
TCDV:
|
116
165
|
status: Translation of CDV
|
117
166
|
stage: '50.00'
|
167
|
+
TDISH:
|
168
|
+
status: Translation of DISH
|
169
|
+
stage: '50.00' # ?
|
118
170
|
TDTR:
|
119
171
|
status: Translation of DTR
|
120
172
|
stage: '50.00'
|
@@ -129,4 +181,19 @@ TPUB:
|
|
129
181
|
stage: '60.00'
|
130
182
|
WPUB:
|
131
183
|
status: Publication withdrawn
|
132
|
-
stage: '95.99'
|
184
|
+
stage: '95.99'
|
185
|
+
preCD:
|
186
|
+
status: Preparation of CD document
|
187
|
+
stage: preCD
|
188
|
+
preCDPAS:
|
189
|
+
status: Preparation of DPAS
|
190
|
+
stage: preCDPAS
|
191
|
+
preDISH:
|
192
|
+
status: Preparation of DISH
|
193
|
+
stage: preDISH
|
194
|
+
preDTR:
|
195
|
+
status: Preparation of DTR document
|
196
|
+
stage: preDTR
|
197
|
+
prePNW:
|
198
|
+
status: Preparation of NP document
|
199
|
+
stage: prePNW
|
data/lib/relaton_iec/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.
|
4
|
+
version: 1.7.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -197,6 +197,7 @@ files:
|
|
197
197
|
- README.adoc
|
198
198
|
- Rakefile
|
199
199
|
- bin/console
|
200
|
+
- bin/rspec
|
200
201
|
- bin/setup
|
201
202
|
- grammars/basicdoc.rng
|
202
203
|
- grammars/biblio.rng
|