relaton-iec 1.14.1 → 1.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -0
- data/README.adoc +53 -26
- data/lib/relaton_iec/data_fetcher.rb +190 -0
- data/lib/relaton_iec/data_parser.rb +289 -0
- data/lib/relaton_iec/hit.rb +17 -1
- data/lib/relaton_iec/hit_collection.rb +17 -79
- data/lib/relaton_iec/iec_bibliographic_item.rb +3 -1
- data/lib/relaton_iec/iec_bibliography.rb +83 -111
- data/lib/relaton_iec/index.rb +133 -0
- data/lib/relaton_iec/processor.rb +13 -0
- data/lib/relaton_iec/version.rb +1 -1
- data/lib/relaton_iec.rb +10 -6
- data/relaton_iec.gemspec +5 -8
- metadata +24 -64
- data/lib/relaton_iec/scrapper.rb +0 -308
@@ -6,25 +6,22 @@ require "addressable/uri"
|
|
6
6
|
module RelatonIec
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
-
def_delegators :@array, :detect
|
9
|
+
def_delegators :@array, :detect, :map
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
DOMAIN = "https://webstore.iec.ch"
|
11
|
+
# DOMAIN = "https://webstore.iec.ch"
|
14
12
|
|
15
13
|
# @param ref [String]
|
16
14
|
# @param year [String, nil]
|
17
|
-
|
18
|
-
def initialize(ref, year = nil, part = nil)
|
15
|
+
def initialize(ref, year = nil)
|
19
16
|
super ref, year
|
20
|
-
@
|
21
|
-
@array =
|
17
|
+
@index = Relaton::Index.find_or_create :IEC, url: "#{Hit::GHURL}index1.zip" , file: "index1.yaml"
|
18
|
+
@array = fetch_from_gh
|
22
19
|
end
|
23
20
|
|
24
21
|
# @return [RelatonIec::IecBibliographicItem]
|
25
|
-
def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity
|
26
|
-
parts = @array.
|
27
|
-
hit = parts.min_by
|
22
|
+
def to_all_parts(r_year) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
23
|
+
parts = @array.select { |h| h.part && h.hit[:code].match?(/^[\s\w-]+:#{r_year}/) }
|
24
|
+
hit = parts.min_by { |h| h.part.to_i }
|
28
25
|
return @array.first&.fetch unless hit
|
29
26
|
|
30
27
|
bibitem = hit.fetch
|
@@ -41,76 +38,17 @@ module RelatonIec
|
|
41
38
|
|
42
39
|
private
|
43
40
|
|
44
|
-
|
45
|
-
|
46
|
-
# @return [Array<RelatonIec::Hit>]
|
47
|
-
def hits(ref, year) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
48
|
-
if /61360-4\sDB|ISO[\s\/]IEC\sDIR/.match?(ref)
|
49
|
-
fetch_from_gh ref
|
50
|
-
else
|
51
|
-
from, to = nil
|
52
|
-
if year
|
53
|
-
from = Date.strptime year, "%Y"
|
54
|
-
to = from.next_year.prev_day
|
55
|
-
end
|
56
|
-
get_results ref, from, to
|
57
|
-
end
|
58
|
-
# file = "../data/#{ref.sub(/^IEC\s/, '').gsub(/[\s\/]/, '_').upcase}.yaml"
|
59
|
-
# path = File.expand_path file, __dir__
|
60
|
-
# if File.exist? path
|
61
|
-
# hash = YAML.safe_load File.read(path, encoding: "utf-8")
|
62
|
-
# hit = Hit.new({ code: ref }, self)
|
63
|
-
# hit.fetch = IecBibliographicItem.from_hash hash
|
64
|
-
# return [hit]
|
65
|
-
# end
|
66
|
-
end
|
67
|
-
|
68
|
-
def fetch_from_gh(ref)
|
69
|
-
file = ref.sub(/^IEC\s(?=ISO|CISPR)/, "").gsub(/[\s\/]/, "_").upcase
|
70
|
-
url = "https://raw.githubusercontent.com/relaton/relaton-data-iec/main/data/#{file}.yaml"
|
71
|
-
resp = Net::HTTP.get URI(url)
|
72
|
-
hash = YAML.safe_load resp
|
73
|
-
hash["fetched"] = Date.today.to_s
|
74
|
-
hit = Hit.new({ code: ref }, self)
|
75
|
-
hit.fetch = IecBibliographicItem.from_hash hash
|
76
|
-
[hit]
|
77
|
-
end
|
41
|
+
def fetch_from_gh
|
42
|
+
return [] unless text
|
78
43
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
url = "#{DOMAIN}/searchkey"
|
87
|
-
url += "&type=#{t}" if t
|
88
|
-
url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
|
89
|
-
m + results(Addressable::URI.parse(url).normalize)
|
44
|
+
ref = year && !/:\d{4}$/.match?(text) ? "#{text}:#{year}" : text
|
45
|
+
reference = ref.sub(/^IEC\s(?=ISO\/IEC\sDIR)/, "")
|
46
|
+
@index.search do |row|
|
47
|
+
row[:id].include? reference
|
48
|
+
end.sort_by { |row| row[:id] }.map do |row|
|
49
|
+
# pubid = row[:pubid].is_a?(Array) ? row[:pubid][0] : row[:pubid]
|
50
|
+
Hit.new({ code: row[:id], file: row[:file] }, self)
|
90
51
|
end
|
91
52
|
end
|
92
|
-
|
93
|
-
# @param url [String]
|
94
|
-
# @return [Array<RelatonIec::Hit>]
|
95
|
-
def results(uri)
|
96
|
-
contains = "[contains(.,'Part #{part}:')]" if part
|
97
|
-
resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
|
98
|
-
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
|
99
|
-
doc = Nokogiri::HTML(resp)
|
100
|
-
doc.xpath(
|
101
|
-
"//body/li#{contains}",
|
102
|
-
"//ul[contains(@class,'search-results')]/li#{contains}",
|
103
|
-
"//ul[contains(@class,'morethesame')]/li#{contains}"
|
104
|
-
).map { |h| make_hit h }.compact
|
105
|
-
end
|
106
|
-
|
107
|
-
def make_hit(hit)
|
108
|
-
link = hit.at('a[@href!="#"]')
|
109
|
-
return unless link
|
110
|
-
|
111
|
-
code = link.text.tr [194, 160].pack("c*").force_encoding("UTF-8"), ""
|
112
|
-
title = hit.xpath("text()").text.gsub(/[\r\n]/, "")
|
113
|
-
Hit.new({ code: code, title: title, url: DOMAIN + link[:href] }, self)
|
114
|
-
end
|
115
53
|
end
|
116
54
|
end
|
@@ -26,6 +26,8 @@ module RelatonIec
|
|
26
26
|
# @option args [String, nil] :function function
|
27
27
|
# @option args [String, nil] :updates_document_type updates document type
|
28
28
|
# @option args [String, nil] :price_code price code
|
29
|
+
# @option args [Boolean, nil] :cen_processing
|
30
|
+
# @option args [String, nil] :secretary
|
29
31
|
# @option args [String, nil] :secretary secretary
|
30
32
|
# @option args [String, nil] :interest_to_committees interest to committees
|
31
33
|
# @option args [Boolean, nil] :accessibility_color_inside accessibility color inside
|
@@ -103,7 +105,7 @@ module RelatonIec
|
|
103
105
|
end
|
104
106
|
|
105
107
|
# @return [Hash]
|
106
|
-
def to_hash # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
108
|
+
def to_hash(embedded: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
107
109
|
hash = super
|
108
110
|
hash["function"] = function if function
|
109
111
|
if updates_document_type
|
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# require 'isobib/iso_bibliographic_item'
|
4
|
-
require "relaton_iec/scrapper"
|
5
4
|
require "relaton_iec/hit_collection"
|
6
5
|
require "date"
|
7
6
|
|
@@ -10,37 +9,27 @@ module RelatonIec
|
|
10
9
|
class IecBibliography
|
11
10
|
class << self
|
12
11
|
##
|
13
|
-
# Search for standards entries.
|
14
|
-
# pass part parametr.
|
12
|
+
# Search for standards entries.
|
15
13
|
#
|
16
|
-
# @
|
17
|
-
# RelatonIec::IecBibliography.search 'IEC 60050-311', nil, '311'
|
18
|
-
#
|
19
|
-
# @param text [String]
|
14
|
+
# @param ref [String]
|
20
15
|
# @param year [String, nil]
|
21
|
-
# @param part [String, nil] search for packaged stndard if not nil
|
22
16
|
# @return [RelatonIec::HitCollection]
|
23
|
-
def search(
|
24
|
-
HitCollection.new text&.sub(/(^\w+)\//, '\1 '), year&.strip
|
25
|
-
|
26
|
-
|
17
|
+
def search(ref, year = nil)
|
18
|
+
# HitCollection.new text&.sub(/(^\w+)\//, '\1 '), year&.strip
|
19
|
+
HitCollection.new ref, year&.strip
|
20
|
+
rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError => e
|
21
|
+
raise RelatonBib::RequestError, e.message
|
27
22
|
end
|
28
23
|
|
29
|
-
# @param code [String] the
|
24
|
+
# @param code [String] the IEC standard code to look up (e.g. "IEC 8000")
|
30
25
|
# @param year [String] the year the standard was published (optional)
|
31
26
|
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
32
27
|
# reference is required
|
33
28
|
# @return [String] Relaton XML serialisation of reference
|
34
|
-
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
29
|
+
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
35
30
|
opts[:all_parts] ||= code.match?(/\s\(all parts\)/)
|
36
31
|
ref = code.sub(/\s\(all parts\)/, "")
|
37
|
-
|
38
|
-
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ ref
|
39
|
-
unless code1.nil?
|
40
|
-
ref = code1
|
41
|
-
year = year1
|
42
|
-
end
|
43
|
-
end
|
32
|
+
year ||= ref_parts(ref)[:year]
|
44
33
|
return iev if ref.casecmp("IEV").zero?
|
45
34
|
|
46
35
|
ret = iecbib_get(ref, year, opts)
|
@@ -57,65 +46,30 @@ module RelatonIec
|
|
57
46
|
# @param missed_years [Array<String>]
|
58
47
|
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
59
48
|
id = year ? "#{code}:#{year}" : code
|
60
|
-
warn "[relaton-iec] WARNING: no match found online for #{id}. "\
|
49
|
+
warn "[relaton-iec] WARNING: no match found online for #{id}. " \
|
61
50
|
"The code must be exactly like it is on the standards website."
|
62
51
|
unless missed_years.empty?
|
63
|
-
warn "[relaton-iec] (There was no match for #{year}, though there "\
|
52
|
+
warn "[relaton-iec] (There was no match for #{year}, though there " \
|
64
53
|
"were matches found for #{missed_years.join(', ')}.)"
|
65
54
|
end
|
66
55
|
if /\d-\d/.match? code
|
67
|
-
warn "[relaton-iec] The provided document part may not exist, or "\
|
56
|
+
warn "[relaton-iec] The provided document part may not exist, or " \
|
68
57
|
"the document may no longer be published in parts."
|
69
58
|
else
|
70
|
-
warn "[relaton-iec] If you wanted to cite all document parts for "\
|
71
|
-
"the reference, use \"#{code} (all parts)\".\nIf the document "\
|
72
|
-
"is not a standard, use its document type abbreviation (TS, "\
|
59
|
+
warn "[relaton-iec] If you wanted to cite all document parts for " \
|
60
|
+
"the reference, use \"#{code} (all parts)\".\nIf the document " \
|
61
|
+
"is not a standard, use its document type abbreviation (TS, " \
|
73
62
|
"TR, PAS, Guide)."
|
74
63
|
end
|
75
64
|
nil
|
76
65
|
end
|
77
66
|
|
78
|
-
# @param hits [Array<RelatonIec::Hit>]
|
79
|
-
# @param threads [Integer]
|
80
|
-
# @return [Array<RelatonIec::Hit>]
|
81
|
-
# def fetch_pages(hits, threads)
|
82
|
-
# workers = RelatonBib::WorkersPool.new threads
|
83
|
-
# workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
84
|
-
# hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
85
|
-
# workers.end
|
86
|
-
# workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
87
|
-
# end
|
88
|
-
|
89
67
|
# @param ref [String]
|
90
|
-
# @param year [String, nil]
|
91
68
|
# @return [RelatonIec::HitCollection]
|
92
|
-
def search_filter(ref
|
93
|
-
|
94
|
-
year ||= rp1[:year]
|
95
|
-
corr = rp1[:corr]&.sub " ", ""
|
69
|
+
def search_filter(ref)
|
70
|
+
code = ref.split(":").first
|
96
71
|
warn "[relaton-iec] (\"#{ref}\") fetching..."
|
97
|
-
|
98
|
-
code = result.text.dup
|
99
|
-
if result.empty? && /(?<=\d-)(?<part>[\w-]+)/ =~ rp1[:code]
|
100
|
-
# try to search packaged standard
|
101
|
-
result = search rp1[:code], year, part
|
102
|
-
pkg_std = result.any?
|
103
|
-
end
|
104
|
-
result = search rp1[:code] if result.empty?
|
105
|
-
if pkg_std
|
106
|
-
code.sub!(/(?<=\d-)#{part}/, part[0])
|
107
|
-
else
|
108
|
-
code.sub!(/-[-\d]+/, "")
|
109
|
-
end
|
110
|
-
result.select do |i|
|
111
|
-
rp2 = ref_parts i.hit[:code]
|
112
|
-
code2 = if pkg_std
|
113
|
-
rp2[:code].sub(/(?<=\d-\d)\d+/, "")
|
114
|
-
else
|
115
|
-
rp2[:code].sub(/-[-\d]+/, "")
|
116
|
-
end
|
117
|
-
code == code2 && rp1[:bundle] == rp2[:bundle] && corr == rp2[:corr]
|
118
|
-
end
|
72
|
+
search(code)
|
119
73
|
end
|
120
74
|
|
121
75
|
def ref_parts(ref)
|
@@ -160,72 +114,90 @@ module RelatonIec
|
|
160
114
|
XML
|
161
115
|
end
|
162
116
|
|
163
|
-
#
|
117
|
+
# Look for a code in the search results
|
164
118
|
# and return the first result that matches the code,
|
165
|
-
# matches the year (if provided), and which
|
119
|
+
# matches the year (if provided), and which a part
|
166
120
|
# has a title (amendments do not).
|
167
|
-
#
|
168
|
-
|
169
|
-
|
170
|
-
# reporting
|
171
|
-
def results_filter(result, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
172
|
-
r_code, r_year = code_year result.text, result.part
|
121
|
+
# If no match, returns any years which caused mismatch, for error reporting
|
122
|
+
def results_filter(result, ref, year, opts)
|
123
|
+
r_code, r_year, r_amd = code_year ref
|
173
124
|
r_year ||= year
|
174
|
-
|
125
|
+
if opts[:all_parts]
|
126
|
+
ret = result.to_all_parts(r_year)
|
127
|
+
else
|
128
|
+
ret, missed_parts = match_result(result, r_code, r_year, r_amd)
|
129
|
+
end
|
130
|
+
{ ret: ret, years: missed_years(result, r_year), missed_parts: missed_parts }
|
131
|
+
end
|
132
|
+
|
133
|
+
def missed_years(result, year)
|
134
|
+
result.map { |h| codes_years(h.hit[:code])[1] }.flatten.uniq.reject { |y| y == year }
|
135
|
+
end
|
136
|
+
|
137
|
+
#
|
138
|
+
# Find a match in the search results
|
139
|
+
#
|
140
|
+
# @param [RelatonIec::HitCollection] result search results
|
141
|
+
# @param [String] code code of the document
|
142
|
+
# @param [String] year year of the document
|
143
|
+
# @param [String] amd amendment of the document
|
144
|
+
#
|
145
|
+
# @return [Array<RelatonIec::IecBibliographicItem, Array, nil>] result, missed parts
|
146
|
+
#
|
147
|
+
def match_result(result, code, year, amd) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
175
148
|
missed_parts = false
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
# r.date.select { |d| d.type == "published" }.each do |d|
|
189
|
-
# return { ret: r } if year.to_i == d.on(:year)
|
190
|
-
|
191
|
-
# missed_years << d.on(:year)
|
192
|
-
# end
|
193
|
-
# end
|
194
|
-
end&.fetch
|
195
|
-
end
|
196
|
-
{ ret: ret, years: missed_years, missed_parts: missed_parts }
|
149
|
+
ret = result.detect do |h|
|
150
|
+
h_codes, h_years, h_amds = codes_years h.hit[:code]
|
151
|
+
match_code = h_codes.include? code
|
152
|
+
match_year = h_years.include?(year)
|
153
|
+
missed_parts ||= !match_code
|
154
|
+
match_code && (!year || match_year) && match_amd(amd, h_amds)
|
155
|
+
end&.fetch
|
156
|
+
[ret, missed_parts]
|
157
|
+
end
|
158
|
+
|
159
|
+
def match_amd(amd, h_amds)
|
160
|
+
(!amd && h_amds.empty?) || h_amds.include?(amd)
|
197
161
|
end
|
198
162
|
|
199
163
|
# @param ref [String]
|
200
|
-
# @
|
201
|
-
|
202
|
-
def code_year(ref, part)
|
164
|
+
# @return [Array<String, nil>] code, year, amd
|
165
|
+
def code_year(ref)
|
203
166
|
%r{
|
204
|
-
^(?<code>\S+[^\d]*\s\d+(?:-\w+)*)
|
167
|
+
# ^(?<code>\S+[^\d]*\s\d+(?:-\w+)*)
|
168
|
+
^(?<code>\S+\s[^:/]+)
|
205
169
|
(?::(?<year>\d{4}))?
|
170
|
+
(?:/(?<amd>\w+)(?::\d{4})?)?
|
206
171
|
}x =~ ref
|
207
|
-
code
|
208
|
-
[code, year]
|
172
|
+
[code, year, amd&.upcase]
|
209
173
|
end
|
210
174
|
|
211
|
-
# @param
|
175
|
+
# @param refs [String, Array<String>]
|
176
|
+
# @return [Array<Array<String>>] codes, years, amds
|
177
|
+
def codes_years(refs)
|
178
|
+
RelatonBib.array(refs).map do |r|
|
179
|
+
code_year r
|
180
|
+
end.transpose.map { |a| a.compact.uniq }
|
181
|
+
end
|
182
|
+
|
183
|
+
# @param ref [String]
|
212
184
|
# @param year [String, nil]
|
213
185
|
# @param opts [Hash]
|
214
186
|
# @return [RelatonIec::IecBibliographicItem, nil]
|
215
|
-
def iecbib_get(
|
216
|
-
result = search_filter(
|
217
|
-
ret = results_filter(result, year, opts)
|
187
|
+
def iecbib_get(ref, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
188
|
+
result = search_filter(ref) || return
|
189
|
+
ret = results_filter(result, ref, year, opts)
|
218
190
|
if ret[:ret]
|
219
|
-
if ret[:missed_parts]
|
220
|
-
warn "[relaton-iec] WARNING: #{
|
221
|
-
|
222
|
-
|
191
|
+
if ret[:missed_parts] && !opts[:all_parts]
|
192
|
+
warn "[relaton-iec] WARNING: #{ref} found as #{ret[:ret].docidentifier.first.id} " \
|
193
|
+
"but also contain parts. If you wanted to cite all document " \
|
194
|
+
"parts for the reference, use \"#{ref} (all parts)\""
|
223
195
|
else
|
224
|
-
warn "[relaton-iec] (\"#{
|
196
|
+
warn "[relaton-iec] (\"#{ref}\") found #{ret[:ret].docidentifier.first.id}"
|
225
197
|
end
|
226
198
|
ret[:ret]
|
227
199
|
else
|
228
|
-
fetch_ref_err(
|
200
|
+
fetch_ref_err(ref, year, ret[:years])
|
229
201
|
end
|
230
202
|
end
|
231
203
|
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module RelatonIec
|
2
|
+
class Index
|
3
|
+
#
|
4
|
+
# Initialize index.
|
5
|
+
# If index argument is nil, read index from file or from GitHub.
|
6
|
+
# If index argument is not nil, then read index from file or create new
|
7
|
+
# empty index. (use this option for creating index for dataset)
|
8
|
+
#
|
9
|
+
# @param [String, nil] index path to index file
|
10
|
+
#
|
11
|
+
def initialize(index = nil)
|
12
|
+
if index
|
13
|
+
@path = index
|
14
|
+
@index = create_index_file
|
15
|
+
else
|
16
|
+
@index = read_index_file || get_index_from_gh
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
# Add document to index or update existing document
|
22
|
+
#
|
23
|
+
# @param [String, Array<String>] pubid document identifier
|
24
|
+
# @param [String] file document file name
|
25
|
+
# @param [String] change last change date time
|
26
|
+
#
|
27
|
+
# @return [void]
|
28
|
+
#
|
29
|
+
def add(pubid, file, change = nil)
|
30
|
+
item = @index.find { |i| i[:pubid] == pubid }
|
31
|
+
unless item
|
32
|
+
item = { pubid: pubid }
|
33
|
+
@index << item
|
34
|
+
end
|
35
|
+
item[:file] = file
|
36
|
+
item[:last_change] = change if change
|
37
|
+
end
|
38
|
+
|
39
|
+
#
|
40
|
+
# Clear index
|
41
|
+
#
|
42
|
+
# @return [void]
|
43
|
+
#
|
44
|
+
def clear
|
45
|
+
@index.clear
|
46
|
+
end
|
47
|
+
|
48
|
+
#
|
49
|
+
# Last change date
|
50
|
+
#
|
51
|
+
# @return [String, nil] most recent last-change value in the index, or nil if the index is empty
|
52
|
+
#
|
53
|
+
def last_change
|
54
|
+
return unless @index.any?
|
55
|
+
|
56
|
+
@last_change ||= @index.max_by { |i| i[:last_change].to_s }[:last_change]
|
57
|
+
end
|
58
|
+
|
59
|
+
#
|
60
|
+
# Find document in index by reference and sort results by document ID
|
61
|
+
#
|
62
|
+
# @param [String] ref reference
|
63
|
+
#
|
64
|
+
# @return [Array<Hash>] search result
|
65
|
+
#
|
66
|
+
def search(ref)
|
67
|
+
upcase_ref = ref.upcase
|
68
|
+
@index.select do |i|
|
69
|
+
RelatonBib.array(i[:pubid]).detect { |r| r.include? upcase_ref }
|
70
|
+
end.sort_by { |r| r[:pubid].is_a?(Array) ? r[:pubid].min : r[:pubid] }
|
71
|
+
end
|
72
|
+
|
73
|
+
#
|
74
|
+
# Save index to file
|
75
|
+
#
|
76
|
+
# @return [void]
|
77
|
+
#
|
78
|
+
def save
|
79
|
+
File.write @path, @index.to_yaml, encoding: "UTF-8"
|
80
|
+
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
#
|
85
|
+
# Create the directory if needed and return the path to the index file
|
86
|
+
#
|
87
|
+
# @return [String] path to index file
|
88
|
+
#
|
89
|
+
def path
|
90
|
+
@path ||= begin
|
91
|
+
dir = File.join Dir.home, ".relaton", "iec"
|
92
|
+
FileUtils.mkdir_p dir
|
93
|
+
File.join dir, "index.yaml"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
#
|
98
|
+
# Create index file for dataset
|
99
|
+
#
|
100
|
+
# @return [Array<Hash>] index content
|
101
|
+
#
|
102
|
+
def create_index_file
|
103
|
+
return [] unless File.exist? path
|
104
|
+
|
105
|
+
RelatonBib.parse_yaml File.read(path, encoding: "UTF-8"), [Symbol]
|
106
|
+
end
|
107
|
+
|
108
|
+
#
|
109
|
+
# Read index from file if it exists and not outdated
|
110
|
+
#
|
111
|
+
# @return [Hash, nil] index content
|
112
|
+
#
|
113
|
+
def read_index_file
|
114
|
+
return if !File.exist?(path) || File.ctime(path).to_date < Date.today
|
115
|
+
|
116
|
+
RelatonBib.parse_yaml File.read(path, encoding: "UTF-8"), [Symbol]
|
117
|
+
end
|
118
|
+
|
119
|
+
#
|
120
|
+
# Get index from a GitHub repository
|
121
|
+
#
|
122
|
+
# @return [Hash] index content
|
123
|
+
#
|
124
|
+
def get_index_from_gh # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
125
|
+
resp = Zip::InputStream.new URI("#{Hit::GHURL}index.zip").open
|
126
|
+
zip = resp.get_next_entry
|
127
|
+
yaml = zip.get_input_stream.read
|
128
|
+
index = RelatonBib.parse_yaml yaml, [Symbol]
|
129
|
+
File.write path, index.to_yaml, encoding: "UTF-8"
|
130
|
+
index
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
@@ -7,6 +7,7 @@ module RelatonIec
|
|
7
7
|
@prefix = "IEC"
|
8
8
|
@defaultprefix = %r{^IEC\s|^IEV($|\s)}
|
9
9
|
@idtype = "IEC"
|
10
|
+
@datasets = %w[iec-harmonized-all iec-harmonized-latest]
|
10
11
|
end
|
11
12
|
|
12
13
|
# @param code [String]
|
@@ -17,6 +18,18 @@ module RelatonIec
|
|
17
18
|
::RelatonIec::IecBibliography.get(code, date, opts)
|
18
19
|
end
|
19
20
|
|
21
|
+
#
|
22
|
+
# Fetch all the documents from a source
|
23
|
+
#
|
24
|
+
# @param [String] source source name (iec-harmonized-all, iec-harmonized-latest)
|
25
|
+
# @param [Hash] opts
|
26
|
+
# @option opts [String] :output directory to output documents
|
27
|
+
# @option opts [String] :format output format (xml, yaml, bibxml)
|
28
|
+
#
|
29
|
+
def fetch_data(source, opts)
|
30
|
+
DataFetcher.new(source, **opts).fetch
|
31
|
+
end
|
32
|
+
|
20
33
|
# @param xml [String]
|
21
34
|
# @return [RelatonIsoBib::IecBibliographicItem]
|
22
35
|
def from_xml(xml)
|
data/lib/relaton_iec/version.rb
CHANGED
data/lib/relaton_iec.rb
CHANGED
@@ -1,15 +1,19 @@
|
|
1
|
-
require "
|
2
|
-
require "relaton_iec/hit"
|
3
|
-
require "nokogiri"
|
1
|
+
require "digest/md5"
|
4
2
|
require "net/http"
|
3
|
+
require "nokogiri"
|
4
|
+
require "zip"
|
5
5
|
require "open-uri"
|
6
|
-
require "
|
6
|
+
require "relaton/index"
|
7
|
+
require "relaton_iso_bib"
|
8
|
+
require "relaton_iec/hit"
|
7
9
|
require "relaton_iec/version"
|
8
10
|
require "relaton_iec/iec_bibliography"
|
9
11
|
require "relaton_iec/iec_bibliographic_item"
|
10
12
|
require "relaton_iec/xml_parser"
|
11
13
|
require "relaton_iec/hash_converter"
|
12
|
-
require "
|
14
|
+
# require "relaton_iec/index"
|
15
|
+
require "relaton_iec/data_fetcher"
|
16
|
+
require "relaton_iec/data_parser"
|
13
17
|
|
14
18
|
module RelatonIec
|
15
19
|
class << self
|
@@ -73,7 +77,7 @@ module RelatonIec
|
|
73
77
|
# @return [Array<String, nil>]
|
74
78
|
def ajunct_to_urn(rest)
|
75
79
|
r = rest.sub(%r{
|
76
|
-
(?<pl
|
80
|
+
(?<pl>\+|/)(?(<pl>)(?<adjunct>(?:amd|cor|ish))(?<adjnum>\d+)\s?)
|
77
81
|
(?<_d2>:)?(?(<_d2>)(?<adjdt>[\d-]+)\s?)
|
78
82
|
}x, "")
|
79
83
|
m = $~ || {}
|
data/relaton_iec.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
lib = File.expand_path("
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
3
|
require "relaton_iec/version"
|
4
4
|
|
@@ -8,9 +8,9 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ["Ribose Inc."]
|
9
9
|
spec.email = ["open.source@ribose.com"]
|
10
10
|
|
11
|
-
spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic "\
|
11
|
+
spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic " \
|
12
12
|
"use using the IecBibliographicItem model"
|
13
|
-
spec.description = "RelatonIec: retrieve IEC Standards for bibliographic "\
|
13
|
+
spec.description = "RelatonIec: retrieve IEC Standards for bibliographic " \
|
14
14
|
"use using the IecBibliographicItem model"
|
15
15
|
spec.homepage = "https://github.com/metanorma/relaton-iec"
|
16
16
|
spec.license = "MIT"
|
@@ -24,14 +24,11 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
25
25
|
|
26
26
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
27
|
-
spec.add_development_dependency "pry-byebug"
|
28
27
|
spec.add_development_dependency "rake", "~> 13.0"
|
29
28
|
spec.add_development_dependency "rspec", "~> 3.0"
|
30
|
-
spec.add_development_dependency "ruby-jing"
|
31
|
-
spec.add_development_dependency "simplecov"
|
32
|
-
spec.add_development_dependency "vcr"
|
33
|
-
spec.add_development_dependency "webmock"
|
34
29
|
|
35
30
|
spec.add_dependency "addressable"
|
31
|
+
spec.add_dependency "relaton-index", "~> 0.1.6"
|
36
32
|
spec.add_dependency "relaton-iso-bib", "~> 1.14.0"
|
33
|
+
spec.add_dependency "rubyzip"
|
37
34
|
end
|