relaton-iec 1.14.0 → 1.14.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +31 -26
- data/lib/relaton_iec/data_fetcher.rb +166 -0
- data/lib/relaton_iec/data_parser.rb +287 -0
- data/lib/relaton_iec/hit.rb +9 -1
- data/lib/relaton_iec/hit_collection.rb +15 -79
- data/lib/relaton_iec/iec_bibliographic_item.rb +20 -5
- data/lib/relaton_iec/iec_bibliography.rb +83 -111
- data/lib/relaton_iec/index.rb +133 -0
- data/lib/relaton_iec/processor.rb +13 -0
- data/lib/relaton_iec/version.rb +1 -1
- data/lib/relaton_iec.rb +9 -6
- data/relaton_iec.gemspec +4 -3
- metadata +23 -7
- data/lib/relaton_iec/scrapper.rb +0 -308
@@ -6,25 +6,22 @@ require "addressable/uri"
|
|
6
6
|
module RelatonIec
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
-
def_delegators :@array, :detect
|
9
|
+
def_delegators :@array, :detect, :map
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
DOMAIN = "https://webstore.iec.ch"
|
11
|
+
# DOMAIN = "https://webstore.iec.ch"
|
14
12
|
|
15
13
|
# @param ref [String]
|
16
14
|
# @param year [String, nil]
|
17
|
-
|
18
|
-
def initialize(ref, year = nil, part = nil)
|
15
|
+
def initialize(ref, year = nil)
|
19
16
|
super ref, year
|
20
|
-
@
|
21
|
-
@array =
|
17
|
+
@index = Index.new
|
18
|
+
@array = fetch_from_gh
|
22
19
|
end
|
23
20
|
|
24
21
|
# @return [RelatonIec::IecBibliographicItem]
|
25
|
-
def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity
|
26
|
-
parts = @array.
|
27
|
-
hit = parts.min_by
|
22
|
+
def to_all_parts(r_year) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
23
|
+
parts = @array.select { |h| h.part && h.hit[:code].match?(/^[\s\w-]+:#{r_year}/) }
|
24
|
+
hit = parts.min_by { |h| h.part.to_i }
|
28
25
|
return @array.first&.fetch unless hit
|
29
26
|
|
30
27
|
bibitem = hit.fetch
|
@@ -41,76 +38,15 @@ module RelatonIec
|
|
41
38
|
|
42
39
|
private
|
43
40
|
|
44
|
-
|
45
|
-
|
46
|
-
# @return [Array<RelatonIec::Hit>]
|
47
|
-
def hits(ref, year) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
48
|
-
if /61360-4\sDB|ISO[\s\/]IEC\sDIR/.match?(ref)
|
49
|
-
fetch_from_gh ref
|
50
|
-
else
|
51
|
-
from, to = nil
|
52
|
-
if year
|
53
|
-
from = Date.strptime year, "%Y"
|
54
|
-
to = from.next_year.prev_day
|
55
|
-
end
|
56
|
-
get_results ref, from, to
|
57
|
-
end
|
58
|
-
# file = "../data/#{ref.sub(/^IEC\s/, '').gsub(/[\s\/]/, '_').upcase}.yaml"
|
59
|
-
# path = File.expand_path file, __dir__
|
60
|
-
# if File.exist? path
|
61
|
-
# hash = YAML.safe_load File.read(path, encoding: "utf-8")
|
62
|
-
# hit = Hit.new({ code: ref }, self)
|
63
|
-
# hit.fetch = IecBibliographicItem.from_hash hash
|
64
|
-
# return [hit]
|
65
|
-
# end
|
66
|
-
end
|
67
|
-
|
68
|
-
def fetch_from_gh(ref)
|
69
|
-
file = ref.sub(/^IEC\s(?=ISO|CISPR)/, "").gsub(/[\s\/]/, "_").upcase
|
70
|
-
url = "https://raw.githubusercontent.com/relaton/relaton-data-iec/main/data/#{file}.yaml"
|
71
|
-
resp = Net::HTTP.get URI(url)
|
72
|
-
hash = YAML.safe_load resp
|
73
|
-
hash["fetched"] = Date.today.to_s
|
74
|
-
hit = Hit.new({ code: ref }, self)
|
75
|
-
hit.fetch = IecBibliographicItem.from_hash hash
|
76
|
-
[hit]
|
77
|
-
end
|
41
|
+
def fetch_from_gh
|
42
|
+
return [] unless text
|
78
43
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
code = part ? ref.sub(/(?<=-\d)\d+/, "*") : ref
|
85
|
-
[nil, "trf", "wr"].reduce([]) do |m, t|
|
86
|
-
url = "#{DOMAIN}/searchkey"
|
87
|
-
url += "&type=#{t}" if t
|
88
|
-
url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
|
89
|
-
m + results(Addressable::URI.parse(url).normalize)
|
44
|
+
ref = year && !/:\d{4}$/.match?(text) ? "#{text}:#{year}" : text
|
45
|
+
reference = ref.sub(/^IEC\s(?=ISO\/IEC\sDIR)/, "")
|
46
|
+
@index.search(reference).map do |row|
|
47
|
+
# pubid = row[:pubid].is_a?(Array) ? row[:pubid][0] : row[:pubid]
|
48
|
+
Hit.new({ code: row[:pubid], file: row[:file] }, self)
|
90
49
|
end
|
91
50
|
end
|
92
|
-
|
93
|
-
# @param url [String]
|
94
|
-
# @return [Array<RelatonIec::Hit>]
|
95
|
-
def results(uri)
|
96
|
-
contains = "[contains(.,'Part #{part}:')]" if part
|
97
|
-
resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
|
98
|
-
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
|
99
|
-
doc = Nokogiri::HTML(resp)
|
100
|
-
doc.xpath(
|
101
|
-
"//body/li#{contains}",
|
102
|
-
"//ul[contains(@class,'search-results')]/li#{contains}",
|
103
|
-
"//ul[contains(@class,'morethesame')]/li#{contains}"
|
104
|
-
).map { |h| make_hit h }.compact
|
105
|
-
end
|
106
|
-
|
107
|
-
def make_hit(hit)
|
108
|
-
link = hit.at('a[@href!="#"]')
|
109
|
-
return unless link
|
110
|
-
|
111
|
-
code = link.text.tr [194, 160].pack("c*").force_encoding("UTF-8"), ""
|
112
|
-
title = hit.xpath("text()").text.gsub(/[\r\n]/, "")
|
113
|
-
Hit.new({ code: code, title: title, url: DOMAIN + link[:href] }, self)
|
114
|
-
end
|
115
51
|
end
|
116
52
|
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
module RelatonIec
|
2
2
|
class IecBibliographicItem < RelatonIsoBib::IsoBibliographicItem
|
3
|
-
|
3
|
+
DOCTYPES = %w[
|
4
4
|
international-standard technical-specification technical-report
|
5
5
|
publicly-available-specification international-workshop-agreement
|
6
|
-
guide industry-technical-agreement system-reference-
|
6
|
+
guide industry-technical-agreement system-reference-deliverable
|
7
7
|
].freeze
|
8
8
|
|
9
|
+
DOCSUBTYPES = %w[specification method-of-test vocabulary code-of-practice].freeze
|
10
|
+
|
9
11
|
FUNCTION = %w[emc safety enviroment quality-assurance].freeze
|
10
12
|
|
11
13
|
# @return [String, nil]
|
@@ -17,17 +19,30 @@ module RelatonIec
|
|
17
19
|
|
18
20
|
# attr_reader :tc_sc_officers_note
|
19
21
|
|
22
|
+
#
|
23
|
+
# Initialize instance of RelatonIec::IecBibliographicItem
|
24
|
+
#
|
25
|
+
# @param [Hash] **args hash of attributes
|
26
|
+
# @option args [String, nil] :function function
|
27
|
+
# @option args [String, nil] :updates_document_type updates document type
|
28
|
+
# @option args [String, nil] :price_code price code
|
29
|
+
# @option args [Boolean, nil] :cen_processing
|
30
|
+
# @option args [String, nil] :secretary
|
31
|
+
# @option args [String, nil] :secretary secretary
|
32
|
+
# @option args [String, nil] :interest_to_committees interest to committees
|
33
|
+
# @option args [Boolean, nil] :accessibility_color_inside accessibility color inside
|
34
|
+
#
|
20
35
|
def initialize(**args) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
21
36
|
if args[:function] && !FUNCTION.include?(args[:function])
|
22
37
|
warn "[relaton-iec] WARNING: invalid function \"#{args[:function]}\""
|
23
38
|
warn "[relaton-iec] allowed function values are: #{FUNCTION.join(', ')}"
|
24
39
|
end
|
25
40
|
if args[:updates_document_type] &&
|
26
|
-
!
|
41
|
+
!DOCTYPES.include?(args[:updates_document_type])
|
27
42
|
warn "[relaton-iec] WARNING: invalid updates_document_type "\
|
28
43
|
"\"#{args[:updates_document_type]}\""
|
29
44
|
warn "[relaton-iec] allowed updates_document_type values are: "\
|
30
|
-
"#{
|
45
|
+
"#{DOCTYPES.join(', ')}"
|
31
46
|
end
|
32
47
|
@function = args.delete :function
|
33
48
|
@updates_document_type = args.delete :updates_document_type
|
@@ -90,7 +105,7 @@ module RelatonIec
|
|
90
105
|
end
|
91
106
|
|
92
107
|
# @return [Hash]
|
93
|
-
def to_hash # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
108
|
+
def to_hash(embedded: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
94
109
|
hash = super
|
95
110
|
hash["function"] = function if function
|
96
111
|
if updates_document_type
|
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# require 'isobib/iso_bibliographic_item'
|
4
|
-
require "relaton_iec/scrapper"
|
5
4
|
require "relaton_iec/hit_collection"
|
6
5
|
require "date"
|
7
6
|
|
@@ -10,37 +9,27 @@ module RelatonIec
|
|
10
9
|
class IecBibliography
|
11
10
|
class << self
|
12
11
|
##
|
13
|
-
# Search for standards entries.
|
14
|
-
# pass part parametr.
|
12
|
+
# Search for standards entries.
|
15
13
|
#
|
16
|
-
# @
|
17
|
-
# RelatonIec::IecBibliography.search 'IEC 60050-311', nil, '311'
|
18
|
-
#
|
19
|
-
# @param text [String]
|
14
|
+
# @param ref [String]
|
20
15
|
# @param year [String, nil]
|
21
|
-
# @param part [String, nil] search for packaged stndard if not nil
|
22
16
|
# @return [RelatonIec::HitCollection]
|
23
|
-
def search(
|
24
|
-
HitCollection.new text&.sub(/(^\w+)\//, '\1 '), year&.strip
|
25
|
-
|
26
|
-
|
17
|
+
def search(ref, year = nil)
|
18
|
+
# HitCollection.new text&.sub(/(^\w+)\//, '\1 '), year&.strip
|
19
|
+
HitCollection.new ref, year&.strip
|
20
|
+
rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError => e
|
21
|
+
raise RelatonBib::RequestError, e.message
|
27
22
|
end
|
28
23
|
|
29
|
-
# @param code [String] the
|
24
|
+
# @param code [String] the IEC standard code to look up (e.g. "IEC 8000")
|
30
25
|
# @param year [String] the year the standard was published (optional)
|
31
26
|
# @param opts [Hash] options; restricted to :all_parts if all-parts
|
32
27
|
# reference is required
|
33
28
|
# @return [String] Relaton XML serialisation of reference
|
34
|
-
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
29
|
+
def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
35
30
|
opts[:all_parts] ||= code.match?(/\s\(all parts\)/)
|
36
31
|
ref = code.sub(/\s\(all parts\)/, "")
|
37
|
-
|
38
|
-
/^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ ref
|
39
|
-
unless code1.nil?
|
40
|
-
ref = code1
|
41
|
-
year = year1
|
42
|
-
end
|
43
|
-
end
|
32
|
+
year ||= ref_parts(ref)[:year]
|
44
33
|
return iev if ref.casecmp("IEV").zero?
|
45
34
|
|
46
35
|
ret = iecbib_get(ref, year, opts)
|
@@ -57,65 +46,30 @@ module RelatonIec
|
|
57
46
|
# @param missed_years [Array<String>]
|
58
47
|
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
59
48
|
id = year ? "#{code}:#{year}" : code
|
60
|
-
warn "[relaton-iec] WARNING: no match found online for #{id}. "\
|
49
|
+
warn "[relaton-iec] WARNING: no match found online for #{id}. " \
|
61
50
|
"The code must be exactly like it is on the standards website."
|
62
51
|
unless missed_years.empty?
|
63
|
-
warn "[relaton-iec] (There was no match for #{year}, though there "\
|
52
|
+
warn "[relaton-iec] (There was no match for #{year}, though there " \
|
64
53
|
"were matches found for #{missed_years.join(', ')}.)"
|
65
54
|
end
|
66
55
|
if /\d-\d/.match? code
|
67
|
-
warn "[relaton-iec] The provided document part may not exist, or "\
|
56
|
+
warn "[relaton-iec] The provided document part may not exist, or " \
|
68
57
|
"the document may no longer be published in parts."
|
69
58
|
else
|
70
|
-
warn "[relaton-iec] If you wanted to cite all document parts for "\
|
71
|
-
"the reference, use \"#{code} (all parts)\".\nIf the document "\
|
72
|
-
"is not a standard, use its document type abbreviation (TS, "\
|
59
|
+
warn "[relaton-iec] If you wanted to cite all document parts for " \
|
60
|
+
"the reference, use \"#{code} (all parts)\".\nIf the document " \
|
61
|
+
"is not a standard, use its document type abbreviation (TS, " \
|
73
62
|
"TR, PAS, Guide)."
|
74
63
|
end
|
75
64
|
nil
|
76
65
|
end
|
77
66
|
|
78
|
-
# @param hits [Array<RelatonIec::Hit>]
|
79
|
-
# @param threads [Integer]
|
80
|
-
# @return [Array<RelatonIec::Hit>]
|
81
|
-
# def fetch_pages(hits, threads)
|
82
|
-
# workers = RelatonBib::WorkersPool.new threads
|
83
|
-
# workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
84
|
-
# hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
85
|
-
# workers.end
|
86
|
-
# workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
|
87
|
-
# end
|
88
|
-
|
89
67
|
# @param ref [String]
|
90
|
-
# @param year [String, nil]
|
91
68
|
# @return [RelatonIec::HitCollection]
|
92
|
-
def search_filter(ref
|
93
|
-
|
94
|
-
year ||= rp1[:year]
|
95
|
-
corr = rp1[:corr]&.sub " ", ""
|
69
|
+
def search_filter(ref)
|
70
|
+
code = ref.split(":").first
|
96
71
|
warn "[relaton-iec] (\"#{ref}\") fetching..."
|
97
|
-
|
98
|
-
code = result.text.dup
|
99
|
-
if result.empty? && /(?<=\d-)(?<part>[\w-]+)/ =~ rp1[:code]
|
100
|
-
# try to search packaged standard
|
101
|
-
result = search rp1[:code], year, part
|
102
|
-
pkg_std = result.any?
|
103
|
-
end
|
104
|
-
result = search rp1[:code] if result.empty?
|
105
|
-
if pkg_std
|
106
|
-
code.sub!(/(?<=\d-)#{part}/, part[0])
|
107
|
-
else
|
108
|
-
code.sub!(/-[-\d]+/, "")
|
109
|
-
end
|
110
|
-
result.select do |i|
|
111
|
-
rp2 = ref_parts i.hit[:code]
|
112
|
-
code2 = if pkg_std
|
113
|
-
rp2[:code].sub(/(?<=\d-\d)\d+/, "")
|
114
|
-
else
|
115
|
-
rp2[:code].sub(/-[-\d]+/, "")
|
116
|
-
end
|
117
|
-
code == code2 && rp1[:bundle] == rp2[:bundle] && corr == rp2[:corr]
|
118
|
-
end
|
72
|
+
search(code)
|
119
73
|
end
|
120
74
|
|
121
75
|
def ref_parts(ref)
|
@@ -160,72 +114,90 @@ module RelatonIec
|
|
160
114
|
XML
|
161
115
|
end
|
162
116
|
|
163
|
-
#
|
117
|
+
# Look for a code in the search results
|
164
118
|
# and return the first result that matches the code,
|
165
|
-
# matches the year (if provided), and which
|
119
|
+
# matches the year (if provided), and which a part
|
166
120
|
# has a title (amendments do not).
|
167
|
-
#
|
168
|
-
|
169
|
-
|
170
|
-
# reporting
|
171
|
-
def results_filter(result, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
|
172
|
-
r_code, r_year = code_year result.text, result.part
|
121
|
+
# If no match, returns any years which caused mismatch, for error reporting
|
122
|
+
def results_filter(result, ref, year, opts)
|
123
|
+
r_code, r_year, r_amd = code_year ref
|
173
124
|
r_year ||= year
|
174
|
-
|
125
|
+
if opts[:all_parts]
|
126
|
+
ret = result.to_all_parts(r_year)
|
127
|
+
else
|
128
|
+
ret, missed_parts = match_result(result, r_code, r_year, r_amd)
|
129
|
+
end
|
130
|
+
{ ret: ret, years: missed_years(result, r_year), missed_parts: missed_parts }
|
131
|
+
end
|
132
|
+
|
133
|
+
def missed_years(result, year)
|
134
|
+
result.map { |h| codes_years(h.hit[:code])[1] }.flatten.uniq.reject { |y| y == year }
|
135
|
+
end
|
136
|
+
|
137
|
+
#
|
138
|
+
# Find a match in the search results
|
139
|
+
#
|
140
|
+
# @param [RelatonIec::HitCollection] result search results
|
141
|
+
# @param [String] code code of the document
|
142
|
+
# @param [String] year year of the document
|
143
|
+
# @param [String] amd amendment of the document
|
144
|
+
#
|
145
|
+
# @return [Array<RelatonIec::IecBibliographicItem, Array, nil>] result, missed parts
|
146
|
+
#
|
147
|
+
def match_result(result, code, year, amd) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
175
148
|
missed_parts = false
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
# r.date.select { |d| d.type == "published" }.each do |d|
|
189
|
-
# return { ret: r } if year.to_i == d.on(:year)
|
190
|
-
|
191
|
-
# missed_years << d.on(:year)
|
192
|
-
# end
|
193
|
-
# end
|
194
|
-
end&.fetch
|
195
|
-
end
|
196
|
-
{ ret: ret, years: missed_years, missed_parts: missed_parts }
|
149
|
+
ret = result.detect do |h|
|
150
|
+
h_codes, h_years, h_amds = codes_years h.hit[:code]
|
151
|
+
match_code = h_codes.include? code
|
152
|
+
match_year = h_years.include?(year)
|
153
|
+
missed_parts ||= !match_code
|
154
|
+
match_code && (!year || match_year) && match_amd(amd, h_amds)
|
155
|
+
end&.fetch
|
156
|
+
[ret, missed_parts]
|
157
|
+
end
|
158
|
+
|
159
|
+
def match_amd(amd, h_amds)
|
160
|
+
(!amd && h_amds.empty?) || h_amds.include?(amd)
|
197
161
|
end
|
198
162
|
|
199
163
|
# @param ref [String]
|
200
|
-
# @
|
201
|
-
|
202
|
-
def code_year(ref, part)
|
164
|
+
# @return [Array<String, nil>] code, year, amd
|
165
|
+
def code_year(ref)
|
203
166
|
%r{
|
204
|
-
^(?<code>\S+[^\d]*\s\d+(?:-\w+)*)
|
167
|
+
# ^(?<code>\S+[^\d]*\s\d+(?:-\w+)*)
|
168
|
+
^(?<code>\S+\s[^:/]+)
|
205
169
|
(?::(?<year>\d{4}))?
|
170
|
+
(?:/(?<amd>\w+)(?::\d{4})?)?
|
206
171
|
}x =~ ref
|
207
|
-
code
|
208
|
-
[code, year]
|
172
|
+
[code, year, amd&.upcase]
|
209
173
|
end
|
210
174
|
|
211
|
-
# @param
|
175
|
+
# @param refs [String, Array<String>]
|
176
|
+
# @return [Array<Array<String>>] codes, years, amds
|
177
|
+
def codes_years(refs)
|
178
|
+
RelatonBib.array(refs).map do |r|
|
179
|
+
code_year r
|
180
|
+
end.transpose.map { |a| a.compact.uniq }
|
181
|
+
end
|
182
|
+
|
183
|
+
# @param ref [String]
|
212
184
|
# @param year [String, nil]
|
213
185
|
# @param opts [Hash]
|
214
186
|
# @return [RelatonIec::IecBibliographicItem, nil]
|
215
|
-
def iecbib_get(
|
216
|
-
result = search_filter(
|
217
|
-
ret = results_filter(result, year, opts)
|
187
|
+
def iecbib_get(ref, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
188
|
+
result = search_filter(ref) || return
|
189
|
+
ret = results_filter(result, ref, year, opts)
|
218
190
|
if ret[:ret]
|
219
|
-
if ret[:missed_parts]
|
220
|
-
warn "[relaton-iec] WARNING: #{
|
221
|
-
|
222
|
-
|
191
|
+
if ret[:missed_parts] && !opts[:all_parts]
|
192
|
+
warn "[relaton-iec] WARNING: #{ref} found as #{ret[:ret].docidentifier.first.id} " \
|
193
|
+
"but also contain parts. If you wanted to cite all document " \
|
194
|
+
"parts for the reference, use \"#{ref} (all parts)\""
|
223
195
|
else
|
224
|
-
warn "[relaton-iec] (\"#{
|
196
|
+
warn "[relaton-iec] (\"#{ref}\") found #{ret[:ret].docidentifier.first.id}"
|
225
197
|
end
|
226
198
|
ret[:ret]
|
227
199
|
else
|
228
|
-
fetch_ref_err(
|
200
|
+
fetch_ref_err(ref, year, ret[:years])
|
229
201
|
end
|
230
202
|
end
|
231
203
|
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module RelatonIec
|
2
|
+
class Index
|
3
|
+
#
|
4
|
+
# Initialize index.
|
5
|
+
# If index argument is nil, read index from file or from GitHub.
|
6
|
+
# If index argument is not nil, then read index from file or create new
|
7
|
+
# empty index. (use this option for creating index for dataset)
|
8
|
+
#
|
9
|
+
# @param [String, nil] index path to index file
|
10
|
+
#
|
11
|
+
def initialize(index = nil)
|
12
|
+
if index
|
13
|
+
@path = index
|
14
|
+
@index = create_index_file
|
15
|
+
else
|
16
|
+
@index = read_index_file || get_index_from_gh
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
# Add document to index or update existing document
|
22
|
+
#
|
23
|
+
# @param [String, Array<String>] pubid document identifier
|
24
|
+
# @param [String] file document file name
|
25
|
+
# @param [String] change last change date time
|
26
|
+
#
|
27
|
+
# @return [void]
|
28
|
+
#
|
29
|
+
def add(pubid, file, change = nil)
|
30
|
+
item = @index.find { |i| i[:pubid] == pubid }
|
31
|
+
unless item
|
32
|
+
item = { pubid: pubid }
|
33
|
+
@index << item
|
34
|
+
end
|
35
|
+
item[:file] = file
|
36
|
+
item[:last_change] = change if change
|
37
|
+
end
|
38
|
+
|
39
|
+
#
|
40
|
+
# Clear index
|
41
|
+
#
|
42
|
+
# @return [void]
|
43
|
+
#
|
44
|
+
def clear
|
45
|
+
@index.clear
|
46
|
+
end
|
47
|
+
|
48
|
+
#
|
49
|
+
# Last change date
|
50
|
+
#
|
51
|
+
# @return [String, nil] the greatest :last_change value in the index, nil if the index is empty
|
52
|
+
#
|
53
|
+
def last_change
|
54
|
+
return unless @index.any?
|
55
|
+
|
56
|
+
@last_change ||= @index.max_by { |i| i[:last_change].to_s }[:last_change]
|
57
|
+
end
|
58
|
+
|
59
|
+
#
|
60
|
+
# Find document in index by reference and sort results by document ID
|
61
|
+
#
|
62
|
+
# @param [String] ref reference
|
63
|
+
#
|
64
|
+
# @return [Array<Hash>] search result
|
65
|
+
#
|
66
|
+
def search(ref)
|
67
|
+
upcase_ref = ref.upcase
|
68
|
+
@index.select do |i|
|
69
|
+
RelatonBib.array(i[:pubid]).detect { |r| r.include? upcase_ref }
|
70
|
+
end.sort_by { |r| r[:pubid].is_a?(Array) ? r[:pubid].min : r[:pubid] }
|
71
|
+
end
|
72
|
+
|
73
|
+
#
|
74
|
+
# Save index to file
|
75
|
+
#
|
76
|
+
# @return [void]
|
77
|
+
#
|
78
|
+
def save
|
79
|
+
File.write @path, @index.to_yaml, encoding: "UTF-8"
|
80
|
+
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
#
|
85
|
+
# Create the directory if needed and return the path to the index file
|
86
|
+
#
|
87
|
+
# @return [String] path to index file
|
88
|
+
#
|
89
|
+
def path
|
90
|
+
@path ||= begin
|
91
|
+
dir = File.join Dir.home, ".relaton", "iec"
|
92
|
+
FileUtils.mkdir_p dir
|
93
|
+
File.join dir, "index.yaml"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
#
|
98
|
+
# Create index file for dataset
|
99
|
+
#
|
100
|
+
# @return [Array<Hash>] index content
|
101
|
+
#
|
102
|
+
def create_index_file
|
103
|
+
return [] unless File.exist? path
|
104
|
+
|
105
|
+
RelatonBib.parse_yaml File.read(path, encoding: "UTF-8"), [Symbol]
|
106
|
+
end
|
107
|
+
|
108
|
+
#
|
109
|
+
# Read index from file if it exists and is not outdated
|
110
|
+
#
|
111
|
+
# @return [Array<Hash>, nil] index content
|
112
|
+
#
|
113
|
+
def read_index_file
|
114
|
+
return if !File.exist?(path) || File.ctime(path).to_date < Date.today
|
115
|
+
|
116
|
+
RelatonBib.parse_yaml File.read(path, encoding: "UTF-8"), [Symbol]
|
117
|
+
end
|
118
|
+
|
119
|
+
#
|
120
|
+
# Get index from a GitHub repository
|
121
|
+
#
|
122
|
+
# @return [Array<Hash>] index content
|
123
|
+
#
|
124
|
+
def get_index_from_gh # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
125
|
+
resp = Zip::InputStream.new URI("#{Hit::GHURL}index.zip").open
|
126
|
+
zip = resp.get_next_entry
|
127
|
+
yaml = zip.get_input_stream.read
|
128
|
+
index = RelatonBib.parse_yaml yaml, [Symbol]
|
129
|
+
File.write path, index.to_yaml, encoding: "UTF-8"
|
130
|
+
index
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
@@ -7,6 +7,7 @@ module RelatonIec
|
|
7
7
|
@prefix = "IEC"
|
8
8
|
@defaultprefix = %r{^IEC\s|^IEV($|\s)}
|
9
9
|
@idtype = "IEC"
|
10
|
+
@datasets = %w[iec-harmonized-all iec-harmonized-latest]
|
10
11
|
end
|
11
12
|
|
12
13
|
# @param code [String]
|
@@ -17,6 +18,18 @@ module RelatonIec
|
|
17
18
|
::RelatonIec::IecBibliography.get(code, date, opts)
|
18
19
|
end
|
19
20
|
|
21
|
+
#
|
22
|
+
# Fetch all the documents from a source
|
23
|
+
#
|
24
|
+
# @param [String] source source name (iec-harmonized-all, iec-harmonized-latest)
|
25
|
+
# @param [Hash] opts
|
26
|
+
# @option opts [String] :output directory to output documents
|
27
|
+
# @option opts [String] :format output format (xml, yaml, bibxml)
|
28
|
+
#
|
29
|
+
def fetch_data(source, opts)
|
30
|
+
DataFetcher.new(source, **opts).fetch
|
31
|
+
end
|
32
|
+
|
20
33
|
# @param xml [String]
|
21
34
|
# @return [RelatonIsoBib::IecBibliographicItem]
|
22
35
|
def from_xml(xml)
|
data/lib/relaton_iec/version.rb
CHANGED
data/lib/relaton_iec.rb
CHANGED
@@ -1,15 +1,18 @@
|
|
1
|
-
require "
|
2
|
-
require "relaton_iec/hit"
|
3
|
-
require "nokogiri"
|
1
|
+
require "digest/md5"
|
4
2
|
require "net/http"
|
3
|
+
require "nokogiri"
|
4
|
+
require "zip"
|
5
5
|
require "open-uri"
|
6
|
-
require "
|
6
|
+
require "relaton_iso_bib"
|
7
|
+
require "relaton_iec/hit"
|
7
8
|
require "relaton_iec/version"
|
8
9
|
require "relaton_iec/iec_bibliography"
|
9
10
|
require "relaton_iec/iec_bibliographic_item"
|
10
11
|
require "relaton_iec/xml_parser"
|
11
12
|
require "relaton_iec/hash_converter"
|
12
|
-
require "
|
13
|
+
require "relaton_iec/index"
|
14
|
+
require "relaton_iec/data_fetcher"
|
15
|
+
require "relaton_iec/data_parser"
|
13
16
|
|
14
17
|
module RelatonIec
|
15
18
|
class << self
|
@@ -73,7 +76,7 @@ module RelatonIec
|
|
73
76
|
# @return [Array<String, nil>]
|
74
77
|
def ajunct_to_urn(rest)
|
75
78
|
r = rest.sub(%r{
|
76
|
-
(?<pl
|
79
|
+
(?<pl>\+|/)(?(<pl>)(?<adjunct>(?:amd|cor|ish))(?<adjnum>\d+)\s?)
|
77
80
|
(?<_d2>:)?(?(<_d2>)(?<adjdt>[\d-]+)\s?)
|
78
81
|
}x, "")
|
79
82
|
m = $~ || {}
|
data/relaton_iec.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
lib = File.expand_path("
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
3
|
require "relaton_iec/version"
|
4
4
|
|
@@ -8,9 +8,9 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ["Ribose Inc."]
|
9
9
|
spec.email = ["open.source@ribose.com"]
|
10
10
|
|
11
|
-
spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic "\
|
11
|
+
spec.summary = "RelatonIec: retrieve IEC Standards for bibliographic " \
|
12
12
|
"use using the IecBibliographicItem model"
|
13
|
-
spec.description = "RelatonIec: retrieve IEC Standards for bibliographic "\
|
13
|
+
spec.description = "RelatonIec: retrieve IEC Standards for bibliographic " \
|
14
14
|
"use using the IecBibliographicItem model"
|
15
15
|
spec.homepage = "https://github.com/metanorma/relaton-iec"
|
16
16
|
spec.license = "MIT"
|
@@ -34,4 +34,5 @@ Gem::Specification.new do |spec|
|
|
34
34
|
|
35
35
|
spec.add_dependency "addressable"
|
36
36
|
spec.add_dependency "relaton-iso-bib", "~> 1.14.0"
|
37
|
+
spec.add_dependency "rubyzip"
|
37
38
|
end
|