relaton-iec 1.7.0 → 1.7.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: affb3c4e8c5d26115be469d456d5b3de4f89f1184226b7a51b8dbabdd1cae937
4
- data.tar.gz: cb850e8a8cbae05c66b629db7f2ee548debf0f1dfe3d03d6dc75e8c6e85900d2
3
+ metadata.gz: b87aa92de24cc0120f04f0325e2ec419c366b92d775beb1b37ab2cc429c96c1d
4
+ data.tar.gz: 28aad2f843cd822bca23dc56e0de8526da1993a5e51cab74a80e957d3a125710
5
5
  SHA512:
6
- metadata.gz: 107761f95543a8f188d45e5aa3ea676fe904a152d32d0ebdd92e1e66ac42e2164287ac0feef3906fbb596bd6807636c3f70938fe301a2b53f2ffaf34cf316b54
7
- data.tar.gz: bcd04f967647bed2d2a4cebaff71cf5acf03ad1e388a81d44f150e9d1d4bbdbd9d16e3de222134d5806112453a3e0e4fd82aa3cc2f9ce1e55047b7f9439b9d4c
6
+ metadata.gz: 261ffff809b27f6b6ff84fd936959819fe315cfe187bb0ecc97b5ae48295fcaa515668d66cb229d3cfbb3f31bcb5127112c5da9064f8699671ce50f40fe5df2b
7
+ data.tar.gz: a7fc694951f0b4b1a0f57c5d3ea9fcb89c4729d4b28dfe945c6d6c3c7add1be498e961bbf2bc71760b7c88034a0a2ad57295c6cd432ee365c9cb2beb0dcf0950
@@ -8,5 +8,9 @@ module RelatonIec
8
8
  def fetch
9
9
  @fetch ||= Scrapper.parse_page @hit
10
10
  end
11
+
12
+ def part
13
+ @part ||= hit[:code].match(/(?<=-)[\w-]+/)&.to_s
14
+ end
11
15
  end
12
16
  end
@@ -6,6 +6,10 @@ require "addressable/uri"
6
6
  module RelatonIec
7
7
  # Page of hit collection.
8
8
  class HitCollection < RelatonBib::HitCollection
9
+ def_delegators :@array, :detect
10
+
11
+ attr_reader :part
12
+
9
13
  DOMAIN = "https://webstore.iec.ch"
10
14
 
11
15
  # @param ref_nbr [String]
@@ -13,53 +17,73 @@ module RelatonIec
13
17
  # @param part [String, nil]
14
18
  def initialize(ref_nbr, year = nil, part = nil)
15
19
  super ref_nbr, year
16
- @array = hits ref_nbr, year, part
20
+ @part = part
21
+ @array = hits ref_nbr, year
22
+ end
23
+
24
+ # @return [RelatonIec::IecBibliographicItem]
25
+ def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity
26
+ parts = @array.reject { |h| h.part.nil? }
27
+ hit = parts.min_by &:part
28
+ return @array.first.fetch lang unless hit
29
+
30
+ bibitem = hit.fetch
31
+ all_parts_item = bibitem.to_all_parts
32
+ parts.reject { |h| h.hit[:code] == hit.hit[:code] }.each do |hi|
33
+ isobib = RelatonIec::IecBibliographicItem.new(
34
+ formattedref: RelatonBib::FormattedRef.new(content: hi.hit[:code])
35
+ )
36
+ all_parts_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: isobib)
37
+ end
38
+ all_parts_item
17
39
  end
18
40
 
19
41
  private
20
42
 
21
43
  # @param ref [String]
22
44
  # @param year [String, nil]
23
- # @param part [String, nil]
24
45
  # @return [Array<RelatonIec::Hit>]
25
- def hits(ref, year, part)
46
+ def hits(ref, year)
26
47
  from, to = nil
27
48
  if year
28
49
  from = Date.strptime year, "%Y"
29
50
  to = from.next_year.prev_day
30
51
  end
31
- get_results ref, from, to, part
52
+ get_results ref, from, to
32
53
  end
33
54
 
34
55
  # @param ref [String]
35
56
  # @param from [Date, nil]
36
57
  # @param to [Date, nil]
37
- # @param part [String, nil]
38
58
  # @return [Array<RelatonIec::Hit>]
39
- def get_results(ref, from, to, part = nil)
59
+ def get_results(ref, from, to)
40
60
  code = part ? ref.sub(/(?<=-\d)\d+/, "*") : ref
41
61
  [nil, "trf", "wr"].reduce([]) do |m, t|
42
62
  url = "#{DOMAIN}/searchkey"
43
63
  url += "&type=#{t}" if t
44
64
  url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
45
- m + results(Addressable::URI.parse(url).normalize, part)
65
+ m + results(Addressable::URI.parse(url).normalize)
46
66
  end
47
67
  end
48
68
 
49
69
  # @param uri [Addressable::URI]
50
- # @param part [String, nil]
51
70
  # @return [Array<RelatonIec::Hit>]
52
- def results(uri, part)
71
+ def results(uri)
53
72
  contains = "[contains(.,'Part #{part}:')]" if part
54
- Nokogiri::HTML(OpenURI.open_uri(uri)).xpath(
73
+ resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
74
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
75
+ doc = Nokogiri::HTML(resp)
76
+ doc.xpath(
55
77
  "//body/li#{contains}",
56
78
  "//ul[contains(@class,'search-results')]/li#{contains}",
57
79
  "//ul[contains(@class,'morethesame')]/li#{contains}"
58
- ).map { |h| make_hit h }
80
+ ).map { |h| make_hit h }.compact
59
81
  end
60
82
 
61
83
  def make_hit(hit)
62
- link = hit.at('a[@href!="#"]')
84
+ link = hit.at('a[@href!="#"]')
85
+ return unless link
86
+
63
87
  code = link.text.tr [194, 160].pack("c*").force_encoding("UTF-8"), ""
64
88
  title = hit.xpath("text()").text.gsub(/[\r\n]/, "")
65
89
  Hit.new({ code: code, title: title, url: DOMAIN + link[:href] }, self)
@@ -21,7 +21,7 @@ module RelatonIec
21
21
  # @param part [String, nil] search for packaged standard if not nil
22
22
  # @return [RelatonIec::HitCollection]
23
23
  def search(text, year = nil, part = nil)
24
- HitCollection.new text, year, part
24
+ HitCollection.new text, year&.strip, part
25
25
  rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError
26
26
  raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
27
27
  end
@@ -32,23 +32,21 @@ module RelatonIec
32
32
  # reference is required
33
33
  # @return [String] Relaton XML serialisation of reference
34
34
  def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
35
+ opts[:all_parts] ||= code.match? /\s\(all parts\)/
36
+ ref = code.sub /\s\(all parts\)/, ""
35
37
  if year.nil?
36
- /^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ code
38
+ /^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ ref
37
39
  unless code1.nil?
38
- code = code1
40
+ ref = code1
39
41
  year = year1
40
42
  end
41
43
  end
44
+ return iev if ref.casecmp("IEV").zero?
42
45
 
43
- return iev if code.casecmp("IEV").zero?
44
-
45
- opts[:all_parts] ||= !(code =~ / \(all parts\)/).nil?
46
- code = code.sub(/ \(all parts\)/, "")
47
- ret = iecbib_get1(code, year, opts)
46
+ ret = iecbib_get(ref, year, opts)
48
47
  return nil if ret.nil?
49
48
 
50
49
  ret = ret.to_most_recent_reference unless year || opts[:keep_year]
51
- ret = ret.to_all_parts if opts[:all_parts]
52
50
  ret
53
51
  end
54
52
 
@@ -77,29 +75,43 @@ module RelatonIec
77
75
  # @param hits [Array<RelatonIec::Hit>]
78
76
  # @param threads [Integer]
79
77
  # @return [Array<RelatonIec::Hit>]
80
- def fetch_pages(hits, threads)
81
- workers = RelatonBib::WorkersPool.new threads
82
- workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
83
- hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
84
- workers.end
85
- workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
86
- end
87
-
88
- def isobib_search_filter(code, year) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
89
- docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
90
- corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
91
- warn "[relaton-iec] (\"#{code}\") fetching..."
78
+ # def fetch_pages(hits, threads)
79
+ # workers = RelatonBib::WorkersPool.new threads
80
+ # workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
81
+ # hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
82
+ # workers.end
83
+ # workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
84
+ # end
85
+
86
+ def search_filter(reference, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
87
+ %r{
88
+ ^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
89
+ (:(?<year1>\d{4}))?
90
+ (?<bundle>\+[^\s\/]+)?
91
+ (\/(?<corr>AMD\s\d+))?
92
+ }x =~ reference.upcase
93
+ year ||= year1
94
+ corr&.sub! " ", ""
95
+ warn "[relaton-iec] (\"#{reference}\") fetching..."
92
96
  result = search(code, year)
93
- if result.empty? && /(?<=-)(?<part>\d+)/ =~ code
97
+ if result.empty? && /(?<=-)(?<part>[\w-]+)/ =~ code
94
98
  # try to search packaged standard
95
99
  result = search code, year, part
96
- ref = code.sub /(?<=-\d)\d+/, ""
97
- else ref = code
100
+ # ref = code.sub /(?<=-\d)\w+/, ""
101
+ # else ref = code
98
102
  end
103
+ result = search code if result.empty?
104
+ code.sub! /((?:-\w+)+)/, ""
99
105
  result.select do |i|
100
- i.hit[:code] &&
101
- i.hit[:code].match(docidrx).to_s.include?(ref) &&
102
- corrigrx !~ i.hit[:code]
106
+ %r{
107
+ ^(?<code2>(?:ISO|IEC)[^\d]*\s\d+)((?:-\w+)+)?
108
+ (:(?<year2>\d{4}))?
109
+ (?<bundle2>\+[^\s\/]+)?
110
+ (\/(?<corr2>AMD\d+))?
111
+ }x =~ i.hit[:code]
112
+ # code2.sub! /(?<=-\d)\w*/, "" if part
113
+ # code2.sub! /((?:-\w+)+)/, "" if opts[:all_parts]
114
+ code == code2 && bundle == bundle2 && corr == corr2 # (year.nil? || year == year2) &&
103
115
  end
104
116
  end
105
117
 
@@ -144,30 +156,56 @@ module RelatonIec
144
156
  # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
145
157
  # If no match, returns any years which caused mismatch, for error
146
158
  # reporting
147
- def isobib_results_filter(result, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
159
+ def results_filter(result, ref, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
160
+ r_code, r_year = code_year ref, result.part
161
+ r_year ||= year
148
162
  missed_years = []
149
- result.each_slice(3) do |s| # ISO website only allows 3 connections
150
- fetch_pages(s, 3).each_with_index do |r, _i|
151
- return { ret: r } if !year
152
-
153
- r.date.select { |d| d.type == "published" }.each do |d|
154
- return { ret: r } if year.to_i == d.on(:year)
163
+ missed_parts = false
164
+ # result.each_slice(3) do |s| # ISO website only allows 3 connections
165
+ ret = if opts[:all_parts]
166
+ result.to_all_parts
167
+ else
168
+ result.detect do |h|
169
+ h_code, h_year = code_year h.hit[:code], result.part
170
+ missed_parts ||= !opts[:all_parts] && r_code != h_code
171
+ missed_years << h_year unless !r_year || h_year == r_year
172
+ r_code == h_code && (!year || h_year == r_year)
173
+ # fetch_pages(s, 3).each_with_index do |r, _i|
174
+ # return { ret: r } if !year
175
+
176
+ # r.date.select { |d| d.type == "published" }.each do |d|
177
+ # return { ret: r } if year.to_i == d.on(:year)
178
+
179
+ # missed_years << d.on(:year)
180
+ # end
181
+ # end
182
+ end&.fetch
183
+ end
184
+ { ret: ret, years: missed_years, missed_parts: missed_parts }
185
+ end
155
186
 
156
- missed_years << d.on(:year)
157
- end
158
- end
159
- end
160
- { years: missed_years }
187
+ def code_year(ref, part)
188
+ %r{
189
+ ^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
190
+ (:(?<year>\d{4}))?
191
+ }x =~ ref
192
+ code.sub!(/-\d+/, "") if part
193
+ [code, year]
161
194
  end
162
195
 
163
- def iecbib_get1(code, year, _opts)
164
- return iev if code.casecmp("IEV").zero?
196
+ def iecbib_get(code, year, opts)
197
+ # return iev if code.casecmp("IEV").zero?
165
198
 
166
- result = isobib_search_filter(code, year) || return
167
- ret = isobib_results_filter(result, year)
199
+ result = search_filter(code, year, opts) || return
200
+ ret = results_filter(result, code, year, opts)
168
201
  if ret[:ret]
169
- warn "[relaton-iec] (\"#{code}\") found "\
170
- "#{ret[:ret].docidentifier.first.id}"
202
+ if ret[:missed_parts]
203
+ warn "[relaton-iec] WARNING: #{code} found as #{ret[:ret].docidentifier.first.id} "\
204
+ "but also contain parts. If you wanted to cite all document parts for the reference, use "\
205
+ "\"#{code} (all parts)\""
206
+ else
207
+ warn "[relaton-iec] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
208
+ end
171
209
  ret[:ret]
172
210
  else
173
211
  fetch_ref_err(code, year, ret[:years])
@@ -170,8 +170,10 @@ module RelatonIec
170
170
  def fetch_status(doc)
171
171
  wip = doc.at('//ROW[STATUS[.="PREPARING"]]')
172
172
  if wip
173
- statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
173
+ statuses = YAML.load_file File.join __dir__, "statuses.yml"
174
174
  s = wip.at("STAGE").text
175
+ return unless statuses[s]
176
+
175
177
  stage, substage = statuses[s]["stage"].split "."
176
178
  else
177
179
  stage = "60"
@@ -1,3 +1,3 @@
1
1
  module RelatonIec
2
- VERSION = "1.7.0".freeze
2
+ VERSION = "1.7.5".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iec
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.7.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-27 00:00:00.000000000 Z
11
+ date: 2021-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: debase