relaton-iec 1.7.0 → 1.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: affb3c4e8c5d26115be469d456d5b3de4f89f1184226b7a51b8dbabdd1cae937
4
- data.tar.gz: cb850e8a8cbae05c66b629db7f2ee548debf0f1dfe3d03d6dc75e8c6e85900d2
3
+ metadata.gz: b87aa92de24cc0120f04f0325e2ec419c366b92d775beb1b37ab2cc429c96c1d
4
+ data.tar.gz: 28aad2f843cd822bca23dc56e0de8526da1993a5e51cab74a80e957d3a125710
5
5
  SHA512:
6
- metadata.gz: 107761f95543a8f188d45e5aa3ea676fe904a152d32d0ebdd92e1e66ac42e2164287ac0feef3906fbb596bd6807636c3f70938fe301a2b53f2ffaf34cf316b54
7
- data.tar.gz: bcd04f967647bed2d2a4cebaff71cf5acf03ad1e388a81d44f150e9d1d4bbdbd9d16e3de222134d5806112453a3e0e4fd82aa3cc2f9ce1e55047b7f9439b9d4c
6
+ metadata.gz: 261ffff809b27f6b6ff84fd936959819fe315cfe187bb0ecc97b5ae48295fcaa515668d66cb229d3cfbb3f31bcb5127112c5da9064f8699671ce50f40fe5df2b
7
+ data.tar.gz: a7fc694951f0b4b1a0f57c5d3ea9fcb89c4729d4b28dfe945c6d6c3c7add1be498e961bbf2bc71760b7c88034a0a2ad57295c6cd432ee365c9cb2beb0dcf0950
@@ -8,5 +8,9 @@ module RelatonIec
8
8
  def fetch
9
9
  @fetch ||= Scrapper.parse_page @hit
10
10
  end
11
+
12
+ def part
13
+ @part ||= hit[:code].match(/(?<=-)[\w-]+/)&.to_s
14
+ end
11
15
  end
12
16
  end
@@ -6,6 +6,10 @@ require "addressable/uri"
6
6
  module RelatonIec
7
7
  # Page of hit collection.
8
8
  class HitCollection < RelatonBib::HitCollection
9
+ def_delegators :@array, :detect
10
+
11
+ attr_reader :part
12
+
9
13
  DOMAIN = "https://webstore.iec.ch"
10
14
 
11
15
  # @param ref_nbr [String]
@@ -13,53 +17,73 @@ module RelatonIec
13
17
  # @param part [String, nil]
14
18
  def initialize(ref_nbr, year = nil, part = nil)
15
19
  super ref_nbr, year
16
- @array = hits ref_nbr, year, part
20
+ @part = part
21
+ @array = hits ref_nbr, year
22
+ end
23
+
24
+ # @return [RelatonIec::IecBibliographicItem]
25
+ def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity
26
+ parts = @array.reject { |h| h.part.nil? }
27
+ hit = parts.min_by &:part
28
+ return @array.first.fetch lang unless hit
29
+
30
+ bibitem = hit.fetch
31
+ all_parts_item = bibitem.to_all_parts
32
+ parts.reject { |h| h.hit[:code] == hit.hit[:code] }.each do |hi|
33
+ isobib = RelatonIec::IecBibliographicItem.new(
34
+ formattedref: RelatonBib::FormattedRef.new(content: hi.hit[:code])
35
+ )
36
+ all_parts_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: isobib)
37
+ end
38
+ all_parts_item
17
39
  end
18
40
 
19
41
  private
20
42
 
21
43
  # @param ref [String]
22
44
  # @param year [String, nil]
23
- # @param part [String, nil]
24
45
  # @return [Array<RelatonIec::Hit>]
25
- def hits(ref, year, part)
46
+ def hits(ref, year)
26
47
  from, to = nil
27
48
  if year
28
49
  from = Date.strptime year, "%Y"
29
50
  to = from.next_year.prev_day
30
51
  end
31
- get_results ref, from, to, part
52
+ get_results ref, from, to
32
53
  end
33
54
 
34
55
  # @param ref [String]
35
56
  # @param from [Date, nil]
36
57
  # @param to [Date, nil]
37
- # @param part [String, nil]
38
58
  # @return [Array<RelatonIec::Hit>]
39
- def get_results(ref, from, to, part = nil)
59
+ def get_results(ref, from, to)
40
60
  code = part ? ref.sub(/(?<=-\d)\d+/, "*") : ref
41
61
  [nil, "trf", "wr"].reduce([]) do |m, t|
42
62
  url = "#{DOMAIN}/searchkey"
43
63
  url += "&type=#{t}" if t
44
64
  url += "&RefNbr=#{code}&From=#{from}&To=#{to}&start=1"
45
- m + results(Addressable::URI.parse(url).normalize, part)
65
+ m + results(Addressable::URI.parse(url).normalize)
46
66
  end
47
67
  end
48
68
 
49
69
  # @param uri [Addressable::URI]
50
- # @param part [String, nil]
51
70
  # @return [Array<RelatonIec::Hit>]
52
- def results(uri, part)
71
+ def results(uri)
53
72
  contains = "[contains(.,'Part #{part}:')]" if part
54
- Nokogiri::HTML(OpenURI.open_uri(uri)).xpath(
73
+ resp = OpenURI.open_uri(uri, "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) "\
74
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36")
75
+ doc = Nokogiri::HTML(resp)
76
+ doc.xpath(
55
77
  "//body/li#{contains}",
56
78
  "//ul[contains(@class,'search-results')]/li#{contains}",
57
79
  "//ul[contains(@class,'morethesame')]/li#{contains}"
58
- ).map { |h| make_hit h }
80
+ ).map { |h| make_hit h }.compact
59
81
  end
60
82
 
61
83
  def make_hit(hit)
62
- link = hit.at('a[@href!="#"]')
84
+ link = hit.at('a[@href!="#"]')
85
+ return unless link
86
+
63
87
  code = link.text.tr [194, 160].pack("c*").force_encoding("UTF-8"), ""
64
88
  title = hit.xpath("text()").text.gsub(/[\r\n]/, "")
65
89
  Hit.new({ code: code, title: title, url: DOMAIN + link[:href] }, self)
@@ -21,7 +21,7 @@ module RelatonIec
21
21
  # @param part [String, nil] search for packaged standard if not nil
22
22
  # @return [RelatonIec::HitCollection]
23
23
  def search(text, year = nil, part = nil)
24
- HitCollection.new text, year, part
24
+ HitCollection.new text, year&.strip, part
25
25
  rescue SocketError, OpenURI::HTTPError, OpenSSL::SSL::SSLError
26
26
  raise RelatonBib::RequestError, "Could not access http://www.iec.ch"
27
27
  end
@@ -32,23 +32,21 @@ module RelatonIec
32
32
  # reference is required
33
33
  # @return [String] Relaton XML serialisation of reference
34
34
  def get(code, year = nil, opts = {}) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
35
+ opts[:all_parts] ||= code.match? /\s\(all parts\)/
36
+ ref = code.sub /\s\(all parts\)/, ""
35
37
  if year.nil?
36
- /^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ code
38
+ /^(?<code1>[^:]+):(?<year1>[^:]+)/ =~ ref
37
39
  unless code1.nil?
38
- code = code1
40
+ ref = code1
39
41
  year = year1
40
42
  end
41
43
  end
44
+ return iev if ref.casecmp("IEV").zero?
42
45
 
43
- return iev if code.casecmp("IEV").zero?
44
-
45
- opts[:all_parts] ||= !(code =~ / \(all parts\)/).nil?
46
- code = code.sub(/ \(all parts\)/, "")
47
- ret = iecbib_get1(code, year, opts)
46
+ ret = iecbib_get(ref, year, opts)
48
47
  return nil if ret.nil?
49
48
 
50
49
  ret = ret.to_most_recent_reference unless year || opts[:keep_year]
51
- ret = ret.to_all_parts if opts[:all_parts]
52
50
  ret
53
51
  end
54
52
 
@@ -77,29 +75,43 @@ module RelatonIec
77
75
  # @param hits [Array<RelatonIec::Hit>]
78
76
  # @param threads [Integer]
79
77
  # @return [Array<RelatonIec::Hit>]
80
- def fetch_pages(hits, threads)
81
- workers = RelatonBib::WorkersPool.new threads
82
- workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
83
- hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
84
- workers.end
85
- workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
86
- end
87
-
88
- def isobib_search_filter(code, year) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
89
- docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
90
- corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
91
- warn "[relaton-iec] (\"#{code}\") fetching..."
78
+ # def fetch_pages(hits, threads)
79
+ # workers = RelatonBib::WorkersPool.new threads
80
+ # workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
81
+ # hits.each_with_index { |hit, i| workers << { i: i, hit: hit } }
82
+ # workers.end
83
+ # workers.result.sort_by { |a| a[:i] }.map { |x| x[:hit] }
84
+ # end
85
+
86
+ def search_filter(reference, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
87
+ %r{
88
+ ^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
89
+ (:(?<year1>\d{4}))?
90
+ (?<bundle>\+[^\s\/]+)?
91
+ (\/(?<corr>AMD\s\d+))?
92
+ }x =~ reference.upcase
93
+ year ||= year1
94
+ corr&.sub! " ", ""
95
+ warn "[relaton-iec] (\"#{reference}\") fetching..."
92
96
  result = search(code, year)
93
- if result.empty? && /(?<=-)(?<part>\d+)/ =~ code
97
+ if result.empty? && /(?<=-)(?<part>[\w-]+)/ =~ code
94
98
  # try to search packaged standard
95
99
  result = search code, year, part
96
- ref = code.sub /(?<=-\d)\d+/, ""
97
- else ref = code
100
+ # ref = code.sub /(?<=-\d)\w+/, ""
101
+ # else ref = code
98
102
  end
103
+ result = search code if result.empty?
104
+ code.sub! /((?:-\w+)+)/, ""
99
105
  result.select do |i|
100
- i.hit[:code] &&
101
- i.hit[:code].match(docidrx).to_s.include?(ref) &&
102
- corrigrx !~ i.hit[:code]
106
+ %r{
107
+ ^(?<code2>(?:ISO|IEC)[^\d]*\s\d+)((?:-\w+)+)?
108
+ (:(?<year2>\d{4}))?
109
+ (?<bundle2>\+[^\s\/]+)?
110
+ (\/(?<corr2>AMD\d+))?
111
+ }x =~ i.hit[:code]
112
+ # code2.sub! /(?<=-\d)\w*/, "" if part
113
+ # code2.sub! /((?:-\w+)+)/, "" if opts[:all_parts]
114
+ code == code2 && bundle == bundle2 && corr == corr2 # (year.nil? || year == year2) &&
103
115
  end
104
116
  end
105
117
 
@@ -144,30 +156,56 @@ module RelatonIec
144
156
  # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
145
157
  # If no match, returns any years which caused mismatch, for error
146
158
  # reporting
147
- def isobib_results_filter(result, year) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
159
+ def results_filter(result, ref, year, opts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
160
+ r_code, r_year = code_year ref, result.part
161
+ r_year ||= year
148
162
  missed_years = []
149
- result.each_slice(3) do |s| # ISO website only allows 3 connections
150
- fetch_pages(s, 3).each_with_index do |r, _i|
151
- return { ret: r } if !year
152
-
153
- r.date.select { |d| d.type == "published" }.each do |d|
154
- return { ret: r } if year.to_i == d.on(:year)
163
+ missed_parts = false
164
+ # result.each_slice(3) do |s| # ISO website only allows 3 connections
165
+ ret = if opts[:all_parts]
166
+ result.to_all_parts
167
+ else
168
+ result.detect do |h|
169
+ h_code, h_year = code_year h.hit[:code], result.part
170
+ missed_parts ||= !opts[:all_parts] && r_code != h_code
171
+ missed_years << h_year unless !r_year || h_year == r_year
172
+ r_code == h_code && (!year || h_year == r_year)
173
+ # fetch_pages(s, 3).each_with_index do |r, _i|
174
+ # return { ret: r } if !year
175
+
176
+ # r.date.select { |d| d.type == "published" }.each do |d|
177
+ # return { ret: r } if year.to_i == d.on(:year)
178
+
179
+ # missed_years << d.on(:year)
180
+ # end
181
+ # end
182
+ end&.fetch
183
+ end
184
+ { ret: ret, years: missed_years, missed_parts: missed_parts }
185
+ end
155
186
 
156
- missed_years << d.on(:year)
157
- end
158
- end
159
- end
160
- { years: missed_years }
187
+ def code_year(ref, part)
188
+ %r{
189
+ ^(?<code>(?:ISO|IEC)[^\d]*\s\d+((?:-\w+)+)?)
190
+ (:(?<year>\d{4}))?
191
+ }x =~ ref
192
+ code.sub!(/-\d+/, "") if part
193
+ [code, year]
161
194
  end
162
195
 
163
- def iecbib_get1(code, year, _opts)
164
- return iev if code.casecmp("IEV").zero?
196
+ def iecbib_get(code, year, opts)
197
+ # return iev if code.casecmp("IEV").zero?
165
198
 
166
- result = isobib_search_filter(code, year) || return
167
- ret = isobib_results_filter(result, year)
199
+ result = search_filter(code, year, opts) || return
200
+ ret = results_filter(result, code, year, opts)
168
201
  if ret[:ret]
169
- warn "[relaton-iec] (\"#{code}\") found "\
170
- "#{ret[:ret].docidentifier.first.id}"
202
+ if ret[:missed_parts]
203
+ warn "[relaton-iec] WARNING: #{code} found as #{ret[:ret].docidentifier.first.id} "\
204
+ "but also contain parts. If you wanted to cite all document parts for the reference, use "\
205
+ "\"#{code} (all parts)\""
206
+ else
207
+ warn "[relaton-iec] (\"#{code}\") found #{ret[:ret].docidentifier.first.id}"
208
+ end
171
209
  ret[:ret]
172
210
  else
173
211
  fetch_ref_err(code, year, ret[:years])
@@ -170,8 +170,10 @@ module RelatonIec
170
170
  def fetch_status(doc)
171
171
  wip = doc.at('//ROW[STATUS[.="PREPARING"]]')
172
172
  if wip
173
- statuses = YAML.load_file "lib/relaton_iec/statuses.yml"
173
+ statuses = YAML.load_file File.join __dir__, "statuses.yml"
174
174
  s = wip.at("STAGE").text
175
+ return unless statuses[s]
176
+
175
177
  stage, substage = statuses[s]["stage"].split "."
176
178
  else
177
179
  stage = "60"
@@ -1,3 +1,3 @@
1
1
  module RelatonIec
2
- VERSION = "1.7.0".freeze
2
+ VERSION = "1.7.5".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iec
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.7.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-27 00:00:00.000000000 Z
11
+ date: 2021-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: debase