relaton-iso 0.5.2 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 251529f7af2fbfc0a28760e6f7100a3245091229
4
- data.tar.gz: d9be71b4df57130f0437b9112072741c37cd9b08
3
+ metadata.gz: dd542380eec781e5113ea5455b3f0b528188306d
4
+ data.tar.gz: e457fa95214e668f42a2eda1a1ba5f0cef1e54a1
5
5
  SHA512:
6
- metadata.gz: e4fc36c5c32d790c39f7b34034960fb73da5329f1ef2f63af2a0417dd8663e915a622a9981abc10cdf334f8dbda5db12708cd3f162428ba8a9283984de3b2793
7
- data.tar.gz: eaeadce774f686eaedfcc096071e4fe03b25b72f8c76b702ad302eb21e659ddf5d26d49411064aa5c5c75a3fe1e44dbc86bcc3ec049c116c1ee6fa0b37304acf
6
+ metadata.gz: d15d13af842f09e14b48ae43c82ae362b37df6394da6b84bc146c359be029ee207bd5dc5ae87691c33d9951d4c4ff86a04542355780fb4a7918848ef665469cd
7
+ data.tar.gz: 4c0b4c5d1ac62f9418b611a59991c89cf7e29cfa1614319341162aed676d80ad7dc88f13438f1ae83010a09838d971b826e4d517b1a3f5212e863e93b9470b16
data/.travis.yml CHANGED
@@ -6,6 +6,7 @@ os:
6
6
  - linux
7
7
  - osx
8
8
  rvm:
9
+ - 2.6
9
10
  - 2.5
10
11
  - 2.4
11
12
  - ruby-head
data/Gemfile.lock CHANGED
@@ -1,8 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- relaton-iso (0.5.2)
5
- algoliasearch
4
+ relaton-iso (0.5.4)
6
5
  relaton-iec (~> 0.3.0)
7
6
  relaton-iso-bib (~> 0.2.0)
8
7
 
@@ -11,14 +10,11 @@ GEM
11
10
  specs:
12
11
  addressable (2.6.0)
13
12
  public_suffix (>= 2.0.2, < 4.0)
14
- algoliasearch (1.26.0)
15
- httpclient (~> 2.8, >= 2.8.3)
16
- json (>= 1.5.1)
17
13
  byebug (11.0.1)
18
14
  coderay (1.1.2)
19
15
  crack (0.4.3)
20
16
  safe_yaml (~> 1.0.0)
21
- debase (0.2.2)
17
+ debase (0.2.3)
22
18
  debase-ruby_core_source (>= 0.10.2)
23
19
  debase-ruby_core_source (0.10.5)
24
20
  diff-lcs (1.3)
@@ -26,13 +22,12 @@ GEM
26
22
  equivalent-xml (0.6.0)
27
23
  nokogiri (>= 1.4.3)
28
24
  hashdiff (0.4.0)
29
- httpclient (2.8.3)
30
25
  isoics (0.1.7)
31
26
  json (2.2.0)
32
27
  method_source (0.9.2)
33
- mini_portile2 (2.3.0)
34
- nokogiri (1.8.5)
35
- mini_portile2 (~> 2.3.0)
28
+ mini_portile2 (2.4.0)
29
+ nokogiri (1.10.3)
30
+ mini_portile2 (~> 2.4.0)
36
31
  pry (0.12.2)
37
32
  coderay (~> 1.1.0)
38
33
  method_source (~> 0.9.0)
@@ -41,22 +36,21 @@ GEM
41
36
  pry (~> 0.10)
42
37
  public_suffix (3.1.1)
43
38
  rake (10.5.0)
44
- relaton-bib (0.2.3)
39
+ relaton-bib (0.2.5)
45
40
  addressable
46
- nokogiri (~> 1.8.4)
47
- relaton-iec (0.3.1)
41
+ nokogiri (~> 1.10)
42
+ relaton-iec (0.3.2)
48
43
  addressable
49
44
  relaton-iso-bib (~> 0.2.0)
50
- relaton-iso-bib (0.2.3)
45
+ relaton-iso-bib (0.2.4)
51
46
  isoics (~> 0.1.6)
52
- nokogiri (~> 1.8.4)
53
47
  relaton-bib (~> 0.2.0)
54
48
  ruby_deep_clone (~> 0.8.0)
55
49
  rspec (3.8.0)
56
50
  rspec-core (~> 3.8.0)
57
51
  rspec-expectations (~> 3.8.0)
58
52
  rspec-mocks (~> 3.8.0)
59
- rspec-core (3.8.1)
53
+ rspec-core (3.8.2)
60
54
  rspec-support (~> 3.8.0)
61
55
  rspec-expectations (3.8.4)
62
56
  diff-lcs (>= 1.2.0, < 2.0)
@@ -69,7 +63,7 @@ GEM
69
63
  rake (>= 0.8.1)
70
64
  ruby_deep_clone (0.8.0)
71
65
  safe_yaml (1.0.5)
72
- simplecov (0.16.1)
66
+ simplecov (0.17.0)
73
67
  docile (~> 1.1)
74
68
  json (>= 1.8, < 3)
75
69
  simplecov-html (~> 0.10.0)
data/appveyor.yml CHANGED
@@ -7,6 +7,7 @@ cache:
7
7
 
8
8
  environment:
9
9
  matrix:
10
+ - RUBY_VERSION: 26
10
11
  - RUBY_VERSION: 25
11
12
  - RUBY_VERSION: 24
12
13
  - RUBY_VERSION: _trunk
@@ -29,15 +29,12 @@ module RelatonIso
29
29
 
30
30
  # @return [String]
31
31
  def inspect
32
- matched_words = @hit["_highlightResult"].
33
- reduce([]) { |a, (_k, v)| a + v["matchedWords"] }.uniq
32
+ # matched_words = @hit["_highlightResult"].
33
+ # reduce([]) { |a, (_k, v)| a + v["matchedWords"] }.uniq
34
34
 
35
35
  "<#{self.class}:#{format('%#.14x', object_id << 1)} "\
36
- "@text=\"#{@hit_collection&.hit_pages&.text}\" "\
37
- "@fullIdentifier=\"#{@fetch&.shortref}\" "\
38
- "@matchedWords=#{matched_words} "\
39
- "@category=\"#{@hit['category']}\" "\
40
- "@title=\"#{@hit['title']}\">"
36
+ "@text=\"#{@hit_collection&.ref}\" "\
37
+ "@reference=\"#{@hit["docRef"]}\""
41
38
  end
42
39
 
43
40
  # @param builder [Nokogiri::XML::Builder]
@@ -6,37 +6,66 @@ module RelatonIso
6
6
  # Page of hit collection.
7
7
  class HitCollection < Array
8
8
  # @return [TrueClass, FalseClass]
9
- attr_reader :fetched
9
+ # attr_reader :fetched
10
10
 
11
11
  # @return [RelatonIso::HitPages]
12
- attr_reader :hit_pages
12
+ # attr_reader :hit_pages
13
+
14
+ # @return [String]
15
+ attr_reader :ref
13
16
 
14
17
  # @param hits [Array<Hash>]
15
- def initialize(hits, hit_pages = nil)
16
- concat(hits.map { |h| Hit.new(h, self) })
17
- @fetched = false
18
- @hit_pages = hit_pages
18
+ def initialize(ref)
19
+ # concat(hits.map { |h| Hit.new(h, self) })
20
+ # @fetched = false
21
+ # @hit_pages = hit_pages
22
+ @ref = ref
23
+ %r{(?<num>\d+)(-(?<part>\d+))?} =~ ref
24
+ http = Net::HTTP.new "www.iso.org", 443
25
+ http.use_ssl = true
26
+ search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
27
+ search << "docNumber=#{num}"
28
+ search << "docPartNo=#{part}" if part
29
+ q = search.join "&"
30
+ resp = http.get(
31
+ "/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
32
+ { 'Accept' => 'application/json, text/plain, */*' }
33
+ )
34
+ json = JSON.parse resp.body
35
+ concat(json["standards"].map { |h| Hit.new h, self })
19
36
  end
20
37
 
21
38
  # @return [RelatonIso::HitCollection]
22
- def fetch
23
- workers = RelatonBib::WorkersPool.new 4
24
- workers.worker(&:fetch)
25
- each do |hit|
26
- workers << hit
27
- end
28
- workers.end
29
- workers.result
30
- @fetched = true
31
- self
32
- end
39
+ # def fetch
40
+ # workers = RelatonBib::WorkersPool.new 4
41
+ # workers.worker(&:fetch)
42
+ # each do |hit|
43
+ # workers << hit
44
+ # end
45
+ # workers.end
46
+ # workers.result
47
+ # @fetched = true
48
+ # self
49
+ # end
33
50
 
34
51
  def to_s
35
52
  inspect
36
53
  end
37
54
 
38
55
  def inspect
39
- "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
56
+ "<#{self.class}:#{format('%#.14x', object_id << 1)} @ref=#{@ref}>"
57
+ end
58
+
59
+ def to_xml(**opts)
60
+ builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
61
+ xml.documents do
62
+ each do |hit|
63
+ hit.fetch
64
+ hit.to_xml xml, **opts
65
+ end
66
+ end
67
+ end
68
+ builder.to_xml
40
69
  end
41
70
  end
42
71
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # require 'relaton_iso/iso_bibliographic_item'
4
4
  require "relaton_iso/scrapper"
5
- require "relaton_iso/hit_pages"
5
+ require "relaton_iso/hit_collection"
6
6
  require "relaton_iec"
7
7
 
8
8
  module RelatonIso
@@ -12,10 +12,9 @@ module RelatonIso
12
12
  # @param text [String]
13
13
  # @return [RelatonIso::HitPages]
14
14
  def search(text)
15
- HitPages.new text
16
- rescue Algolia::AlgoliaProtocolError, SocketError
17
- # warn "Could not access http://www.iso.org"
18
- # []
15
+ HitCollection.new text
16
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
17
+ Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
19
18
  raise RelatonBib::RequestError, "Could not access http://www.iso.org"
20
19
  end
21
20
 
@@ -34,7 +33,7 @@ module RelatonIso
34
33
  %r{
35
34
  ^(?<code1>[^\s]+\s[^/]+) # match code
36
35
  /?
37
- (?<corr>(Amd|CD Amd|Cor|CD Cor)\s\d+:?(\d{4})?(/Cor \d+:\d{4})?) # match correction
36
+ (?<corr>(Amd|DAmd|CD Amd|Cor|CD Cor)\s\d+:?(\d{4})?(/Cor \d+:\d{4})?) # match correction
38
37
  }x =~ code
39
38
  code = code1 if code1
40
39
 
@@ -46,14 +45,9 @@ module RelatonIso
46
45
  end
47
46
  end
48
47
  code += "-1" if opts[:all_parts]
49
- return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR].match code
48
+ return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR] =~ code
50
49
 
51
50
  ret = isobib_get1(code, year, corr)
52
- if ret.nil? && code =~ %r[^ISO\s]
53
- c = code.gsub "ISO", "ISO/IEC"
54
- warn "Attempting ISO/IEC retrieval"
55
- ret = isobib_get1(c, year, corr)
56
- end
57
51
  return nil if ret.nil?
58
52
 
59
53
  ret.to_most_recent_reference unless year || opts[:keep_year]
@@ -80,32 +74,63 @@ module RelatonIso
80
74
  nil
81
75
  end
82
76
 
83
- def fetch_pages(s, n)
84
- workers = RelatonBib::WorkersPool.new n
85
- workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
86
- s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
87
- workers.end
88
- workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
89
- end
77
+ # def fetch_pages(s, n)
78
+ # workers = RelatonBib::WorkersPool.new n
79
+ # workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
80
+ # s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
81
+ # workers.end
82
+ # workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
83
+ # end
90
84
 
85
+ # Search for hits. If none are found, try the missing stages and then ISO/IEC.
86
+ #
87
+ # @param code [String] reference without correction
88
+ # @param corr [String] correction
89
+ # @return [Array<RelatonIso::Hit>]
91
90
  def isobib_search_filter(code, corr)
92
- # docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
93
- # corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
94
91
  warn "fetching #{code}..."
95
92
  result = search(code)
96
- result.reduce([]) do |ret, page|
97
- ret += page.select do |i|
98
- i.hit["title"] &&
99
- i.hit["title"] =~ %r{^#{code}} && (
100
- corr && %r{^#{code}[d-]*(:\d{4})?/#{corr}} =~ i.hit["title"] ||
101
- %r{^#{code}[\d-]*(:\d{4})?/} !~ i.hit["title"] && !corr
102
- )
93
+ res = search_code result, code, corr
94
+ return res unless res.empty?
95
+
96
+ # try stages
97
+ if %r{^\w+/[^/]+\s\d+} =~ code # code like ISO/IEC 123, ISO/IEC/IEEE 123
98
+ res = try_stages(result, corr) do |st|
99
+ code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
100
+ end
101
+ return res unless res.empty?
102
+ elsif %r{^\w+\s\d+} =~ code # code like ISO 123
103
+ res = try_stages(result, corr) do |st|
104
+ code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
103
105
  end
104
- return ret if ret.size > 9
106
+ return res unless res.empty?
107
+ end
105
108
 
106
- ret
109
+ if %r{^ISO\s} =~ code # try ISO/IEC if ISO not found
110
+ warn "Attempting ISO/IEC retrieval"
111
+ c = code.sub "ISO", "ISO/IEC"
112
+ res = search_code result, c, corr
113
+ end
114
+ res
115
+ end
116
+
117
+ def try_stages(result, corr)
118
+ %w[NP WD CD DIS FDIS PRF IS].each do |st| # try stages
119
+ warn "Attempting #{st} stage retrieval"
120
+ c = yield st
121
+ res = search_code result, c, corr
122
+ return res unless res.empty?
123
+ end
124
+ []
125
+ end
126
+
127
+ def search_code(result, code, corr)
128
+ result.select do |i|
129
+ i.hit["docRef"] =~ %r{^#{code}(?!-)} && (
130
+ corr && %r{^#{code}[\w-]*(:\d{4})?/#{corr}} =~ i.hit["docRef"] ||
131
+ %r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"] && !corr
132
+ )
107
133
  end
108
- # []
109
134
  end
110
135
 
111
136
  # Sort through the results from RelatonIso, fetching them three at a time,
@@ -116,17 +141,13 @@ module RelatonIso
116
141
  # If no match, returns any years which caused mismatch, for error reporting
117
142
  def isobib_results_filter(result, year)
118
143
  missed_years = []
119
- result.each_slice(3) do |s| # ISO website only allows 3 connections
120
- fetch_pages(s, 3).each_with_index do |r, _i|
121
- next if r.nil?
122
- return { ret: r } if !year
144
+ result.each do |s|
145
+ return { ret: s.fetch } if !year
123
146
 
124
- r.dates.select { |d| d.type == "published" }.each do |d|
125
- return { ret: r } if year.to_i == d.on.year
147
+ %r{:(?<iyear>\d{4})} =~ s.hit["docRef"]
148
+ return { ret: s.fetch } if iyear == year
126
149
 
127
- missed_years << d.on.year
128
- end
129
- end
150
+ missed_years << iyear
130
151
  end
131
152
  { years: missed_years }
132
153
  end
@@ -1,14 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "algoliasearch"
4
3
  require "relaton_iso_bib"
5
4
  require "relaton_iso/hit"
6
5
  require "nokogiri"
7
6
  require "net/http"
8
7
 
9
- Algolia.init application_id: "JCL49WV5AR",
10
- api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0"
11
-
12
8
  module RelatonIso
13
9
  # Scrapper.
14
10
  # rubocop:disable Metrics/ModuleLength
@@ -50,9 +46,8 @@ module RelatonIso
50
46
  # @return [Hash]
51
47
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
52
48
  def parse_page(hit_data)
53
- return unless hit_data["path"] =~ /\d+$/
54
-
55
- doc, url = get_page "/standard/#{hit_data['path'].match(/\d+$/)}.html"
49
+ path = "/contents/data/standard#{hit_data["splitPath"]}/#{hit_data["csnumber"]}.html"
50
+ doc, url = get_page path
56
51
 
57
52
  # Fetch edition.
58
53
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")&.
@@ -67,14 +62,14 @@ module RelatonIso
67
62
  language: langs(doc).map { |l| l[:lang] },
68
63
  script: langs(doc).map { |l| script(l[:lang]) }.uniq,
69
64
  titles: titles,
70
- type: fetch_type(hit_data["title"]),
71
- docstatus: fetch_status(doc, hit_data["status"]),
65
+ type: fetch_type(hit_data["docRef"]),
66
+ docstatus: fetch_status(doc),
72
67
  ics: fetch_ics(doc),
73
- dates: fetch_dates(doc, hit_data["title"]),
74
- contributors: fetch_contributors(hit_data["title"]),
68
+ dates: fetch_dates(doc, hit_data["docRef"]),
69
+ contributors: fetch_contributors(hit_data["docRef"]),
75
70
  editorialgroup: fetch_workgroup(doc),
76
71
  abstract: abstract,
77
- copyright: fetch_copyright(hit_data["title"], doc),
72
+ copyright: fetch_copyright(hit_data["docRef"], doc),
78
73
  link: fetch_link(doc, url),
79
74
  relations: fetch_relations(doc),
80
75
  structuredidentifier: fetch_structuredidentifier(doc),
@@ -227,7 +222,7 @@ module RelatonIso
227
222
  # @param doc [Nokogiri::HTML::Document]
228
223
  # @param status [String]
229
224
  # @return [Hash]
230
- def fetch_status(doc, _status)
225
+ def fetch_status(doc)
231
226
  stage, substage = doc.css("li.dropdown.active span.stage-code > strong").text.split "."
232
227
  RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
233
228
  end
@@ -283,18 +278,18 @@ module RelatonIso
283
278
  # rubocop:enable Metrics/MethodLength
284
279
 
285
280
  # Fetch type.
286
- # @param title [String]
281
+ # @param ref [String]
287
282
  # @return [String]
288
- def fetch_type(title)
289
- type_match = title.match(%r{^(ISO|IWA|IEC)(?:(/IEC|/IEEE|/PRF|
290
- /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
283
+ def fetch_type(ref)
284
+ %r{
285
+ ^(?<prefix>ISO|IWA|IEC)
286
+ (?:(/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
287
+ (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
288
+ }x =~ ref
291
289
  # return "international-standard" if type_match.nil?
292
- if TYPES[type_match[3]]
293
- TYPES[type_match[3]]
294
- elsif type_match[1] == "ISO"
295
- "international-standard"
296
- elsif type_match[1] == "IWA"
297
- "international-workshop-agreement"
290
+ if TYPES[type] then TYPES[type]
291
+ elsif prefix == "ISO" then "international-standard"
292
+ elsif prefix == "IWA" then "international-workshop-agreement"
298
293
  end
299
294
  # rescue => _e
300
295
  # puts 'Unknown document type: ' + title
@@ -305,10 +300,11 @@ module RelatonIso
305
300
  # @param lang [String]
306
301
  # @return [Hash]
307
302
  def fetch_title(doc, lang)
308
- titles = doc.at("//h3[@itemprop='description'] | //h2[@itemprop='description']").
309
- text.split " -- "
310
- case titles.size
311
- when 0
303
+ titles = doc.at(
304
+ "//h3[@itemprop='description'] | //h2[@itemprop='description']",
305
+ )&.text&.split " -- "
306
+ case titles&.size
307
+ when nil, 0
312
308
  intro, main, part = nil, "", nil
313
309
  when 1
314
310
  intro, main, part = nil, titles[0], nil
@@ -344,10 +340,11 @@ module RelatonIso
344
340
  # rubocop:disable Metrics/MethodLength
345
341
  # Fetch dates
346
342
  # @param doc [Nokogiri::HTML::Document]
343
+ # @param ref [String]
347
344
  # @return [Array<Hash>]
348
- def fetch_dates(doc, title)
345
+ def fetch_dates(doc, ref)
349
346
  dates = []
350
- %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ title
347
+ %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
351
348
  pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
352
349
  if ref_date_str
353
350
  ref_date = Date.strptime ref_date_str, "%Y"
@@ -368,8 +365,8 @@ module RelatonIso
368
365
  dates
369
366
  end
370
367
 
371
- def fetch_contributors(title)
372
- title.sub(/\s.*/, "").split("/").map do |abbrev|
368
+ def fetch_contributors(ref)
369
+ ref.sub(/\s.*/, "").split("/").map do |abbrev|
373
370
  case abbrev
374
371
  when "IEC"
375
372
  name = "International Electrotechnical Commission"
@@ -400,22 +397,23 @@ module RelatonIso
400
397
  # @param url [String]
401
398
  # @return [Array<Hash>]
402
399
  def fetch_link(doc, url)
403
- obp_elms = doc.xpath("//a[contains(@href, '/obp/ui/')]")
404
- obp = obp_elms.attr("href").value if obp_elms.any?
405
- rss = DOMAIN + doc.xpath("//a[contains(@href, 'rss')]").attr("href").value
406
- [
407
- { type: "src", content: url },
408
- { type: "obp", content: obp },
409
- { type: "rss", content: rss },
410
- ]
400
+ links = [{ type: "src", content: url }]
401
+ obp = doc.at("//a[contains(@href, '/obp/ui/')]")
402
+ links << { type: "obp", content: obp[:href] } if obp
403
+ rss = doc.at("//a[contains(@href, 'rss')]")
404
+ links << { type: "rss", content: DOMAIN + rss[:href] } if rss
405
+ pub = doc.at "//p[contains(., 'publicly available')]/a"
406
+ links << { type: "pub", content: pub[:href] } if pub
407
+ links
411
408
  end
412
409
 
413
410
  # Fetch copyright.
414
- # @param title [String]
411
+ # @param ref [String]
412
+ # @param doc [Nokogiri::HTML::Document]
415
413
  # @return [Hash]
416
- def fetch_copyright(title, doc)
417
- owner_name = title.match(/.*?(?=\s)/).to_s
418
- from = title.match(/(?<=:)\d{4}/).to_s
414
+ def fetch_copyright(ref, doc)
415
+ owner_name = ref.match(/.*?(?=\s)/).to_s
416
+ from = ref.match(/(?<=:)\d{4}/).to_s
419
417
  if from.empty?
420
418
  from = doc.xpath("//span[@itemprop='releaseDate']").text.match(/\d{4}/).to_s
421
419
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "0.5.2"
4
+ VERSION = "0.5.4"
5
5
  end
data/relaton_iso.gemspec CHANGED
@@ -38,7 +38,6 @@ Gem::Specification.new do |spec|
38
38
  spec.add_development_dependency "vcr"
39
39
  spec.add_development_dependency "webmock"
40
40
 
41
- spec.add_dependency "algoliasearch"
42
41
  spec.add_dependency "relaton-iec", "~> 0.3.0"
43
42
  spec.add_dependency "relaton-iso-bib", "~> 0.2.0"
44
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-09 00:00:00.000000000 Z
11
+ date: 2019-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,20 +164,6 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
- - !ruby/object:Gem::Dependency
168
- name: algoliasearch
169
- requirement: !ruby/object:Gem::Requirement
170
- requirements:
171
- - - ">="
172
- - !ruby/object:Gem::Version
173
- version: '0'
174
- type: :runtime
175
- prerelease: false
176
- version_requirements: !ruby/object:Gem::Requirement
177
- requirements:
178
- - - ">="
179
- - !ruby/object:Gem::Version
180
- version: '0'
181
167
  - !ruby/object:Gem::Dependency
182
168
  name: relaton-iec
183
169
  requirement: !ruby/object:Gem::Requirement
@@ -245,7 +231,6 @@ files:
245
231
  - lib/relaton_iso.rb
246
232
  - lib/relaton_iso/hit.rb
247
233
  - lib/relaton_iso/hit_collection.rb
248
- - lib/relaton_iso/hit_pages.rb
249
234
  - lib/relaton_iso/iso_bibliography.rb
250
235
  - lib/relaton_iso/scrapper.rb
251
236
  - lib/relaton_iso/version.rb
@@ -1,96 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "algoliasearch"
4
- require "relaton_iso/hit_collection"
5
-
6
- module RelatonIso
7
- # Pages of hits.
8
- class HitPages < Array
9
- Algolia.init application_id: "JCL49WV5AR",
10
- api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0"
11
-
12
- # @return [String]
13
- attr_reader :text
14
-
15
- # @param text [String]
16
- def initialize(text)
17
- @text = text
18
- @index = Algolia::Index.new "all_en"
19
- resp = @index.search(text, facetFilters: ["category:standard"])
20
- @nb_pages = resp["nbPages"]
21
- self << HitCollection.new(resp["hits"], self)
22
- end
23
-
24
- # @return [RelatonIso::HitCollection]
25
- def last
26
- collection(@nb_pages - 1)
27
- end
28
-
29
- # @param i [Integer]
30
- # @return [RelatonIso::HitCollection]
31
- def [](idx)
32
- # collection i
33
- return if idx + 1 > @nb_pages
34
-
35
- collection idx
36
- super
37
- end
38
-
39
- # @return [Array]
40
- def map(&block)
41
- m = []
42
- @nb_pages.times do |n|
43
- m << yield(self[n]) if block
44
- end
45
- m
46
- end
47
-
48
- def each(&block)
49
- @nb_pages.times do |n|
50
- yield self[n] if block
51
- end
52
- end
53
-
54
- def to_s
55
- inspect
56
- end
57
-
58
- def inspect
59
- "<#{self.class}:#{format('%#.14x', object_id << 1)} @text=#{@text} "\
60
- "@pages=#{@nb_pages}>"
61
- end
62
-
63
- # @return [Integer]
64
- def size
65
- @nb_pages
66
- end
67
-
68
- def to_xml(**opts)
69
- builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
70
- xml.documents do
71
- each do |page|
72
- page.fetch
73
- page.each { |hit| hit.to_xml xml, **opts }
74
- end
75
- end
76
- end
77
- builder.to_xml
78
- end
79
-
80
- private
81
-
82
- # @param i [Integer]
83
- # @return [RelatonIso::HitCollection]
84
- def collection(idx)
85
- return if idx + 1 > @nb_pages
86
-
87
- while Array.instance_method(:size).bind(self).call < idx + 1
88
- resp = @index.search(@text,
89
- facetFilters: ["category:standard"],
90
- page: idx)
91
- self << HitCollection.new(resp["hits"], self)
92
- end
93
- Array.instance_method(:[]).bind(self).call idx
94
- end
95
- end
96
- end