relaton-iso 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 251529f7af2fbfc0a28760e6f7100a3245091229
4
- data.tar.gz: d9be71b4df57130f0437b9112072741c37cd9b08
3
+ metadata.gz: dd542380eec781e5113ea5455b3f0b528188306d
4
+ data.tar.gz: e457fa95214e668f42a2eda1a1ba5f0cef1e54a1
5
5
  SHA512:
6
- metadata.gz: e4fc36c5c32d790c39f7b34034960fb73da5329f1ef2f63af2a0417dd8663e915a622a9981abc10cdf334f8dbda5db12708cd3f162428ba8a9283984de3b2793
7
- data.tar.gz: eaeadce774f686eaedfcc096071e4fe03b25b72f8c76b702ad302eb21e659ddf5d26d49411064aa5c5c75a3fe1e44dbc86bcc3ec049c116c1ee6fa0b37304acf
6
+ metadata.gz: d15d13af842f09e14b48ae43c82ae362b37df6394da6b84bc146c359be029ee207bd5dc5ae87691c33d9951d4c4ff86a04542355780fb4a7918848ef665469cd
7
+ data.tar.gz: 4c0b4c5d1ac62f9418b611a59991c89cf7e29cfa1614319341162aed676d80ad7dc88f13438f1ae83010a09838d971b826e4d517b1a3f5212e863e93b9470b16
data/.travis.yml CHANGED
@@ -6,6 +6,7 @@ os:
6
6
  - linux
7
7
  - osx
8
8
  rvm:
9
+ - 2.6
9
10
  - 2.5
10
11
  - 2.4
11
12
  - ruby-head
data/Gemfile.lock CHANGED
@@ -1,8 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- relaton-iso (0.5.2)
5
- algoliasearch
4
+ relaton-iso (0.5.4)
6
5
  relaton-iec (~> 0.3.0)
7
6
  relaton-iso-bib (~> 0.2.0)
8
7
 
@@ -11,14 +10,11 @@ GEM
11
10
  specs:
12
11
  addressable (2.6.0)
13
12
  public_suffix (>= 2.0.2, < 4.0)
14
- algoliasearch (1.26.0)
15
- httpclient (~> 2.8, >= 2.8.3)
16
- json (>= 1.5.1)
17
13
  byebug (11.0.1)
18
14
  coderay (1.1.2)
19
15
  crack (0.4.3)
20
16
  safe_yaml (~> 1.0.0)
21
- debase (0.2.2)
17
+ debase (0.2.3)
22
18
  debase-ruby_core_source (>= 0.10.2)
23
19
  debase-ruby_core_source (0.10.5)
24
20
  diff-lcs (1.3)
@@ -26,13 +22,12 @@ GEM
26
22
  equivalent-xml (0.6.0)
27
23
  nokogiri (>= 1.4.3)
28
24
  hashdiff (0.4.0)
29
- httpclient (2.8.3)
30
25
  isoics (0.1.7)
31
26
  json (2.2.0)
32
27
  method_source (0.9.2)
33
- mini_portile2 (2.3.0)
34
- nokogiri (1.8.5)
35
- mini_portile2 (~> 2.3.0)
28
+ mini_portile2 (2.4.0)
29
+ nokogiri (1.10.3)
30
+ mini_portile2 (~> 2.4.0)
36
31
  pry (0.12.2)
37
32
  coderay (~> 1.1.0)
38
33
  method_source (~> 0.9.0)
@@ -41,22 +36,21 @@ GEM
41
36
  pry (~> 0.10)
42
37
  public_suffix (3.1.1)
43
38
  rake (10.5.0)
44
- relaton-bib (0.2.3)
39
+ relaton-bib (0.2.5)
45
40
  addressable
46
- nokogiri (~> 1.8.4)
47
- relaton-iec (0.3.1)
41
+ nokogiri (~> 1.10)
42
+ relaton-iec (0.3.2)
48
43
  addressable
49
44
  relaton-iso-bib (~> 0.2.0)
50
- relaton-iso-bib (0.2.3)
45
+ relaton-iso-bib (0.2.4)
51
46
  isoics (~> 0.1.6)
52
- nokogiri (~> 1.8.4)
53
47
  relaton-bib (~> 0.2.0)
54
48
  ruby_deep_clone (~> 0.8.0)
55
49
  rspec (3.8.0)
56
50
  rspec-core (~> 3.8.0)
57
51
  rspec-expectations (~> 3.8.0)
58
52
  rspec-mocks (~> 3.8.0)
59
- rspec-core (3.8.1)
53
+ rspec-core (3.8.2)
60
54
  rspec-support (~> 3.8.0)
61
55
  rspec-expectations (3.8.4)
62
56
  diff-lcs (>= 1.2.0, < 2.0)
@@ -69,7 +63,7 @@ GEM
69
63
  rake (>= 0.8.1)
70
64
  ruby_deep_clone (0.8.0)
71
65
  safe_yaml (1.0.5)
72
- simplecov (0.16.1)
66
+ simplecov (0.17.0)
73
67
  docile (~> 1.1)
74
68
  json (>= 1.8, < 3)
75
69
  simplecov-html (~> 0.10.0)
data/appveyor.yml CHANGED
@@ -7,6 +7,7 @@ cache:
7
7
 
8
8
  environment:
9
9
  matrix:
10
+ - RUBY_VERSION: 26
10
11
  - RUBY_VERSION: 25
11
12
  - RUBY_VERSION: 24
12
13
  - RUBY_VERSION: _trunk
@@ -29,15 +29,12 @@ module RelatonIso
29
29
 
30
30
  # @return [String]
31
31
  def inspect
32
- matched_words = @hit["_highlightResult"].
33
- reduce([]) { |a, (_k, v)| a + v["matchedWords"] }.uniq
32
+ # matched_words = @hit["_highlightResult"].
33
+ # reduce([]) { |a, (_k, v)| a + v["matchedWords"] }.uniq
34
34
 
35
35
  "<#{self.class}:#{format('%#.14x', object_id << 1)} "\
36
- "@text=\"#{@hit_collection&.hit_pages&.text}\" "\
37
- "@fullIdentifier=\"#{@fetch&.shortref}\" "\
38
- "@matchedWords=#{matched_words} "\
39
- "@category=\"#{@hit['category']}\" "\
40
- "@title=\"#{@hit['title']}\">"
36
+ "@text=\"#{@hit_collection&.ref}\" "\
37
+ "@reference=\"#{@hit["docRef"]}\""
41
38
  end
42
39
 
43
40
  # @param builder [Nokogiri::XML::Builder]
@@ -6,37 +6,66 @@ module RelatonIso
6
6
  # Page of hit collection.
7
7
  class HitCollection < Array
8
8
  # @return [TrueClass, FalseClass]
9
- attr_reader :fetched
9
+ # attr_reader :fetched
10
10
 
11
11
  # @return [RelatonIso::HitPages]
12
- attr_reader :hit_pages
12
+ # attr_reader :hit_pages
13
+
14
+ # @return [String]
15
+ attr_reader :ref
13
16
 
14
17
  # @param hits [Array<Hash>]
15
- def initialize(hits, hit_pages = nil)
16
- concat(hits.map { |h| Hit.new(h, self) })
17
- @fetched = false
18
- @hit_pages = hit_pages
18
+ def initialize(ref)
19
+ # concat(hits.map { |h| Hit.new(h, self) })
20
+ # @fetched = false
21
+ # @hit_pages = hit_pages
22
+ @ref = ref
23
+ %r{(?<num>\d+)(-(?<part>\d+))?} =~ ref
24
+ http = Net::HTTP.new "www.iso.org", 443
25
+ http.use_ssl = true
26
+ search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
27
+ search << "docNumber=#{num}"
28
+ search << "docPartNo=#{part}" if part
29
+ q = search.join "&"
30
+ resp = http.get(
31
+ "/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
32
+ { 'Accept' => 'application/json, text/plain, */*' }
33
+ )
34
+ json = JSON.parse resp.body
35
+ concat(json["standards"].map { |h| Hit.new h, self })
19
36
  end
20
37
 
21
38
  # @return [RelatonIso::HitCollection]
22
- def fetch
23
- workers = RelatonBib::WorkersPool.new 4
24
- workers.worker(&:fetch)
25
- each do |hit|
26
- workers << hit
27
- end
28
- workers.end
29
- workers.result
30
- @fetched = true
31
- self
32
- end
39
+ # def fetch
40
+ # workers = RelatonBib::WorkersPool.new 4
41
+ # workers.worker(&:fetch)
42
+ # each do |hit|
43
+ # workers << hit
44
+ # end
45
+ # workers.end
46
+ # workers.result
47
+ # @fetched = true
48
+ # self
49
+ # end
33
50
 
34
51
  def to_s
35
52
  inspect
36
53
  end
37
54
 
38
55
  def inspect
39
- "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
56
+ "<#{self.class}:#{format('%#.14x', object_id << 1)} @ref=#{@ref}>"
57
+ end
58
+
59
+ def to_xml(**opts)
60
+ builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
61
+ xml.documents do
62
+ each do |hit|
63
+ hit.fetch
64
+ hit.to_xml xml, **opts
65
+ end
66
+ end
67
+ end
68
+ builder.to_xml
40
69
  end
41
70
  end
42
71
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  # require 'relaton_iso/iso_bibliographic_item'
4
4
  require "relaton_iso/scrapper"
5
- require "relaton_iso/hit_pages"
5
+ require "relaton_iso/hit_collection"
6
6
  require "relaton_iec"
7
7
 
8
8
  module RelatonIso
@@ -12,10 +12,9 @@ module RelatonIso
12
12
  # @param text [String]
13
13
  # @return [RelatonIso::HitPages]
14
14
  def search(text)
15
- HitPages.new text
16
- rescue Algolia::AlgoliaProtocolError, SocketError
17
- # warn "Could not access http://www.iso.org"
18
- # []
15
+ HitCollection.new text
16
+ rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
17
+ Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError
19
18
  raise RelatonBib::RequestError, "Could not access http://www.iso.org"
20
19
  end
21
20
 
@@ -34,7 +33,7 @@ module RelatonIso
34
33
  %r{
35
34
  ^(?<code1>[^\s]+\s[^/]+) # match code
36
35
  /?
37
- (?<corr>(Amd|CD Amd|Cor|CD Cor)\s\d+:?(\d{4})?(/Cor \d+:\d{4})?) # match correction
36
+ (?<corr>(Amd|DAmd|CD Amd|Cor|CD Cor)\s\d+:?(\d{4})?(/Cor \d+:\d{4})?) # match correction
38
37
  }x =~ code
39
38
  code = code1 if code1
40
39
 
@@ -46,14 +45,9 @@ module RelatonIso
46
45
  end
47
46
  end
48
47
  code += "-1" if opts[:all_parts]
49
- return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR].match code
48
+ return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR] =~ code
50
49
 
51
50
  ret = isobib_get1(code, year, corr)
52
- if ret.nil? && code =~ %r[^ISO\s]
53
- c = code.gsub "ISO", "ISO/IEC"
54
- warn "Attempting ISO/IEC retrieval"
55
- ret = isobib_get1(c, year, corr)
56
- end
57
51
  return nil if ret.nil?
58
52
 
59
53
  ret.to_most_recent_reference unless year || opts[:keep_year]
@@ -80,32 +74,63 @@ module RelatonIso
80
74
  nil
81
75
  end
82
76
 
83
- def fetch_pages(s, n)
84
- workers = RelatonBib::WorkersPool.new n
85
- workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
86
- s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
87
- workers.end
88
- workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
89
- end
77
+ # def fetch_pages(s, n)
78
+ # workers = RelatonBib::WorkersPool.new n
79
+ # workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
80
+ # s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
81
+ # workers.end
82
+ # workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
83
+ # end
90
84
 
85
+ # Search for hits. If no found then trying missed stages and ISO/IEC.
86
+ #
87
+ # @param code [String] reference without correction
88
+ # @param corr [String] correction
89
+ # @return [Array<RelatonIso::Hit>]
91
90
  def isobib_search_filter(code, corr)
92
- # docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
93
- # corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
94
91
  warn "fetching #{code}..."
95
92
  result = search(code)
96
- result.reduce([]) do |ret, page|
97
- ret += page.select do |i|
98
- i.hit["title"] &&
99
- i.hit["title"] =~ %r{^#{code}} && (
100
- corr && %r{^#{code}[d-]*(:\d{4})?/#{corr}} =~ i.hit["title"] ||
101
- %r{^#{code}[\d-]*(:\d{4})?/} !~ i.hit["title"] && !corr
102
- )
93
+ res = search_code result, code, corr
94
+ return res unless res.empty?
95
+
96
+ # try stages
97
+ if %r{^\w+/[^/]+\s\d+} =~ code # code like ISO/IEC 123, ISO/IEC/IEE 123
98
+ res = try_stages(result, corr) do |st|
99
+ code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
100
+ end
101
+ return res unless res.empty?
102
+ elsif %r{^\w+\s\d+} =~ code # code like ISO 123
103
+ res = try_stages(result, corr) do |st|
104
+ code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
103
105
  end
104
- return ret if ret.size > 9
106
+ return res unless res.empty?
107
+ end
105
108
 
106
- ret
109
+ if %r{^ISO\s} =~ code # try ISO/IEC if ISO not found
110
+ warn "Attempting ISO/IEC retrieval"
111
+ c = code.sub "ISO", "ISO/IEC"
112
+ res = search_code result, c, corr
113
+ end
114
+ res
115
+ end
116
+
117
+ def try_stages(result, corr)
118
+ %w[NP WD CD DIS FDIS PRF IS].each do |st| # try stages
119
+ warn "Attempting #{st} stage retrieval"
120
+ c = yield st
121
+ res = search_code result, c, corr
122
+ return res unless res.empty?
123
+ end
124
+ []
125
+ end
126
+
127
+ def search_code(result, code, corr)
128
+ result.select do |i|
129
+ i.hit["docRef"] =~ %r{^#{code}(?!-)} && (
130
+ corr && %r{^#{code}[\w-]*(:\d{4})?/#{corr}} =~ i.hit["docRef"] ||
131
+ %r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"] && !corr
132
+ )
107
133
  end
108
- # []
109
134
  end
110
135
 
111
136
  # Sort through the results from RelatonIso, fetching them three at a time,
@@ -116,17 +141,13 @@ module RelatonIso
116
141
  # If no match, returns any years which caused mismatch, for error reporting
117
142
  def isobib_results_filter(result, year)
118
143
  missed_years = []
119
- result.each_slice(3) do |s| # ISO website only allows 3 connections
120
- fetch_pages(s, 3).each_with_index do |r, _i|
121
- next if r.nil?
122
- return { ret: r } if !year
144
+ result.each do |s|
145
+ return { ret: s.fetch } if !year
123
146
 
124
- r.dates.select { |d| d.type == "published" }.each do |d|
125
- return { ret: r } if year.to_i == d.on.year
147
+ %r{:(?<iyear>\d{4})} =~ s.hit["docRef"]
148
+ return { ret: s.fetch } if iyear == year
126
149
 
127
- missed_years << d.on.year
128
- end
129
- end
150
+ missed_years << iyear
130
151
  end
131
152
  { years: missed_years }
132
153
  end
@@ -1,14 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "algoliasearch"
4
3
  require "relaton_iso_bib"
5
4
  require "relaton_iso/hit"
6
5
  require "nokogiri"
7
6
  require "net/http"
8
7
 
9
- Algolia.init application_id: "JCL49WV5AR",
10
- api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0"
11
-
12
8
  module RelatonIso
13
9
  # Scrapper.
14
10
  # rubocop:disable Metrics/ModuleLength
@@ -50,9 +46,8 @@ module RelatonIso
50
46
  # @return [Hash]
51
47
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
52
48
  def parse_page(hit_data)
53
- return unless hit_data["path"] =~ /\d+$/
54
-
55
- doc, url = get_page "/standard/#{hit_data['path'].match(/\d+$/)}.html"
49
+ path = "/contents/data/standard#{hit_data["splitPath"]}/#{hit_data["csnumber"]}.html"
50
+ doc, url = get_page path
56
51
 
57
52
  # Fetch edition.
58
53
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")&.
@@ -67,14 +62,14 @@ module RelatonIso
67
62
  language: langs(doc).map { |l| l[:lang] },
68
63
  script: langs(doc).map { |l| script(l[:lang]) }.uniq,
69
64
  titles: titles,
70
- type: fetch_type(hit_data["title"]),
71
- docstatus: fetch_status(doc, hit_data["status"]),
65
+ type: fetch_type(hit_data["docRef"]),
66
+ docstatus: fetch_status(doc),
72
67
  ics: fetch_ics(doc),
73
- dates: fetch_dates(doc, hit_data["title"]),
74
- contributors: fetch_contributors(hit_data["title"]),
68
+ dates: fetch_dates(doc, hit_data["docRef"]),
69
+ contributors: fetch_contributors(hit_data["docRef"]),
75
70
  editorialgroup: fetch_workgroup(doc),
76
71
  abstract: abstract,
77
- copyright: fetch_copyright(hit_data["title"], doc),
72
+ copyright: fetch_copyright(hit_data["docRef"], doc),
78
73
  link: fetch_link(doc, url),
79
74
  relations: fetch_relations(doc),
80
75
  structuredidentifier: fetch_structuredidentifier(doc),
@@ -227,7 +222,7 @@ module RelatonIso
227
222
  # @param doc [Nokogiri::HTML::Document]
228
223
  # @param status [String]
229
224
  # @return [Hash]
230
- def fetch_status(doc, _status)
225
+ def fetch_status(doc)
231
226
  stage, substage = doc.css("li.dropdown.active span.stage-code > strong").text.split "."
232
227
  RelatonBib::DocumentStatus.new(stage: stage, substage: substage)
233
228
  end
@@ -283,18 +278,18 @@ module RelatonIso
283
278
  # rubocop:enable Metrics/MethodLength
284
279
 
285
280
  # Fetch type.
286
- # @param title [String]
281
+ # @param ref [String]
287
282
  # @return [String]
288
- def fetch_type(title)
289
- type_match = title.match(%r{^(ISO|IWA|IEC)(?:(/IEC|/IEEE|/PRF|
290
- /NP)*\s|/)(TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))}x)
283
+ def fetch_type(ref)
284
+ %r{
285
+ ^(?<prefix>ISO|IWA|IEC)
286
+ (?:(/IEC|/IEEE|/PRF|/NP|/DGuide)*\s|/)
287
+ (?<type>TS|TR|PAS|AWI|CD|FDIS|NP|DIS|WD|R|Guide|(?=\d+))
288
+ }x =~ ref
291
289
  # return "international-standard" if type_match.nil?
292
- if TYPES[type_match[3]]
293
- TYPES[type_match[3]]
294
- elsif type_match[1] == "ISO"
295
- "international-standard"
296
- elsif type_match[1] == "IWA"
297
- "international-workshop-agreement"
290
+ if TYPES[type] then TYPES[type]
291
+ elsif prefix == "ISO" then "international-standard"
292
+ elsif prefix == "IWA" then "international-workshop-agreement"
298
293
  end
299
294
  # rescue => _e
300
295
  # puts 'Unknown document type: ' + title
@@ -305,10 +300,11 @@ module RelatonIso
305
300
  # @param lang [String]
306
301
  # @return [Hash]
307
302
  def fetch_title(doc, lang)
308
- titles = doc.at("//h3[@itemprop='description'] | //h2[@itemprop='description']").
309
- text.split " -- "
310
- case titles.size
311
- when 0
303
+ titles = doc.at(
304
+ "//h3[@itemprop='description'] | //h2[@itemprop='description']",
305
+ )&.text&.split " -- "
306
+ case titles&.size
307
+ when nil, 0
312
308
  intro, main, part = nil, "", nil
313
309
  when 1
314
310
  intro, main, part = nil, titles[0], nil
@@ -344,10 +340,11 @@ module RelatonIso
344
340
  # rubocop:disable Metrics/MethodLength
345
341
  # Fetch dates
346
342
  # @param doc [Nokogiri::HTML::Document]
343
+ # @param ref [String]
347
344
  # @return [Array<Hash>]
348
- def fetch_dates(doc, title)
345
+ def fetch_dates(doc, ref)
349
346
  dates = []
350
- %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ title
347
+ %r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ ref
351
348
  pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
352
349
  if ref_date_str
353
350
  ref_date = Date.strptime ref_date_str, "%Y"
@@ -368,8 +365,8 @@ module RelatonIso
368
365
  dates
369
366
  end
370
367
 
371
- def fetch_contributors(title)
372
- title.sub(/\s.*/, "").split("/").map do |abbrev|
368
+ def fetch_contributors(ref)
369
+ ref.sub(/\s.*/, "").split("/").map do |abbrev|
373
370
  case abbrev
374
371
  when "IEC"
375
372
  name = "International Electrotechnical Commission"
@@ -400,22 +397,23 @@ module RelatonIso
400
397
  # @param url [String]
401
398
  # @return [Array<Hash>]
402
399
  def fetch_link(doc, url)
403
- obp_elms = doc.xpath("//a[contains(@href, '/obp/ui/')]")
404
- obp = obp_elms.attr("href").value if obp_elms.any?
405
- rss = DOMAIN + doc.xpath("//a[contains(@href, 'rss')]").attr("href").value
406
- [
407
- { type: "src", content: url },
408
- { type: "obp", content: obp },
409
- { type: "rss", content: rss },
410
- ]
400
+ links = [{ type: "src", content: url }]
401
+ obp = doc.at("//a[contains(@href, '/obp/ui/')]")
402
+ links << { type: "obp", content: obp[:href] } if obp
403
+ rss = doc.at("//a[contains(@href, 'rss')]")
404
+ links << { type: "rss", content: DOMAIN + rss[:href] } if rss
405
+ pub = doc.at "//p[contains(., 'publicly available')]/a"
406
+ links << { type: "pub", content: pub[:href] } if pub
407
+ links
411
408
  end
412
409
 
413
410
  # Fetch copyright.
414
- # @param title [String]
411
+ # @param ref [String]
412
+ # @param doc [Nokogiri::HTML::Document]
415
413
  # @return [Hash]
416
- def fetch_copyright(title, doc)
417
- owner_name = title.match(/.*?(?=\s)/).to_s
418
- from = title.match(/(?<=:)\d{4}/).to_s
414
+ def fetch_copyright(ref, doc)
415
+ owner_name = ref.match(/.*?(?=\s)/).to_s
416
+ from = ref.match(/(?<=:)\d{4}/).to_s
419
417
  if from.empty?
420
418
  from = doc.xpath("//span[@itemprop='releaseDate']").text.match(/\d{4}/).to_s
421
419
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "0.5.2"
4
+ VERSION = "0.5.4"
5
5
  end
data/relaton_iso.gemspec CHANGED
@@ -38,7 +38,6 @@ Gem::Specification.new do |spec|
38
38
  spec.add_development_dependency "vcr"
39
39
  spec.add_development_dependency "webmock"
40
40
 
41
- spec.add_dependency "algoliasearch"
42
41
  spec.add_dependency "relaton-iec", "~> 0.3.0"
43
42
  spec.add_dependency "relaton-iso-bib", "~> 0.2.0"
44
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-09 00:00:00.000000000 Z
11
+ date: 2019-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,20 +164,6 @@ dependencies:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
- - !ruby/object:Gem::Dependency
168
- name: algoliasearch
169
- requirement: !ruby/object:Gem::Requirement
170
- requirements:
171
- - - ">="
172
- - !ruby/object:Gem::Version
173
- version: '0'
174
- type: :runtime
175
- prerelease: false
176
- version_requirements: !ruby/object:Gem::Requirement
177
- requirements:
178
- - - ">="
179
- - !ruby/object:Gem::Version
180
- version: '0'
181
167
  - !ruby/object:Gem::Dependency
182
168
  name: relaton-iec
183
169
  requirement: !ruby/object:Gem::Requirement
@@ -245,7 +231,6 @@ files:
245
231
  - lib/relaton_iso.rb
246
232
  - lib/relaton_iso/hit.rb
247
233
  - lib/relaton_iso/hit_collection.rb
248
- - lib/relaton_iso/hit_pages.rb
249
234
  - lib/relaton_iso/iso_bibliography.rb
250
235
  - lib/relaton_iso/scrapper.rb
251
236
  - lib/relaton_iso/version.rb
@@ -1,96 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "algoliasearch"
4
- require "relaton_iso/hit_collection"
5
-
6
- module RelatonIso
7
- # Pages of hits.
8
- class HitPages < Array
9
- Algolia.init application_id: "JCL49WV5AR",
10
- api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0"
11
-
12
- # @return [String]
13
- attr_reader :text
14
-
15
- # @param text [String]
16
- def initialize(text)
17
- @text = text
18
- @index = Algolia::Index.new "all_en"
19
- resp = @index.search(text, facetFilters: ["category:standard"])
20
- @nb_pages = resp["nbPages"]
21
- self << HitCollection.new(resp["hits"], self)
22
- end
23
-
24
- # @return [RelatonIso::HitCollection]
25
- def last
26
- collection(@nb_pages - 1)
27
- end
28
-
29
- # @param i [Integer]
30
- # @return [RelatonIso::HitCollection]
31
- def [](idx)
32
- # collection i
33
- return if idx + 1 > @nb_pages
34
-
35
- collection idx
36
- super
37
- end
38
-
39
- # @return [Array]
40
- def map(&block)
41
- m = []
42
- @nb_pages.times do |n|
43
- m << yield(self[n]) if block
44
- end
45
- m
46
- end
47
-
48
- def each(&block)
49
- @nb_pages.times do |n|
50
- yield self[n] if block
51
- end
52
- end
53
-
54
- def to_s
55
- inspect
56
- end
57
-
58
- def inspect
59
- "<#{self.class}:#{format('%#.14x', object_id << 1)} @text=#{@text} "\
60
- "@pages=#{@nb_pages}>"
61
- end
62
-
63
- # @return [Integer]
64
- def size
65
- @nb_pages
66
- end
67
-
68
- def to_xml(**opts)
69
- builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
70
- xml.documents do
71
- each do |page|
72
- page.fetch
73
- page.each { |hit| hit.to_xml xml, **opts }
74
- end
75
- end
76
- end
77
- builder.to_xml
78
- end
79
-
80
- private
81
-
82
- # @param i [Integer]
83
- # @return [RelatonIso::HitCollection]
84
- def collection(idx)
85
- return if idx + 1 > @nb_pages
86
-
87
- while Array.instance_method(:size).bind(self).call < idx + 1
88
- resp = @index.search(@text,
89
- facetFilters: ["category:standard"],
90
- page: idx)
91
- self << HitCollection.new(resp["hits"], self)
92
- end
93
- Array.instance_method(:[]).bind(self).call idx
94
- end
95
- end
96
- end