relaton-iso 0.6.4 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -7
- data/lib/relaton_iso.rb +8 -4
- data/lib/relaton_iso/iso_bibliography.rb +4 -3
- data/lib/relaton_iso/processor.rb +35 -0
- data/lib/relaton_iso/scrapper.rb +4 -51
- data/lib/relaton_iso/version.rb +1 -1
- data/relaton_iso.gemspec +1 -1
- metadata +4 -4
- data/lib/relaton/processor.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ff396da8dad98ab886022064ab47bfe29d85f6a
|
4
|
+
data.tar.gz: b03ac90297c301edac42193e2d37119a24b7348f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40dbc389dbee1e726b882944102eb674289d38b8e73d32f27f29dceeae875f40f3ec272b82a22eab23891f6732235531397e67001575d2e0922317fabbb70d15
|
7
|
+
data.tar.gz: fdacef79ca7cae29db9a2690b99c097c31fd241253176f3babb2bf3d2eb7113dd178fa7c01b0c7da232c86fa2831917af98b2165413cd512f8fb6feb0b9e5af7
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-iso (0.6.4)
|
4
|
+
relaton-iso (0.6.5)
|
5
5
|
relaton-iec (~> 0.4.0)
|
6
6
|
relaton-iso-bib (~> 0.3.0)
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
-
addressable (2.
|
12
|
-
public_suffix (>= 2.0.2, <
|
11
|
+
addressable (2.7.0)
|
12
|
+
public_suffix (>= 2.0.2, < 5.0)
|
13
13
|
byebug (11.0.1)
|
14
14
|
coderay (1.1.2)
|
15
15
|
crack (0.4.3)
|
@@ -34,15 +34,15 @@ GEM
|
|
34
34
|
pry-byebug (3.7.0)
|
35
35
|
byebug (~> 11.0)
|
36
36
|
pry (~> 0.10)
|
37
|
-
public_suffix (
|
37
|
+
public_suffix (4.0.1)
|
38
38
|
rake (10.5.0)
|
39
|
-
relaton-bib (0.3.
|
39
|
+
relaton-bib (0.3.6)
|
40
40
|
addressable
|
41
|
-
nokogiri
|
41
|
+
nokogiri
|
42
42
|
relaton-iec (0.4.3)
|
43
43
|
addressable
|
44
44
|
relaton-iso-bib (~> 0.3.0)
|
45
|
-
relaton-iso-bib (0.3.
|
45
|
+
relaton-iso-bib (0.3.5)
|
46
46
|
isoics (~> 0.1.6)
|
47
47
|
relaton-bib (~> 0.3.0)
|
48
48
|
ruby_deep_clone (~> 0.8.0)
|
data/lib/relaton_iso.rb
CHANGED
@@ -2,7 +2,11 @@
|
|
2
2
|
|
3
3
|
require "relaton_iso/version"
|
4
4
|
require "relaton_iso/iso_bibliography"
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
|
6
|
+
# if defined? Relaton
|
7
|
+
# require "relaton_iso/processor"
|
8
|
+
# # don't register the gem if it's required form relaton's registry
|
9
|
+
# return if caller.detect { |c| c.include? "register_gems" }
|
10
|
+
|
11
|
+
# Relaton::Registry.instance.register(RelatonIso::Processor)
|
12
|
+
# end
|
data/lib/relaton_iso/iso_bibliography.rb
CHANGED
@@ -15,7 +15,7 @@ module RelatonIso
|
|
15
15
|
HitCollection.new text
|
16
16
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
17
17
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
18
|
-
OpenSSL::SSL::SSLError
|
18
|
+
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
19
19
|
raise RelatonBib::RequestError, "Could not access http://www.iso.org"
|
20
20
|
end
|
21
21
|
|
@@ -34,12 +34,13 @@ module RelatonIso
|
|
34
34
|
%r{
|
35
35
|
^(?<code1>[^\s]+\s[^/]+) # match code
|
36
36
|
/?
|
37
|
-
(?<corr>(Amd|DAmd|CD
|
37
|
+
(?<corr>(Amd|DAmd|(CD|WD|AWI|NP)\sAmd|Cor|CD\sCor|FDAmd)\s\d+ # correction name
|
38
|
+
:?(\d{4})?(/Cor\s\d+:\d{4})?) # match correction year
|
38
39
|
}x =~ code
|
39
40
|
code = code1 if code1
|
40
41
|
|
41
42
|
if year.nil?
|
42
|
-
/^(?<code1>[^\s]
|
43
|
+
/^(?<code1>[^\s]+(\s\w+)?\s[\d-]+):?(?<year1>\d{4})?/ =~ code
|
43
44
|
unless code1.nil?
|
44
45
|
code = code1
|
45
46
|
year = year1
|
data/lib/relaton_iso/processor.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require "relaton/processor"
|
2
|
+
|
3
|
+
module RelatonIso
|
4
|
+
class Processor < Relaton::Processor
|
5
|
+
attr_reader :idtype
|
6
|
+
|
7
|
+
def initialize
|
8
|
+
@short = :relaton_iso
|
9
|
+
@prefix = "ISO"
|
10
|
+
@defaultprefix = %r{^(ISO)[ /]}
|
11
|
+
@idtype = "ISO"
|
12
|
+
end
|
13
|
+
|
14
|
+
# @param code [String]
|
15
|
+
# @param date [String, NilClass] year
|
16
|
+
# @param opts [Hash]
|
17
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
18
|
+
def get(code, date, opts)
|
19
|
+
::RelatonIso::IsoBibliography.get(code, date, opts)
|
20
|
+
end
|
21
|
+
|
22
|
+
# @param xml [String]
|
23
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
24
|
+
def from_xml(xml)
|
25
|
+
::RelatonIsoBib::XMLParser.from_xml xml
|
26
|
+
end
|
27
|
+
|
28
|
+
# @param hash [Hash]
|
29
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
30
|
+
def hash_to_bib(hash)
|
31
|
+
item_hash = ::RelatonIsoBib::HashConverter.hash_to_bib(hash)
|
32
|
+
::RelatonIsoBib::IsoBibliographicItem.new item_hash
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -42,7 +42,7 @@ module RelatonIso
|
|
42
42
|
|
43
43
|
RelatonIsoBib::IsoBibliographicItem.new(
|
44
44
|
fetched: Date.today.to_s,
|
45
|
-
docid: fetch_docid(
|
45
|
+
docid: fetch_docid(hit_data["docRef"]),
|
46
46
|
docnumber: fetch_docnumber(doc),
|
47
47
|
edition: edition,
|
48
48
|
language: langs(doc).map { |l| l[:lang] },
|
@@ -65,46 +65,6 @@ module RelatonIso
|
|
65
65
|
|
66
66
|
private
|
67
67
|
|
68
|
-
# Start algolia search workers.
|
69
|
-
# @param text[String]
|
70
|
-
# @param iso_workers [RelatonBib::WorkersPool]
|
71
|
-
# @reaturn [RelatonBib::WorkersPool]
|
72
|
-
# def start_algolia_search(text, iso_workers)
|
73
|
-
# index = Algolia::Index.new "all_en"
|
74
|
-
# algolia_workers = RelatonBib::WorkersPool.new
|
75
|
-
# algolia_workers.worker do |page|
|
76
|
-
# algolia_worker(index, text, page, algolia_workers, iso_workers)
|
77
|
-
# end
|
78
|
-
|
79
|
-
# # Add first page so algolia worker will start.
|
80
|
-
# algolia_workers << 0
|
81
|
-
# end
|
82
|
-
|
83
|
-
# Fetch ISO documents.
|
84
|
-
# @param hit [Hash]
|
85
|
-
# @param isiso_workers [RelatonIso::WorkersPool]
|
86
|
-
# def iso_worker(hit, iso_workers)
|
87
|
-
# print "Parse #{iso_workers.size} of #{iso_workers.nb_hits} \r"
|
88
|
-
# parse_page hit
|
89
|
-
# end
|
90
|
-
|
91
|
-
# Fetch hits from algolia search service.
|
92
|
-
# @param index[Algolia::Index]
|
93
|
-
# @param text [String]
|
94
|
-
# @param page [Integer]
|
95
|
-
# @param algolia_workers [RelatonBib::WorkersPool]
|
96
|
-
# @param isiso_workers [RelatonBib::WorkersPool]
|
97
|
-
# def algolia_worker(index, text, page, algolia_workers, iso_workers)
|
98
|
-
# res = index.search text, facetFilters: ["category:standard"], page: page
|
99
|
-
# next_page = res["page"] + 1
|
100
|
-
# algolia_workers << next_page if next_page < res["nbPages"]
|
101
|
-
# res["hits"].each do |hit|
|
102
|
-
# iso_workers.nb_hits = res["nbHits"]
|
103
|
-
# iso_workers << hit
|
104
|
-
# end
|
105
|
-
# iso_workers.end unless next_page < res["nbPages"]
|
106
|
-
# end
|
107
|
-
|
108
68
|
# Fetch titles and abstracts.
|
109
69
|
# @param doc [Nokigiri::HTML::Document]
|
110
70
|
# @return [Array<Array>]
|
@@ -115,10 +75,6 @@ module RelatonIso
|
|
115
75
|
langs(doc).each do |lang|
|
116
76
|
# Don't need to get page for en. We already have it.
|
117
77
|
d = lang[:path] ? get_page(lang[:path])[0] : doc
|
118
|
-
|
119
|
-
# Check if unavailable for the lang.
|
120
|
-
next if d.css("h5.help-block").any?
|
121
|
-
|
122
78
|
titles << fetch_title(d, lang[:lang])
|
123
79
|
|
124
80
|
# Fetch abstracts.
|
@@ -174,7 +130,7 @@ module RelatonIso
|
|
174
130
|
[Nokogiri::HTML(resp.body), url]
|
175
131
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
176
132
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
177
|
-
OpenSSL::SSL::SSLError
|
133
|
+
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
178
134
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
179
135
|
end
|
180
136
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
@@ -182,11 +138,8 @@ module RelatonIso
|
|
182
138
|
# Fetch docid.
|
183
139
|
# @param doc [Nokogiri::HTML::Document]
|
184
140
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
185
|
-
def fetch_docid(
|
186
|
-
|
187
|
-
return [] unless item_ref
|
188
|
-
|
189
|
-
[RelatonBib::DocumentIdentifier.new(id: item_ref.text, type: "ISO")]
|
141
|
+
def fetch_docid(doc_ref)
|
142
|
+
[RelatonBib::DocumentIdentifier.new(id: doc_ref, type: "ISO")]
|
190
143
|
end
|
191
144
|
|
192
145
|
def fetch_docnumber(doc)
|
data/lib/relaton_iso/version.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.description = "RelatonIso: retrieve ISO Standards for bibliographic use "\
|
16
16
|
"using the IsoBibliographicItem model"
|
17
17
|
|
18
|
-
spec.homepage = "https://github.com/
|
18
|
+
spec.homepage = "https://github.com/relaton/relaton-iso"
|
19
19
|
spec.license = "BSD-2-Clause"
|
20
20
|
|
21
21
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.4
|
4
|
+
version: 0.6.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -227,15 +227,15 @@ files:
|
|
227
227
|
- bin/rspec
|
228
228
|
- bin/safe_yaml
|
229
229
|
- bin/setup
|
230
|
-
- lib/relaton/processor.rb
|
231
230
|
- lib/relaton_iso.rb
|
232
231
|
- lib/relaton_iso/hit.rb
|
233
232
|
- lib/relaton_iso/hit_collection.rb
|
234
233
|
- lib/relaton_iso/iso_bibliography.rb
|
234
|
+
- lib/relaton_iso/processor.rb
|
235
235
|
- lib/relaton_iso/scrapper.rb
|
236
236
|
- lib/relaton_iso/version.rb
|
237
237
|
- relaton_iso.gemspec
|
238
|
-
homepage: https://github.com/
|
238
|
+
homepage: https://github.com/relaton/relaton-iso
|
239
239
|
licenses:
|
240
240
|
- BSD-2-Clause
|
241
241
|
metadata: {}
|
data/lib/relaton/processor.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require "relaton/processor"
|
2
|
-
|
3
|
-
module Relaton
|
4
|
-
module RelatonIso
|
5
|
-
class Processor < Relaton::Processor
|
6
|
-
def initialize
|
7
|
-
@short = :relaton_iso
|
8
|
-
@prefix = "ISO"
|
9
|
-
@defaultprefix = %r{^(ISO)[ /]}
|
10
|
-
@idtype = "ISO"
|
11
|
-
end
|
12
|
-
|
13
|
-
def get(code, date, opts)
|
14
|
-
::RelatonIso::IsoBibliography.get(code, date, opts)
|
15
|
-
end
|
16
|
-
|
17
|
-
def from_xml(xml)
|
18
|
-
RelatonIsoBib::XMLParser.from_xml xml
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|