relaton-iso 0.6.4 → 0.6.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -7
- data/lib/relaton_iso.rb +8 -4
- data/lib/relaton_iso/iso_bibliography.rb +4 -3
- data/lib/relaton_iso/processor.rb +35 -0
- data/lib/relaton_iso/scrapper.rb +4 -51
- data/lib/relaton_iso/version.rb +1 -1
- data/relaton_iso.gemspec +1 -1
- metadata +4 -4
- data/lib/relaton/processor.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ff396da8dad98ab886022064ab47bfe29d85f6a
|
4
|
+
data.tar.gz: b03ac90297c301edac42193e2d37119a24b7348f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40dbc389dbee1e726b882944102eb674289d38b8e73d32f27f29dceeae875f40f3ec272b82a22eab23891f6732235531397e67001575d2e0922317fabbb70d15
|
7
|
+
data.tar.gz: fdacef79ca7cae29db9a2690b99c097c31fd241253176f3babb2bf3d2eb7113dd178fa7c01b0c7da232c86fa2831917af98b2165413cd512f8fb6feb0b9e5af7
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-iso (0.6.4)
|
4
|
+
relaton-iso (0.6.5)
|
5
5
|
relaton-iec (~> 0.4.0)
|
6
6
|
relaton-iso-bib (~> 0.3.0)
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
-
addressable (2.
|
12
|
-
public_suffix (>= 2.0.2, <
|
11
|
+
addressable (2.7.0)
|
12
|
+
public_suffix (>= 2.0.2, < 5.0)
|
13
13
|
byebug (11.0.1)
|
14
14
|
coderay (1.1.2)
|
15
15
|
crack (0.4.3)
|
@@ -34,15 +34,15 @@ GEM
|
|
34
34
|
pry-byebug (3.7.0)
|
35
35
|
byebug (~> 11.0)
|
36
36
|
pry (~> 0.10)
|
37
|
-
public_suffix (
|
37
|
+
public_suffix (4.0.1)
|
38
38
|
rake (10.5.0)
|
39
|
-
relaton-bib (0.3.
|
39
|
+
relaton-bib (0.3.6)
|
40
40
|
addressable
|
41
|
-
nokogiri
|
41
|
+
nokogiri
|
42
42
|
relaton-iec (0.4.3)
|
43
43
|
addressable
|
44
44
|
relaton-iso-bib (~> 0.3.0)
|
45
|
-
relaton-iso-bib (0.3.
|
45
|
+
relaton-iso-bib (0.3.5)
|
46
46
|
isoics (~> 0.1.6)
|
47
47
|
relaton-bib (~> 0.3.0)
|
48
48
|
ruby_deep_clone (~> 0.8.0)
|
data/lib/relaton_iso.rb
CHANGED
@@ -2,7 +2,11 @@
|
|
2
2
|
|
3
3
|
require "relaton_iso/version"
|
4
4
|
require "relaton_iso/iso_bibliography"
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
|
6
|
+
# if defined? Relaton
|
7
|
+
# require "relaton_iso/processor"
|
8
|
+
# # don't register the gem if it's required from relaton's registry
|
9
|
+
# return if caller.detect { |c| c.include? "register_gems" }
|
10
|
+
|
11
|
+
# Relaton::Registry.instance.register(RelatonIso::Processor)
|
12
|
+
# end
|
@@ -15,7 +15,7 @@ module RelatonIso
|
|
15
15
|
HitCollection.new text
|
16
16
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
17
17
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
18
|
-
OpenSSL::SSL::SSLError
|
18
|
+
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
19
19
|
raise RelatonBib::RequestError, "Could not access http://www.iso.org"
|
20
20
|
end
|
21
21
|
|
@@ -34,12 +34,13 @@ module RelatonIso
|
|
34
34
|
%r{
|
35
35
|
^(?<code1>[^\s]+\s[^/]+) # match code
|
36
36
|
/?
|
37
|
-
(?<corr>(Amd|DAmd|CD
|
37
|
+
(?<corr>(Amd|DAmd|(CD|WD|AWI|NP)\sAmd|Cor|CD\sCor|FDAmd)\s\d+ # correction name
|
38
|
+
:?(\d{4})?(/Cor\s\d+:\d{4})?) # match correction year
|
38
39
|
}x =~ code
|
39
40
|
code = code1 if code1
|
40
41
|
|
41
42
|
if year.nil?
|
42
|
-
/^(?<code1>[^\s]
|
43
|
+
/^(?<code1>[^\s]+(\s\w+)?\s[\d-]+):?(?<year1>\d{4})?/ =~ code
|
43
44
|
unless code1.nil?
|
44
45
|
code = code1
|
45
46
|
year = year1
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require "relaton/processor"
|
2
|
+
|
3
|
+
module RelatonIso
|
4
|
+
class Processor < Relaton::Processor
|
5
|
+
attr_reader :idtype
|
6
|
+
|
7
|
+
def initialize
|
8
|
+
@short = :relaton_iso
|
9
|
+
@prefix = "ISO"
|
10
|
+
@defaultprefix = %r{^(ISO)[ /]}
|
11
|
+
@idtype = "ISO"
|
12
|
+
end
|
13
|
+
|
14
|
+
# @param code [String]
|
15
|
+
# @param date [String, NilClass] year
|
16
|
+
# @param opts [Hash]
|
17
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
18
|
+
def get(code, date, opts)
|
19
|
+
::RelatonIso::IsoBibliography.get(code, date, opts)
|
20
|
+
end
|
21
|
+
|
22
|
+
# @param xml [String]
|
23
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
24
|
+
def from_xml(xml)
|
25
|
+
::RelatonIsoBib::XMLParser.from_xml xml
|
26
|
+
end
|
27
|
+
|
28
|
+
# @param hash [Hash]
|
29
|
+
# @return [RelatonIsoBib::IsoBibliographicItem]
|
30
|
+
def hash_to_bib(hash)
|
31
|
+
item_hash = ::RelatonIsoBib::HashConverter.hash_to_bib(hash)
|
32
|
+
::RelatonIsoBib::IsoBibliographicItem.new item_hash
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -42,7 +42,7 @@ module RelatonIso
|
|
42
42
|
|
43
43
|
RelatonIsoBib::IsoBibliographicItem.new(
|
44
44
|
fetched: Date.today.to_s,
|
45
|
-
docid: fetch_docid(
|
45
|
+
docid: fetch_docid(hit_data["docRef"]),
|
46
46
|
docnumber: fetch_docnumber(doc),
|
47
47
|
edition: edition,
|
48
48
|
language: langs(doc).map { |l| l[:lang] },
|
@@ -65,46 +65,6 @@ module RelatonIso
|
|
65
65
|
|
66
66
|
private
|
67
67
|
|
68
|
-
# Start algolia search workers.
|
69
|
-
# @param text[String]
|
70
|
-
# @param iso_workers [RelatonBib::WorkersPool]
|
71
|
-
# @reaturn [RelatonBib::WorkersPool]
|
72
|
-
# def start_algolia_search(text, iso_workers)
|
73
|
-
# index = Algolia::Index.new "all_en"
|
74
|
-
# algolia_workers = RelatonBib::WorkersPool.new
|
75
|
-
# algolia_workers.worker do |page|
|
76
|
-
# algolia_worker(index, text, page, algolia_workers, iso_workers)
|
77
|
-
# end
|
78
|
-
|
79
|
-
# # Add first page so algolia worker will start.
|
80
|
-
# algolia_workers << 0
|
81
|
-
# end
|
82
|
-
|
83
|
-
# Fetch ISO documents.
|
84
|
-
# @param hit [Hash]
|
85
|
-
# @param isiso_workers [RelatonIso::WorkersPool]
|
86
|
-
# def iso_worker(hit, iso_workers)
|
87
|
-
# print "Parse #{iso_workers.size} of #{iso_workers.nb_hits} \r"
|
88
|
-
# parse_page hit
|
89
|
-
# end
|
90
|
-
|
91
|
-
# Fetch hits from algolia search service.
|
92
|
-
# @param index[Algolia::Index]
|
93
|
-
# @param text [String]
|
94
|
-
# @param page [Integer]
|
95
|
-
# @param algolia_workers [RelatonBib::WorkersPool]
|
96
|
-
# @param isiso_workers [RelatonBib::WorkersPool]
|
97
|
-
# def algolia_worker(index, text, page, algolia_workers, iso_workers)
|
98
|
-
# res = index.search text, facetFilters: ["category:standard"], page: page
|
99
|
-
# next_page = res["page"] + 1
|
100
|
-
# algolia_workers << next_page if next_page < res["nbPages"]
|
101
|
-
# res["hits"].each do |hit|
|
102
|
-
# iso_workers.nb_hits = res["nbHits"]
|
103
|
-
# iso_workers << hit
|
104
|
-
# end
|
105
|
-
# iso_workers.end unless next_page < res["nbPages"]
|
106
|
-
# end
|
107
|
-
|
108
68
|
# Fetch titles and abstracts.
|
109
69
|
# @param doc [Nokogiri::HTML::Document]
|
110
70
|
# @return [Array<Array>]
|
@@ -115,10 +75,6 @@ module RelatonIso
|
|
115
75
|
langs(doc).each do |lang|
|
116
76
|
# Don't need to get page for en. We already have it.
|
117
77
|
d = lang[:path] ? get_page(lang[:path])[0] : doc
|
118
|
-
|
119
|
-
# Check if unavailable for the lang.
|
120
|
-
next if d.css("h5.help-block").any?
|
121
|
-
|
122
78
|
titles << fetch_title(d, lang[:lang])
|
123
79
|
|
124
80
|
# Fetch abstracts.
|
@@ -174,7 +130,7 @@ module RelatonIso
|
|
174
130
|
[Nokogiri::HTML(resp.body), url]
|
175
131
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
|
176
132
|
Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError,
|
177
|
-
OpenSSL::SSL::SSLError
|
133
|
+
OpenSSL::SSL::SSLError, Errno::ETIMEDOUT
|
178
134
|
raise RelatonBib::RequestError, "Could not access #{url}"
|
179
135
|
end
|
180
136
|
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
@@ -182,11 +138,8 @@ module RelatonIso
|
|
182
138
|
# Fetch docid.
|
183
139
|
# @param doc [Nokogiri::HTML::Document]
|
184
140
|
# @return [Array<RelatonBib::DocumentIdentifier>]
|
185
|
-
def fetch_docid(
|
186
|
-
|
187
|
-
return [] unless item_ref
|
188
|
-
|
189
|
-
[RelatonBib::DocumentIdentifier.new(id: item_ref.text, type: "ISO")]
|
141
|
+
def fetch_docid(doc_ref)
|
142
|
+
[RelatonBib::DocumentIdentifier.new(id: doc_ref, type: "ISO")]
|
190
143
|
end
|
191
144
|
|
192
145
|
def fetch_docnumber(doc)
|
data/lib/relaton_iso/version.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.description = "RelatonIso: retrieve ISO Standards for bibliographic use "\
|
16
16
|
"using the IsoBibliographicItem model"
|
17
17
|
|
18
|
-
spec.homepage = "https://github.com/
|
18
|
+
spec.homepage = "https://github.com/relaton/relaton-iso"
|
19
19
|
spec.license = "BSD-2-Clause"
|
20
20
|
|
21
21
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.4
|
4
|
+
version: 0.6.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -227,15 +227,15 @@ files:
|
|
227
227
|
- bin/rspec
|
228
228
|
- bin/safe_yaml
|
229
229
|
- bin/setup
|
230
|
-
- lib/relaton/processor.rb
|
231
230
|
- lib/relaton_iso.rb
|
232
231
|
- lib/relaton_iso/hit.rb
|
233
232
|
- lib/relaton_iso/hit_collection.rb
|
234
233
|
- lib/relaton_iso/iso_bibliography.rb
|
234
|
+
- lib/relaton_iso/processor.rb
|
235
235
|
- lib/relaton_iso/scrapper.rb
|
236
236
|
- lib/relaton_iso/version.rb
|
237
237
|
- relaton_iso.gemspec
|
238
|
-
homepage: https://github.com/
|
238
|
+
homepage: https://github.com/relaton/relaton-iso
|
239
239
|
licenses:
|
240
240
|
- BSD-2-Clause
|
241
241
|
metadata: {}
|
data/lib/relaton/processor.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require "relaton/processor"
|
2
|
-
|
3
|
-
module Relaton
|
4
|
-
module RelatonIso
|
5
|
-
class Processor < Relaton::Processor
|
6
|
-
def initialize
|
7
|
-
@short = :relaton_iso
|
8
|
-
@prefix = "ISO"
|
9
|
-
@defaultprefix = %r{^(ISO)[ /]}
|
10
|
-
@idtype = "ISO"
|
11
|
-
end
|
12
|
-
|
13
|
-
def get(code, date, opts)
|
14
|
-
::RelatonIso::IsoBibliography.get(code, date, opts)
|
15
|
-
end
|
16
|
-
|
17
|
-
def from_xml(xml)
|
18
|
-
RelatonIsoBib::XMLParser.from_xml xml
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|