relaton-iso 0.6.5 → 0.6.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/lib/relaton_iso/hit.rb +1 -26
- data/lib/relaton_iso/hit_collection.rb +56 -13
- data/lib/relaton_iso/iso_bibliography.rb +32 -26
- data/lib/relaton_iso/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d39e9890ddcc1333a9430d8f149414e7049af523
|
4
|
+
data.tar.gz: b614ae1fe64ce2b1e7f72c83c6649f9e222346a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e958c6ce53f617d167a5059c772a6a4a469317d35d7444a41b6181879678149f2704306aed4fde38baa5d5771f3a8f46744b09c61549d23585b45a08539a07d
|
7
|
+
data.tar.gz: 531c10407c214307d4a0cca9758d4af2eaa94e5724ce2377510c5970f0a797f43b5b3d5e7aef0a59829c0dcd8c7504a112859de5ead6cbc56b7abbb607aceb6c
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-iso (0.6.
|
4
|
+
relaton-iso (0.6.6)
|
5
5
|
relaton-iec (~> 0.4.0)
|
6
6
|
relaton-iso-bib (~> 0.3.0)
|
7
7
|
|
@@ -36,13 +36,13 @@ GEM
|
|
36
36
|
pry (~> 0.10)
|
37
37
|
public_suffix (4.0.1)
|
38
38
|
rake (10.5.0)
|
39
|
-
relaton-bib (0.3.
|
39
|
+
relaton-bib (0.3.8)
|
40
40
|
addressable
|
41
41
|
nokogiri
|
42
|
-
relaton-iec (0.4.
|
42
|
+
relaton-iec (0.4.6)
|
43
43
|
addressable
|
44
44
|
relaton-iso-bib (~> 0.3.0)
|
45
|
-
relaton-iso-bib (0.3.
|
45
|
+
relaton-iso-bib (0.3.7)
|
46
46
|
isoics (~> 0.1.6)
|
47
47
|
relaton-bib (~> 0.3.0)
|
48
48
|
ruby_deep_clone (~> 0.8.0)
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -2,41 +2,16 @@
|
|
2
2
|
|
3
3
|
module RelatonIso
|
4
4
|
# Hit.
|
5
|
-
class Hit
|
5
|
+
class Hit < RelatonBib::Hit
|
6
6
|
# @return [RelatonIso::HitCollection]
|
7
7
|
attr_reader :hit_collection
|
8
8
|
|
9
|
-
# @return [Array<Hash>]
|
10
|
-
attr_reader :hit
|
11
|
-
|
12
|
-
# @param hit [Hash]
|
13
|
-
# @param hit_collection [RelatonIso:HitCollection]
|
14
|
-
def initialize(hit, hit_collection = nil)
|
15
|
-
@hit = hit
|
16
|
-
@hit_collection = hit_collection
|
17
|
-
end
|
18
|
-
|
19
9
|
# Parse page.
|
20
10
|
# @return [RelatonIso::IsoBibliographicItem]
|
21
11
|
def fetch
|
22
12
|
@fetch ||= Scrapper.parse_page @hit
|
23
13
|
end
|
24
14
|
|
25
|
-
# @return [String]
|
26
|
-
def to_s
|
27
|
-
inspect
|
28
|
-
end
|
29
|
-
|
30
|
-
# @return [String]
|
31
|
-
def inspect
|
32
|
-
# matched_words = @hit["_highlightResult"].
|
33
|
-
# reduce([]) { |a, (_k, v)| a + v["matchedWords"] }.uniq
|
34
|
-
|
35
|
-
"<#{self.class}:#{format('%#.14x', object_id << 1)} "\
|
36
|
-
"@text=\"#{@hit_collection&.ref}\" "\
|
37
|
-
"@reference=\"#{@hit["docRef"]}\""
|
38
|
-
end
|
39
|
-
|
40
15
|
# @param builder [Nokogiri::XML::Builder]
|
41
16
|
def to_xml(builder = nil, **opts)
|
42
17
|
if builder
|
@@ -1,10 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "forwardable"
|
3
4
|
require "relaton_iso/hit"
|
4
5
|
|
5
6
|
module RelatonIso
|
6
7
|
# Page of hit collection.
|
7
|
-
class HitCollection
|
8
|
+
class HitCollection
|
9
|
+
extend Forwardable
|
10
|
+
|
11
|
+
def_delegators :@array, :<<, :[], :first, :empty?, :any?, :size
|
12
|
+
|
8
13
|
# @return [TrueClass, FalseClass]
|
9
14
|
# attr_reader :fetched
|
10
15
|
|
@@ -12,15 +17,13 @@ module RelatonIso
|
|
12
17
|
# attr_reader :hit_pages
|
13
18
|
|
14
19
|
# @return [String]
|
15
|
-
attr_reader :
|
20
|
+
attr_reader :text
|
16
21
|
|
17
22
|
# @param hits [Array<Hash>]
|
18
|
-
def initialize(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
@ref = ref
|
23
|
-
%r{(?<num>\d+)(-(?<part>\d+))?} =~ ref
|
23
|
+
def initialize(text)
|
24
|
+
@array = []
|
25
|
+
@text = text
|
26
|
+
%r{(?<num>\d+)(-(?<part>\d+))?} =~ text
|
24
27
|
http = Net::HTTP.new "www.iso.org", 443
|
25
28
|
http.use_ssl = true
|
26
29
|
search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
@@ -28,12 +31,11 @@ module RelatonIso
|
|
28
31
|
search << "docPartNo=#{part}" if part
|
29
32
|
q = search.join "&"
|
30
33
|
resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
31
|
-
|
34
|
+
"Accept" => "application/json, text/plain, */*")
|
32
35
|
return if resp.body.empty?
|
33
36
|
|
34
37
|
json = JSON.parse resp.body
|
35
|
-
|
36
|
-
sort! do |a, b|
|
38
|
+
@array = json["standards"].map { |h| Hit.new h, self }.sort! do |a, b|
|
37
39
|
if a.sort_weight == b.sort_weight
|
38
40
|
(parse_date(b.hit) - parse_date(a.hit)).to_i
|
39
41
|
else
|
@@ -42,18 +44,59 @@ module RelatonIso
|
|
42
44
|
end
|
43
45
|
end
|
44
46
|
|
47
|
+
def select(&block)
|
48
|
+
me = DeepClone.clone self
|
49
|
+
me.instance_variable_get(:@array).select!(&block)
|
50
|
+
me
|
51
|
+
end
|
52
|
+
|
53
|
+
def reduce!(sum, &block)
|
54
|
+
@array = @array.reduce sum, &block
|
55
|
+
self
|
56
|
+
end
|
57
|
+
|
58
|
+
# @return [RelatonIso::HitCollection]
|
59
|
+
# def fetch
|
60
|
+
# return self if @fetched
|
61
|
+
|
62
|
+
# workers = RelatonBib::WorkersPool.new 4
|
63
|
+
# workers.worker(&:fetch)
|
64
|
+
# @array.each do |hit|
|
65
|
+
# workers << hit
|
66
|
+
# end
|
67
|
+
# workers.end
|
68
|
+
# workers.result
|
69
|
+
# @fetched = true
|
70
|
+
# self
|
71
|
+
# end
|
72
|
+
|
73
|
+
def to_all_parts
|
74
|
+
hit = @array.min_by { |h| h.hit["docPart"].to_i }
|
75
|
+
bibitem = hit.fetch
|
76
|
+
bibitem.to_all_parts
|
77
|
+
@array.reject { |h| h.hit["docRef"] == hit.hit["docRef"] }.each do |hi|
|
78
|
+
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
79
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.hit["docRef"]),
|
80
|
+
)
|
81
|
+
bibitem.relation << RelatonBib::DocumentRelation.new(
|
82
|
+
type: "partOf", bibitem: isobib,
|
83
|
+
)
|
84
|
+
end
|
85
|
+
bibitem
|
86
|
+
end
|
87
|
+
|
45
88
|
def to_s
|
46
89
|
inspect
|
47
90
|
end
|
48
91
|
|
49
92
|
def inspect
|
50
|
-
"<#{self.class}:#{format('%#.14x', object_id << 1)} @ref=#{@
|
93
|
+
"<#{self.class}:#{format('%#.14x', object_id << 1)} @ref=#{@text}>"
|
51
94
|
end
|
52
95
|
|
53
96
|
def to_xml(**opts)
|
54
97
|
builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
|
55
98
|
xml.documents do
|
56
|
-
each do |hit|
|
99
|
+
@array.each do |hit|
|
57
100
|
hit.fetch
|
58
101
|
hit.to_xml xml, **opts
|
59
102
|
end
|
@@ -46,14 +46,15 @@ module RelatonIso
|
|
46
46
|
year = year1
|
47
47
|
end
|
48
48
|
end
|
49
|
-
code
|
49
|
+
opts[:all_parts] ||= code !~ %r{^[^\s]+\s\d+-\d+} && opts[:all_parts].nil?
|
50
|
+
# code += "-1" if all_parts
|
50
51
|
return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR] =~ code
|
51
52
|
|
52
|
-
ret = isobib_get1(code, year, corr)
|
53
|
+
ret = isobib_get1(code, year, corr, opts)
|
53
54
|
return nil if ret.nil?
|
54
55
|
|
55
|
-
ret.to_most_recent_reference unless year || opts[:keep_year]
|
56
|
-
ret.to_all_parts if
|
56
|
+
ret.to_most_recent_reference unless year || opts[:keep_year] || opts[:all_parts]
|
57
|
+
# ret.to_all_parts if all_parts
|
57
58
|
ret
|
58
59
|
end
|
59
60
|
|
@@ -89,20 +90,20 @@ module RelatonIso
|
|
89
90
|
# @param code [String] reference without correction
|
90
91
|
# @param corr [String] correction
|
91
92
|
# @return [Array<RelatonIso::Hit>]
|
92
|
-
def isobib_search_filter(code, corr)
|
93
|
+
def isobib_search_filter(code, corr, opts)
|
93
94
|
warn "fetching #{code}..."
|
94
95
|
result = search(code)
|
95
|
-
res = search_code result, code, corr
|
96
|
+
res = search_code result, code, corr, opts
|
96
97
|
return res unless res.empty?
|
97
98
|
|
98
99
|
# try stages
|
99
100
|
if %r{^\w+/[^/]+\s\d+} =~ code # code like ISO/IEC 123, ISO/IEC/IEE 123
|
100
|
-
res = try_stages(result, corr) do |st|
|
101
|
+
res = try_stages(result, corr, opts) do |st|
|
101
102
|
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
102
103
|
end
|
103
104
|
return res unless res.empty?
|
104
105
|
elsif %r{^\w+\s\d+} =~ code # code like ISO 123
|
105
|
-
res = try_stages(result, corr) do |st|
|
106
|
+
res = try_stages(result, corr, opts) do |st|
|
106
107
|
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
107
108
|
end
|
108
109
|
return res unless res.empty?
|
@@ -111,24 +112,24 @@ module RelatonIso
|
|
111
112
|
if %r{^ISO\s} =~ code # try ISO/IEC if ISO not found
|
112
113
|
warn "Attempting ISO/IEC retrieval"
|
113
114
|
c = code.sub "ISO", "ISO/IEC"
|
114
|
-
res = search_code result, c, corr
|
115
|
+
res = search_code result, c, corr, opts
|
115
116
|
end
|
116
117
|
res
|
117
118
|
end
|
118
119
|
|
119
|
-
def try_stages(result, corr)
|
120
|
+
def try_stages(result, corr, opts)
|
120
121
|
%w[NP WD CD DIS FDIS PRF IS AWI].each do |st| # try stages
|
121
122
|
warn "Attempting #{st} stage retrieval"
|
122
123
|
c = yield st
|
123
|
-
res = search_code result, c, corr
|
124
|
+
res = search_code result, c, corr, opts
|
124
125
|
return res unless res.empty?
|
125
126
|
end
|
126
|
-
|
127
|
+
result
|
127
128
|
end
|
128
129
|
|
129
|
-
def search_code(result, code, corr)
|
130
|
+
def search_code(result, code, corr, opts)
|
130
131
|
result.select do |i|
|
131
|
-
i.hit["docRef"] =~ %r{^#{code}(?!-)} && (
|
132
|
+
(opts[:all_parts] || i.hit["docRef"] =~ %r{^#{code}(?!-)}) && (
|
132
133
|
corr && %r{^#{code}[\w-]*(:\d{4})?/#{corr}} =~ i.hit["docRef"] ||
|
133
134
|
%r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"] && !corr
|
134
135
|
)
|
@@ -141,24 +142,29 @@ module RelatonIso
|
|
141
142
|
# Only expects the first page of results to be populated.
|
142
143
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
143
144
|
# If no match, returns any years which caused mismatch, for error reporting
|
144
|
-
def isobib_results_filter(result, year)
|
145
|
+
def isobib_results_filter(result, year, opts)
|
145
146
|
missed_years = []
|
146
|
-
result.
|
147
|
-
|
148
|
-
|
147
|
+
hits = result.reduce!([]) do |hts, h|
|
148
|
+
if !year && h.hit["publicationStatus"] == "Withdrawn"
|
149
|
+
hts
|
150
|
+
elsif !year || %r{:(?<iyear>\d{4})} =~ h.hit["docRef"] && iyear == year
|
151
|
+
hts << h
|
152
|
+
else
|
153
|
+
missed_years << iyear
|
154
|
+
hts
|
155
|
+
end
|
156
|
+
end
|
157
|
+
return { years: missed_years } unless hits.any?
|
149
158
|
|
150
|
-
|
151
|
-
return { ret: s.fetch } if iyear == year
|
159
|
+
return { ret: hits.first.fetch } if !opts[:all_parts] || hits.size == 1
|
152
160
|
|
153
|
-
|
154
|
-
end
|
155
|
-
{ years: missed_years }
|
161
|
+
{ ret: hits.to_all_parts }
|
156
162
|
end
|
157
163
|
|
158
|
-
def isobib_get1(code, year, corr)
|
164
|
+
def isobib_get1(code, year, corr, opts)
|
159
165
|
# return iev(code) if /^IEC 60050-/.match code
|
160
|
-
result = isobib_search_filter(code, corr) || return
|
161
|
-
ret = isobib_results_filter(result, year)
|
166
|
+
result = isobib_search_filter(code, corr, opts) || return
|
167
|
+
ret = isobib_results_filter(result, year, opts)
|
162
168
|
return ret[:ret] if ret[:ret]
|
163
169
|
|
164
170
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iso/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|