relaton-iso 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/lib/relaton_iso/hit.rb +1 -26
- data/lib/relaton_iso/hit_collection.rb +56 -13
- data/lib/relaton_iso/iso_bibliography.rb +32 -26
- data/lib/relaton_iso/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d39e9890ddcc1333a9430d8f149414e7049af523
|
4
|
+
data.tar.gz: b614ae1fe64ce2b1e7f72c83c6649f9e222346a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e958c6ce53f617d167a5059c772a6a4a469317d35d7444a41b6181879678149f2704306aed4fde38baa5d5771f3a8f46744b09c61549d23585b45a08539a07d
|
7
|
+
data.tar.gz: 531c10407c214307d4a0cca9758d4af2eaa94e5724ce2377510c5970f0a797f43b5b3d5e7aef0a59829c0dcd8c7504a112859de5ead6cbc56b7abbb607aceb6c
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-iso (0.6.5)
|
4
|
+
relaton-iso (0.6.6)
|
5
5
|
relaton-iec (~> 0.4.0)
|
6
6
|
relaton-iso-bib (~> 0.3.0)
|
7
7
|
|
@@ -36,13 +36,13 @@ GEM
|
|
36
36
|
pry (~> 0.10)
|
37
37
|
public_suffix (4.0.1)
|
38
38
|
rake (10.5.0)
|
39
|
-
relaton-bib (0.3.
|
39
|
+
relaton-bib (0.3.8)
|
40
40
|
addressable
|
41
41
|
nokogiri
|
42
|
-
relaton-iec (0.4.
|
42
|
+
relaton-iec (0.4.6)
|
43
43
|
addressable
|
44
44
|
relaton-iso-bib (~> 0.3.0)
|
45
|
-
relaton-iso-bib (0.3.
|
45
|
+
relaton-iso-bib (0.3.7)
|
46
46
|
isoics (~> 0.1.6)
|
47
47
|
relaton-bib (~> 0.3.0)
|
48
48
|
ruby_deep_clone (~> 0.8.0)
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -2,41 +2,16 @@
|
|
2
2
|
|
3
3
|
module RelatonIso
|
4
4
|
# Hit.
|
5
|
-
class Hit
|
5
|
+
class Hit < RelatonBib::Hit
|
6
6
|
# @return [RelatonIso::HitCollection]
|
7
7
|
attr_reader :hit_collection
|
8
8
|
|
9
|
-
# @return [Array<Hash>]
|
10
|
-
attr_reader :hit
|
11
|
-
|
12
|
-
# @param hit [Hash]
|
13
|
-
# @param hit_collection [RelatonIso:HitCollection]
|
14
|
-
def initialize(hit, hit_collection = nil)
|
15
|
-
@hit = hit
|
16
|
-
@hit_collection = hit_collection
|
17
|
-
end
|
18
|
-
|
19
9
|
# Parse page.
|
20
10
|
# @return [RelatonIso::IsoBibliographicItem]
|
21
11
|
def fetch
|
22
12
|
@fetch ||= Scrapper.parse_page @hit
|
23
13
|
end
|
24
14
|
|
25
|
-
# @return [String]
|
26
|
-
def to_s
|
27
|
-
inspect
|
28
|
-
end
|
29
|
-
|
30
|
-
# @return [String]
|
31
|
-
def inspect
|
32
|
-
# matched_words = @hit["_highlightResult"].
|
33
|
-
# reduce([]) { |a, (_k, v)| a + v["matchedWords"] }.uniq
|
34
|
-
|
35
|
-
"<#{self.class}:#{format('%#.14x', object_id << 1)} "\
|
36
|
-
"@text=\"#{@hit_collection&.ref}\" "\
|
37
|
-
"@reference=\"#{@hit["docRef"]}\""
|
38
|
-
end
|
39
|
-
|
40
15
|
# @param builder [Nokogiri::XML::Builder]
|
41
16
|
def to_xml(builder = nil, **opts)
|
42
17
|
if builder
|
data/lib/relaton_iso/hit_collection.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "forwardable"
|
3
4
|
require "relaton_iso/hit"
|
4
5
|
|
5
6
|
module RelatonIso
|
6
7
|
# Page of hit collection.
|
7
|
-
class HitCollection
|
8
|
+
class HitCollection
|
9
|
+
extend Forwardable
|
10
|
+
|
11
|
+
def_delegators :@array, :<<, :[], :first, :empty?, :any?, :size
|
12
|
+
|
8
13
|
# @return [TrueClass, FalseClass]
|
9
14
|
# attr_reader :fetched
|
10
15
|
|
@@ -12,15 +17,13 @@ module RelatonIso
|
|
12
17
|
# attr_reader :hit_pages
|
13
18
|
|
14
19
|
# @return [String]
|
15
|
-
attr_reader :ref
|
20
|
+
attr_reader :text
|
16
21
|
|
17
22
|
# @param hits [Array<Hash>]
|
18
|
-
def initialize(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
@ref = ref
|
23
|
-
%r{(?<num>\d+)(-(?<part>\d+))?} =~ ref
|
23
|
+
def initialize(text)
|
24
|
+
@array = []
|
25
|
+
@text = text
|
26
|
+
%r{(?<num>\d+)(-(?<part>\d+))?} =~ text
|
24
27
|
http = Net::HTTP.new "www.iso.org", 443
|
25
28
|
http.use_ssl = true
|
26
29
|
search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
@@ -28,12 +31,11 @@ module RelatonIso
|
|
28
31
|
search << "docPartNo=#{part}" if part
|
29
32
|
q = search.join "&"
|
30
33
|
resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
31
|
-
|
34
|
+
"Accept" => "application/json, text/plain, */*")
|
32
35
|
return if resp.body.empty?
|
33
36
|
|
34
37
|
json = JSON.parse resp.body
|
35
|
-
|
36
|
-
sort! do |a, b|
|
38
|
+
@array = json["standards"].map { |h| Hit.new h, self }.sort! do |a, b|
|
37
39
|
if a.sort_weight == b.sort_weight
|
38
40
|
(parse_date(b.hit) - parse_date(a.hit)).to_i
|
39
41
|
else
|
@@ -42,18 +44,59 @@ module RelatonIso
|
|
42
44
|
end
|
43
45
|
end
|
44
46
|
|
47
|
+
def select(&block)
|
48
|
+
me = DeepClone.clone self
|
49
|
+
me.instance_variable_get(:@array).select!(&block)
|
50
|
+
me
|
51
|
+
end
|
52
|
+
|
53
|
+
def reduce!(sum, &block)
|
54
|
+
@array = @array.reduce sum, &block
|
55
|
+
self
|
56
|
+
end
|
57
|
+
|
58
|
+
# @return [RelatonIso::HitCollection]
|
59
|
+
# def fetch
|
60
|
+
# return self if @fetched
|
61
|
+
|
62
|
+
# workers = RelatonBib::WorkersPool.new 4
|
63
|
+
# workers.worker(&:fetch)
|
64
|
+
# @array.each do |hit|
|
65
|
+
# workers << hit
|
66
|
+
# end
|
67
|
+
# workers.end
|
68
|
+
# workers.result
|
69
|
+
# @fetched = true
|
70
|
+
# self
|
71
|
+
# end
|
72
|
+
|
73
|
+
def to_all_parts
|
74
|
+
hit = @array.min_by { |h| h.hit["docPart"].to_i }
|
75
|
+
bibitem = hit.fetch
|
76
|
+
bibitem.to_all_parts
|
77
|
+
@array.reject { |h| h.hit["docRef"] == hit.hit["docRef"] }.each do |hi|
|
78
|
+
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
79
|
+
formattedref: RelatonBib::FormattedRef.new(content: hi.hit["docRef"]),
|
80
|
+
)
|
81
|
+
bibitem.relation << RelatonBib::DocumentRelation.new(
|
82
|
+
type: "partOf", bibitem: isobib,
|
83
|
+
)
|
84
|
+
end
|
85
|
+
bibitem
|
86
|
+
end
|
87
|
+
|
45
88
|
def to_s
|
46
89
|
inspect
|
47
90
|
end
|
48
91
|
|
49
92
|
def inspect
|
50
|
-
"<#{self.class}:#{format('%#.14x', object_id << 1)} @ref=#{@
|
93
|
+
"<#{self.class}:#{format('%#.14x', object_id << 1)} @ref=#{@text}>"
|
51
94
|
end
|
52
95
|
|
53
96
|
def to_xml(**opts)
|
54
97
|
builder = Nokogiri::XML::Builder.new(encoding: "UTF-8") do |xml|
|
55
98
|
xml.documents do
|
56
|
-
each do |hit|
|
99
|
+
@array.each do |hit|
|
57
100
|
hit.fetch
|
58
101
|
hit.to_xml xml, **opts
|
59
102
|
end
|
data/lib/relaton_iso/iso_bibliography.rb
CHANGED
@@ -46,14 +46,15 @@ module RelatonIso
|
|
46
46
|
year = year1
|
47
47
|
end
|
48
48
|
end
|
49
|
-
code
|
49
|
+
opts[:all_parts] ||= code !~ %r{^[^\s]+\s\d+-\d+} && opts[:all_parts].nil?
|
50
|
+
# code += "-1" if all_parts
|
50
51
|
return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR] =~ code
|
51
52
|
|
52
|
-
ret = isobib_get1(code, year, corr)
|
53
|
+
ret = isobib_get1(code, year, corr, opts)
|
53
54
|
return nil if ret.nil?
|
54
55
|
|
55
|
-
ret.to_most_recent_reference unless year || opts[:keep_year]
|
56
|
-
ret.to_all_parts if
|
56
|
+
ret.to_most_recent_reference unless year || opts[:keep_year] || opts[:all_parts]
|
57
|
+
# ret.to_all_parts if all_parts
|
57
58
|
ret
|
58
59
|
end
|
59
60
|
|
@@ -89,20 +90,20 @@ module RelatonIso
|
|
89
90
|
# @param code [String] reference without correction
|
90
91
|
# @param corr [String] correction
|
91
92
|
# @return [Array<RelatonIso::Hit>]
|
92
|
-
def isobib_search_filter(code, corr)
|
93
|
+
def isobib_search_filter(code, corr, opts)
|
93
94
|
warn "fetching #{code}..."
|
94
95
|
result = search(code)
|
95
|
-
res = search_code result, code, corr
|
96
|
+
res = search_code result, code, corr, opts
|
96
97
|
return res unless res.empty?
|
97
98
|
|
98
99
|
# try stages
|
99
100
|
if %r{^\w+/[^/]+\s\d+} =~ code # code like ISO/IEC 123, ISO/IEC/IEE 123
|
100
|
-
res = try_stages(result, corr) do |st|
|
101
|
+
res = try_stages(result, corr, opts) do |st|
|
101
102
|
code.sub(%r{^(?<pref>[^\s]+\s)}) { "#{$~[:pref]}#{st} " }
|
102
103
|
end
|
103
104
|
return res unless res.empty?
|
104
105
|
elsif %r{^\w+\s\d+} =~ code # code like ISO 123
|
105
|
-
res = try_stages(result, corr) do |st|
|
106
|
+
res = try_stages(result, corr, opts) do |st|
|
106
107
|
code.sub(%r{^(?<pref>\w+)}) { "#{$~[:pref]}/#{st}" }
|
107
108
|
end
|
108
109
|
return res unless res.empty?
|
@@ -111,24 +112,24 @@ module RelatonIso
|
|
111
112
|
if %r{^ISO\s} =~ code # try ISO/IEC if ISO not found
|
112
113
|
warn "Attempting ISO/IEC retrieval"
|
113
114
|
c = code.sub "ISO", "ISO/IEC"
|
114
|
-
res = search_code result, c, corr
|
115
|
+
res = search_code result, c, corr, opts
|
115
116
|
end
|
116
117
|
res
|
117
118
|
end
|
118
119
|
|
119
|
-
def try_stages(result, corr)
|
120
|
+
def try_stages(result, corr, opts)
|
120
121
|
%w[NP WD CD DIS FDIS PRF IS AWI].each do |st| # try stages
|
121
122
|
warn "Attempting #{st} stage retrieval"
|
122
123
|
c = yield st
|
123
|
-
res = search_code result, c, corr
|
124
|
+
res = search_code result, c, corr, opts
|
124
125
|
return res unless res.empty?
|
125
126
|
end
|
126
|
-
|
127
|
+
result
|
127
128
|
end
|
128
129
|
|
129
|
-
def search_code(result, code, corr)
|
130
|
+
def search_code(result, code, corr, opts)
|
130
131
|
result.select do |i|
|
131
|
-
i.hit["docRef"] =~ %r{^#{code}(?!-)} && (
|
132
|
+
(opts[:all_parts] || i.hit["docRef"] =~ %r{^#{code}(?!-)}) && (
|
132
133
|
corr && %r{^#{code}[\w-]*(:\d{4})?/#{corr}} =~ i.hit["docRef"] ||
|
133
134
|
%r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"] && !corr
|
134
135
|
)
|
@@ -141,24 +142,29 @@ module RelatonIso
|
|
141
142
|
# Only expects the first page of results to be populated.
|
142
143
|
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
143
144
|
# If no match, returns any years which caused mismatch, for error reporting
|
144
|
-
def isobib_results_filter(result, year)
|
145
|
+
def isobib_results_filter(result, year, opts)
|
145
146
|
missed_years = []
|
146
|
-
result.
|
147
|
-
|
148
|
-
|
147
|
+
hits = result.reduce!([]) do |hts, h|
|
148
|
+
if !year && h.hit["publicationStatus"] == "Withdrawn"
|
149
|
+
hts
|
150
|
+
elsif !year || %r{:(?<iyear>\d{4})} =~ h.hit["docRef"] && iyear == year
|
151
|
+
hts << h
|
152
|
+
else
|
153
|
+
missed_years << iyear
|
154
|
+
hts
|
155
|
+
end
|
156
|
+
end
|
157
|
+
return { years: missed_years } unless hits.any?
|
149
158
|
|
150
|
-
|
151
|
-
return { ret: s.fetch } if iyear == year
|
159
|
+
return { ret: hits.first.fetch } if !opts[:all_parts] || hits.size == 1
|
152
160
|
|
153
|
-
|
154
|
-
end
|
155
|
-
{ years: missed_years }
|
161
|
+
{ ret: hits.to_all_parts }
|
156
162
|
end
|
157
163
|
|
158
|
-
def isobib_get1(code, year, corr)
|
164
|
+
def isobib_get1(code, year, corr, opts)
|
159
165
|
# return iev(code) if /^IEC 60050-/.match code
|
160
|
-
result = isobib_search_filter(code, corr) || return
|
161
|
-
ret = isobib_results_filter(result, year)
|
166
|
+
result = isobib_search_filter(code, corr, opts) || return
|
167
|
+
ret = isobib_results_filter(result, year, opts)
|
162
168
|
return ret[:ret] if ret[:ret]
|
163
169
|
|
164
170
|
fetch_ref_err(code, year, ret[:years])
|
data/lib/relaton_iso/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.5
|
4
|
+
version: 0.6.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|