relaton-iso 1.15.4 → 1.15.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +2 -1
- data/Gemfile +3 -0
- data/README.adoc +42 -62
- data/lib/relaton_iso/config.rb +10 -0
- data/lib/relaton_iso/hit.rb +3 -3
- data/lib/relaton_iso/hit_collection.rb +10 -31
- data/lib/relaton_iso/iso_bibliography.rb +76 -129
- data/lib/relaton_iso/scrapper.rb +0 -5
- data/lib/relaton_iso/util.rb +9 -0
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +8 -1
- data/relaton_iso.gemspec +6 -9
- metadata +18 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fadb60d3f225174438b872a27c8632fcb9dcb685b66ef67071b7ab16dd78f052
|
4
|
+
data.tar.gz: b53c685d0adf0856851f31d1808654f59f16d0ccd8db70b2ef818b22126e505e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a10856c01099b1590b99ab0c2102aaf6dee654bd3fdf0b6a7f0717d3f85881bc58999219168e6ba707846cbd3fc785fbc9951003d7c21ad6c134af2a0fb0d4ea
|
7
|
+
data.tar.gz: a9a4b7a79acf34a0a42218f28979ec06a41c0b6305fcfbb77ebec0cf4881e2b806548ae5603112122420f1c4ed0d365d9be50a2c291fb4d5a00fe68534f2311b
|
@@ -7,7 +7,8 @@ on:
|
|
7
7
|
inputs:
|
8
8
|
next_version:
|
9
9
|
description: |
|
10
|
-
Next release version. Possible values: x.y.z, major, minor, patch or pre|rc|etc
|
10
|
+
Next release version. Possible values: x.y.z, major, minor, patch (or pre|rc|etc).
|
11
|
+
Also, you can pass 'skip' to skip 'git tag' and do 'gem push' for the current version
|
11
12
|
required: true
|
12
13
|
default: 'skip'
|
13
14
|
repository_dispatch:
|
data/Gemfile
CHANGED
data/README.adoc
CHANGED
@@ -31,13 +31,24 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
== Usage
|
33
33
|
|
34
|
-
===
|
34
|
+
=== Configuration
|
35
|
+
|
36
|
+
Configuration is optional. The available option is `logger` which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with `Logger::WARN` level. To change the logger level, use `RelatonIso.configure` block.
|
35
37
|
|
36
38
|
[source,ruby]
|
37
39
|
----
|
38
40
|
require 'relaton_iso'
|
39
41
|
=> true
|
40
42
|
|
43
|
+
RelatonIso.configure do |config|
|
44
|
+
config.logger.level = Logger::DEBUG
|
45
|
+
end
|
46
|
+
----
|
47
|
+
|
48
|
+
=== Search for standards using keywords
|
49
|
+
|
50
|
+
[source,ruby]
|
51
|
+
----
|
41
52
|
hit_collection = RelatonIso::IsoBibliography.search("ISO 19115")
|
42
53
|
=> <RelatonIso::HitCollection:0x007fa5bc847038 @ref=19115 @fetched=false>
|
43
54
|
|
@@ -49,37 +60,11 @@ item = hit_collection[2].fetch
|
|
49
60
|
...
|
50
61
|
|
51
62
|
item.docidentifier
|
52
|
-
=> [#<RelatonIso::DocumentIdentifier:
|
53
|
-
|
54
|
-
#<Pubid::Iso::Identifier:0x00007fa8b7db2a60
|
55
|
-
@base=#<Pubid::Iso::Identifier:0x00007fa8b7db2c68 @edition="1", @number="19115"@4, @part="1", @publisher="ISO", @year=2014>,
|
56
|
-
@number="2"@21,
|
57
|
-
@publisher="ISO",
|
58
|
-
@stage=#<Pubid::Iso::Stage:0x00007fa8b7db20d8 @abbr=nil, @harmonized_code=#<Pubid::Iso::HarmonizedStageCode:0x00007fa8b7db2088 @stages=["60.60"]>>,
|
59
|
-
@typed_stage=#<Pubid::Iso::TypedStage:0x00007fa8b7db2920 @type=#<Pubid::Iso::Type:0x00007fa8b7db26c8 @type=:amd>, @typed_stage=nil>,
|
60
|
-
@year=2020>,
|
61
|
-
@language=nil,
|
62
|
-
@primary=true,
|
63
|
-
@scope=nil,
|
64
|
-
@script=nil,
|
65
|
-
@type="ISO">,
|
66
|
-
#<RelatonIso::DocumentIdentifier:0x00007fa8b7db1e80
|
67
|
-
@id=
|
68
|
-
#<Pubid::Iso::Identifier:0x00007fa8b7db2a60
|
69
|
-
@base=#<Pubid::Iso::Identifier:0x00007fa8b7db2c68 @edition="1", @number="19115"@4, @part="1", @publisher="ISO", @year=2014>,
|
70
|
-
@number="2"@21,
|
71
|
-
@publisher="ISO",
|
72
|
-
@stage=#<Pubid::Iso::Stage:0x00007fa8b7db20d8 @abbr=nil, @harmonized_code=#<Pubid::Iso::HarmonizedStageCode:0x00007fa8b7db2088 @stages=["60.60"]>>,
|
73
|
-
@typed_stage=#<Pubid::Iso::TypedStage:0x00007fa8b7db2920 @type=#<Pubid::Iso::Type:0x00007fa8b7db26c8 @type=:amd>, @typed_stage=nil>,
|
74
|
-
@year=2020>,
|
75
|
-
@language=nil,
|
76
|
-
@primary=nil,
|
77
|
-
@scope=nil,
|
78
|
-
@script=nil,
|
79
|
-
@type="URN">]
|
63
|
+
=> [#<RelatonIso::DocumentIdentifier:0x0000000112a23a88
|
64
|
+
...
|
80
65
|
|
81
66
|
item.docidentifier.detect { |di| di.type == "URN" }.id
|
82
|
-
=> "urn:iso:std:iso:19115:-1:ed-1:amd:2020:v2"
|
67
|
+
=> "urn:iso:std:iso:19115:-1:ed-1:stage-60.60:amd:2020:v2"
|
83
68
|
----
|
84
69
|
|
85
70
|
=== Fetch document by reference and year
|
@@ -93,9 +78,9 @@ item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
|
93
78
|
...
|
94
79
|
|
95
80
|
item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
|
96
|
-
[relaton-iso] ("ISO 19115")
|
97
|
-
[relaton-iso] ("ISO 19115:2003") Found
|
98
|
-
=> #<RelatonIsoBib::IsoBibliographicItem:
|
81
|
+
[relaton-iso] ("ISO 19115:2003") Fetching from ISO...
|
82
|
+
[relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
|
83
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x0000000112c9ca80
|
99
84
|
...
|
100
85
|
|
101
86
|
item.docidentifier[0].id
|
@@ -113,7 +98,7 @@ item = RelatonIso::IsoBibliography.get "ISO 19115"
|
|
113
98
|
...
|
114
99
|
|
115
100
|
item.docidentifier[0].id
|
116
|
-
=> "ISO 19115
|
101
|
+
=> "ISO 19115"
|
117
102
|
----
|
118
103
|
|
119
104
|
=== Fetch a part document
|
@@ -136,13 +121,13 @@ item.docidentifier[0].id
|
|
136
121
|
----
|
137
122
|
item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
|
138
123
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
139
|
-
[relaton-iso] ("ISO 19115") Found
|
124
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
140
125
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
|
141
126
|
...
|
142
127
|
|
143
128
|
item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
|
144
129
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
145
|
-
[relaton-iso] ("ISO 19115") Found
|
130
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
146
131
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
|
147
132
|
...
|
148
133
|
|
@@ -151,12 +136,12 @@ item.docidentifier[0].id
|
|
151
136
|
|
152
137
|
item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
|
153
138
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
154
|
-
[relaton-iso] ("ISO 19115") Found
|
139
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
155
140
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
|
156
141
|
|
157
142
|
item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
|
158
143
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
159
|
-
[relaton-iso] ("ISO 19115") Found
|
144
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
160
145
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
|
161
146
|
...
|
162
147
|
|
@@ -217,35 +202,31 @@ item.to_xml note: [{ text: "Note", type: "note" }]
|
|
217
202
|
[source,ruby]
|
218
203
|
----
|
219
204
|
item.title lang: 'en'
|
220
|
-
=> #<RelatonBib::TypedTitleStringCollection:
|
205
|
+
=> #<RelatonBib::TypedTitleStringCollection:0x0000000112783fd0
|
221
206
|
@array=
|
222
|
-
[#<RelatonBib::TypedTitleString:
|
223
|
-
@title=#<RelatonBib::FormattedString:
|
207
|
+
[#<RelatonBib::TypedTitleString:0x00000001138e2380
|
208
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d496b8 @content="Geographic information", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
224
209
|
@type="title-intro">,
|
225
|
-
#<RelatonBib::TypedTitleString:
|
226
|
-
@title=#<RelatonBib::FormattedString:
|
210
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1f70
|
211
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d495c8 @content="Metadata", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
227
212
|
@type="title-main">,
|
228
|
-
#<RelatonBib::TypedTitleString:
|
213
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1d68
|
229
214
|
@title=
|
230
|
-
#<RelatonBib::FormattedString:
|
231
|
-
@content="Geographic information – Metadata",
|
232
|
-
@format="text/plain",
|
233
|
-
@language=["en"],
|
234
|
-
@script=["Latn"]>,
|
215
|
+
#<RelatonBib::FormattedString:0x0000000112d49488 @content="Geographic information – Metadata", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
235
216
|
@type="main">]>
|
236
217
|
|
237
218
|
item.title lang: 'fr'
|
238
|
-
=> #<RelatonBib::TypedTitleStringCollection:
|
219
|
+
=> #<RelatonBib::TypedTitleStringCollection:0x0000000113067458
|
239
220
|
@array=
|
240
|
-
[#<RelatonBib::TypedTitleString:
|
241
|
-
@title=#<RelatonBib::FormattedString:
|
221
|
+
[#<RelatonBib::TypedTitleString:0x00000001138e1c28
|
222
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d49438 @content="Information géographique", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
242
223
|
@type="title-intro">,
|
243
|
-
#<RelatonBib::TypedTitleString:
|
244
|
-
@title=#<RelatonBib::FormattedString:
|
224
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1b10
|
225
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d49398 @content="Métadonnées", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
245
226
|
@type="title-main">,
|
246
|
-
#<RelatonBib::TypedTitleString:
|
227
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1908
|
247
228
|
@title=
|
248
|
-
#<RelatonBib::FormattedString:
|
229
|
+
#<RelatonBib::FormattedString:0x0000000112d491b8
|
249
230
|
@content="Information géographique – Métadonnées",
|
250
231
|
@format="text/plain",
|
251
232
|
@language=["fr"],
|
@@ -254,10 +235,9 @@ item.title lang: 'fr'
|
|
254
235
|
|
255
236
|
item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
256
237
|
[relaton-iso] ("ISO 19115:2003") Fetching from ISO...
|
257
|
-
[relaton-iso] ("ISO 19115:2003") Found
|
238
|
+
[relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
|
258
239
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007fa8870b69e0
|
259
240
|
|
260
|
-
item.abstract lang: 'en'
|
261
241
|
item.abstract lang: 'en'
|
262
242
|
=> #<RelatonBib::FormattedString:0x00007fa8870b4f78
|
263
243
|
@content=
|
@@ -274,13 +254,13 @@ Each ISO document has `src` type link and optional `obp`, `rss`, and `pub` link
|
|
274
254
|
[source,ruby]
|
275
255
|
----
|
276
256
|
item.link
|
277
|
-
=> [#<RelatonBib::TypedUri:
|
278
|
-
@content=#<Addressable::URI:
|
257
|
+
=> [#<RelatonBib::TypedUri:0x0000000112d66c40
|
258
|
+
@content=#<Addressable::URI:0x93d71c URI:https://www.iso.org/standard/26020.html>,
|
279
259
|
@language=nil,
|
280
260
|
@script=nil,
|
281
261
|
@type="src">,
|
282
|
-
#<RelatonBib::TypedUri:
|
283
|
-
@content=#<Addressable::URI:
|
262
|
+
#<RelatonBib::TypedUri:0x0000000112d66920
|
263
|
+
@content=#<Addressable::URI:0x93d730 URI:https://www.iso.org/contents/data/standard/02/60/26020.detail.rss>,
|
284
264
|
@language=nil,
|
285
265
|
@script=nil,
|
286
266
|
@type="rss">]
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -42,9 +42,9 @@ module RelatonIso
|
|
42
42
|
# @return [Pubid::Iso::Identifier]
|
43
43
|
def pubid
|
44
44
|
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
45
|
-
rescue Pubid::Iso::Errors::WrongTypeError => e
|
46
|
-
warn "
|
47
|
-
warn
|
45
|
+
rescue Pubid::Iso::Errors::WrongTypeError, Pubid::Iso::Errors::ParseError => e
|
46
|
+
Util.warn "unable to find an identifier in `#{hit[:title]}`."
|
47
|
+
Util.warn e.message
|
48
48
|
end
|
49
49
|
end
|
50
50
|
end
|
@@ -6,17 +6,23 @@ require "relaton_iso/hit"
|
|
6
6
|
module RelatonIso
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
-
#
|
9
|
+
# @return [Boolean] whether the search was performed on GitHub
|
10
|
+
attr_reader :from_gh
|
10
11
|
|
11
12
|
# @param text [String] reference to search
|
12
13
|
def initialize(text)
|
13
14
|
super
|
14
|
-
@
|
15
|
+
@from_gh = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/)
|
16
|
+
end
|
17
|
+
|
18
|
+
def fetch
|
19
|
+
@array = from_gh ? fetch_github : fetch_iso
|
20
|
+
self
|
15
21
|
end
|
16
22
|
|
17
23
|
# @param lang [String, NilClass]
|
18
24
|
# @return [RelatonIsoBib::IsoBibliographicItem, nil]
|
19
|
-
def to_all_parts(lang = nil) # rubocop:disable Metrics/
|
25
|
+
def to_all_parts(lang = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
20
26
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
27
|
hit = @array.min_by { |h| h.pubid.part.to_i }
|
22
28
|
return @array.first&.fetch lang unless hit
|
@@ -33,7 +39,6 @@ module RelatonIso
|
|
33
39
|
end
|
34
40
|
all_parts_item
|
35
41
|
end
|
36
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
37
42
|
|
38
43
|
private
|
39
44
|
|
@@ -63,23 +68,11 @@ module RelatonIso
|
|
63
68
|
# @return [Array<RelatonIso::Hit>]
|
64
69
|
#
|
65
70
|
def fetch_iso # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
66
|
-
# %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
|
67
|
-
# http = Net::HTTP.new "www.iso.org", 443
|
68
|
-
# http.use_ssl = true
|
69
|
-
# search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
70
|
-
# search << "docNumber=#{num}"
|
71
|
-
# search << "docPartNo=#{part}" if part
|
72
|
-
# q = search.join "&"
|
73
|
-
# resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
74
|
-
# "Accept" => "application/json, text/plain, */*")
|
75
71
|
config = Algolia::Search::Config.new(application_id: "JCL49WV5AR", api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0")
|
76
|
-
client = Algolia::Search::Client.new config, logger:
|
72
|
+
client = Algolia::Search::Client.new config, logger: RelatonIso.configuration.logger
|
77
73
|
index = client.init_index "all_en"
|
78
74
|
resp = index.search text, hitsPerPage: 100, filters: "category:standard"
|
79
|
-
# return [] if resp.body.empty?
|
80
75
|
|
81
|
-
# json = JSON.parse resp.body
|
82
|
-
# json["standards"]
|
83
76
|
resp[:hits].map { |h| Hit.new h, self }.sort! do |a, b|
|
84
77
|
if a.sort_weight == b.sort_weight && b.hit[:year] = a.hit[:year]
|
85
78
|
a.hit[:title] <=> b.hit[:title]
|
@@ -90,19 +83,5 @@ module RelatonIso
|
|
90
83
|
end
|
91
84
|
end
|
92
85
|
end
|
93
|
-
|
94
|
-
# @param hit [Hash]
|
95
|
-
# @return [Date]
|
96
|
-
# def parse_date(hit)
|
97
|
-
# if hit["publicationDate"]
|
98
|
-
# Date.strptime(hit["publicationDate"], "%Y-%m")
|
99
|
-
# elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
|
100
|
-
# Date.strptime(year, "%Y")
|
101
|
-
# elsif hit["newProjectDate"]
|
102
|
-
# Date.parse hit["newProjectDate"]
|
103
|
-
# else
|
104
|
-
# Date.new 0
|
105
|
-
# end
|
106
|
-
# end
|
107
86
|
end
|
108
87
|
end
|
@@ -12,7 +12,7 @@ module RelatonIso
|
|
12
12
|
# @param text [String]
|
13
13
|
# @return [RelatonIso::HitCollection]
|
14
14
|
def search(text)
|
15
|
-
HitCollection.new
|
15
|
+
HitCollection.new(text.gsub("\u2013", "-")).fetch
|
16
16
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
17
17
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
18
18
|
Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
|
@@ -29,7 +29,7 @@ module RelatonIso
|
|
29
29
|
#
|
30
30
|
# @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference
|
31
31
|
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
32
|
-
code = ref.gsub(
|
32
|
+
code = ref.gsub("\u2013", "-")
|
33
33
|
|
34
34
|
# parse "all parts" request
|
35
35
|
code.sub! " (all parts)", ""
|
@@ -37,43 +37,24 @@ module RelatonIso
|
|
37
37
|
|
38
38
|
query_pubid = Pubid::Iso::Identifier.parse(code)
|
39
39
|
query_pubid.year = year if year
|
40
|
+
query_pubid.part = nil if opts[:all_parts]
|
41
|
+
Util.warn "(#{query_pubid}) Fetching from ISO..."
|
40
42
|
|
41
|
-
|
42
|
-
|
43
|
-
# Try with ISO/IEC prefix if ISO not found
|
44
|
-
if resp[:hits].empty? && query_pubid.copublisher.nil? &&
|
45
|
-
query_pubid.publisher == "ISO"
|
46
|
-
resp_isoiec = retry_isoiec_prefix(query_pubid, opts)
|
47
|
-
resp = resp_isoiec unless resp_isoiec.nil?
|
48
|
-
end
|
43
|
+
hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
|
44
|
+
tip_ids = look_up_with_any_types_stages(hits, ref, opts)
|
49
45
|
|
50
|
-
|
51
|
-
|
52
|
-
resp[:hits].any? && resp[:hits].first.fetch(opts[:lang])
|
46
|
+
ret = if !opts[:all_parts] || hits.size == 1
|
47
|
+
hits.any? && hits.first.fetch(opts[:lang])
|
53
48
|
else
|
54
|
-
|
49
|
+
hits.to_all_parts(opts[:lang])
|
55
50
|
end
|
56
51
|
|
57
|
-
return fetch_ref_err(query_pubid) unless ret
|
52
|
+
return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
|
58
53
|
|
59
|
-
# puts "xxxxx #{ret.docidentifier.first.id.inspect}"
|
60
54
|
response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
|
61
55
|
response_pubid = Pubid::Iso::Identifier.parse(response_docid)
|
62
|
-
|
63
|
-
|
64
|
-
if query_pubid.to_s == response_pubid.to_s
|
65
|
-
warn "[relaton-iso] (\"#{query_pubid}\") Found exact match."
|
66
|
-
elsif matches_base?(query_pubid, response_pubid)
|
67
|
-
warn "[relaton-iso] (\"#{query_pubid}\") " \
|
68
|
-
"Found (\"#{response_pubid}\")."
|
69
|
-
elsif matches_base?(query_pubid, response_pubid, any_types_stages: true)
|
70
|
-
warn "[relaton-iso] (\"#{query_pubid}\") TIP: " \
|
71
|
-
"Found with different type/stage, " \
|
72
|
-
"please amend to (\"#{response_pubid}\")."
|
73
|
-
else
|
74
|
-
# when there are all parts
|
75
|
-
warn "[relaton-iso] (\"#{query_pubid}\") Found (\"#{response_pubid}\")."
|
76
|
-
end
|
56
|
+
|
57
|
+
Util.warn "(#{query_pubid}) Found `#{response_pubid}`."
|
77
58
|
|
78
59
|
get_all = (
|
79
60
|
(query_pubid.year && opts[:keep_year].nil?) ||
|
@@ -83,9 +64,9 @@ module RelatonIso
|
|
83
64
|
return ret if get_all
|
84
65
|
|
85
66
|
ret.to_most_recent_reference
|
86
|
-
|
87
67
|
rescue Pubid::Core::Errors::ParseError
|
88
|
-
warn "
|
68
|
+
Util.warn "(#{code}) is not recognized as a standards identifier."
|
69
|
+
nil
|
89
70
|
end
|
90
71
|
|
91
72
|
# @param query_pubid [Pubid::Iso::Identifier]
|
@@ -109,147 +90,113 @@ module RelatonIso
|
|
109
90
|
# @return [<Type>] <description>
|
110
91
|
#
|
111
92
|
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
|
112
|
-
return unless pubid.respond_to?(:publisher)
|
93
|
+
return false unless pubid.respond_to?(:publisher)
|
113
94
|
|
114
95
|
query_pubid.publisher == pubid.publisher &&
|
115
96
|
query_pubid.number == pubid.number &&
|
116
97
|
query_pubid.copublisher == pubid.copublisher &&
|
117
|
-
(
|
118
|
-
(
|
98
|
+
(any_types_stages || query_pubid.stage == pubid.stage) &&
|
99
|
+
(any_types_stages || query_pubid.is_a?(pubid.class))
|
119
100
|
end
|
120
101
|
|
121
102
|
# @param hit_collection [RelatonIso::HitCollection]
|
122
103
|
# @param year [String]
|
123
|
-
# @return [RelatonIso::HitCollection]
|
124
|
-
def filter_hits_by_year(hit_collection, year)
|
125
|
-
|
126
|
-
return
|
104
|
+
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
|
105
|
+
def filter_hits_by_year(hit_collection, year)
|
106
|
+
missed_year_ids = Set.new
|
107
|
+
return [hit_collection, missed_year_ids] if year.nil?
|
127
108
|
|
128
109
|
# filter by year
|
129
110
|
hits = hit_collection.select do |hit|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
hit.pubid.year = year
|
136
|
-
true
|
137
|
-
else
|
138
|
-
missed_year = (hit.pubid.year || hit.hit[:year]).to_s
|
139
|
-
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
140
|
-
missed_years << missed_year
|
141
|
-
end
|
142
|
-
false
|
143
|
-
end
|
111
|
+
hit.pubid.year ||= hit.hit[:year]
|
112
|
+
next true if check_year(year, hit)
|
113
|
+
|
114
|
+
missed_year_ids << hit.pubid.to_s if hit.pubid.year
|
115
|
+
false
|
144
116
|
end
|
145
117
|
|
146
|
-
|
118
|
+
[hits, missed_year_ids]
|
147
119
|
end
|
148
120
|
|
149
121
|
private
|
150
122
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
123
|
+
def check_year(year, hit) # rubocop:disable Metrics/AbcSize
|
124
|
+
(hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) ||
|
125
|
+
(!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) ||
|
126
|
+
(!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s)
|
127
|
+
end
|
156
128
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
129
|
+
# @param pubid [Pubid::Iso::Identifier] PubID with no results
|
130
|
+
def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
131
|
+
Util.warn "(#{pubid}) Not found."
|
132
|
+
|
133
|
+
if missed_year_ids.any?
|
134
|
+
ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
|
135
|
+
Util.warn "(#{pubid}) TIP: No match for edition year " \
|
136
|
+
"#{pubid.year}, but matches exist for #{ids}."
|
164
137
|
end
|
165
138
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
"deliverable type abbreviation (TS, TR, PAS, Guide)."
|
139
|
+
if tip_ids.any?
|
140
|
+
ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
|
141
|
+
Util.warn "(#{pubid}) TIP: Matches exist for #{ids}."
|
170
142
|
end
|
171
143
|
|
172
|
-
|
173
|
-
|
144
|
+
if pubid.part
|
145
|
+
Util.warn "(#{pubid}) TIP: If it cannot be found, " \
|
146
|
+
"the document may no longer be published in parts."
|
147
|
+
else
|
148
|
+
Util.warn "(#{pubid}) TIP: If you wish to cite " \
|
149
|
+
"all document parts for the reference, use " \
|
150
|
+
"`#{pubid.to_s(format: :ref_undated)} (all parts)`."
|
151
|
+
end
|
174
152
|
|
175
|
-
|
176
|
-
# @param missed_years [Array<String>]
|
177
|
-
def warn_missing_years(pubid, missed_years)
|
178
|
-
warn "[relaton-iso] (\"#{pubid}\") TIP: " \
|
179
|
-
"No match for edition year #{pubid.year}, " \
|
180
|
-
"but matches exist for #{missed_years.uniq.join(', ')}."
|
153
|
+
nil
|
181
154
|
end
|
182
155
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
# @param opts [Hash]
|
187
|
-
# @return [Array<RelatonIso::Hit>]
|
188
|
-
def retry_isoiec_prefix(old_pubid, opts) # rubocop:disable Metrics/MethodLength
|
189
|
-
return nil unless old_pubid.copublisher.nil? && old_pubid.publisher == "ISO"
|
190
|
-
|
191
|
-
pubid = old_pubid.dup
|
192
|
-
pubid.copublisher = "IEC"
|
193
|
-
warn "[relaton-iso] (\"#{old_pubid}\") Not found, trying with ISO/IEC prefix (\"#{pubid}\")..."
|
194
|
-
resp_isoiec = isobib_search_filter(pubid, opts)
|
195
|
-
|
196
|
-
if resp_isoiec[:hits].empty?
|
197
|
-
warn "[relaton-iso] (\"#{pubid}\") Not found. "
|
198
|
-
return nil
|
199
|
-
end
|
200
|
-
|
201
|
-
warn "[relaton-iso] (\"#{pubid}\") TIP: Found with ISO/IEC prefix, " \
|
202
|
-
"please amend to (\"#{pubid}\")."
|
156
|
+
def look_up_with_any_types_stages(hits, ref, opts) # rubocop:disable Metrics/MethodLength
|
157
|
+
found_ids = []
|
158
|
+
return found_ids if hits.from_gh || hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)
|
203
159
|
|
204
|
-
|
160
|
+
ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
|
161
|
+
pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
|
162
|
+
resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
|
163
|
+
resp.map &:pubid
|
205
164
|
end
|
206
165
|
|
166
|
+
#
|
207
167
|
# Search for hits. If no found then trying missed stages.
|
208
168
|
#
|
209
169
|
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
210
170
|
# @param opts [Hash]
|
211
|
-
# @
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
# fetch hits collection
|
171
|
+
# @param any_types_stages [Boolean] match with any stages
|
172
|
+
#
|
173
|
+
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed years
|
174
|
+
#
|
175
|
+
def isobib_search_filter(query_pubid, opts, any_types_stages: false) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
218
176
|
query_pubid_without_year = query_pubid.dup
|
219
177
|
# remove year for query
|
220
178
|
query_pubid_without_year.year = nil
|
221
179
|
hit_collection = search(query_pubid_without_year.to_s)
|
222
180
|
|
223
181
|
# filter only matching hits
|
224
|
-
|
225
|
-
return res unless res[:hits].empty?
|
226
|
-
|
227
|
-
missed_years += res[:missed_years]
|
228
|
-
|
229
|
-
# lookup for documents with stages when no match without stage
|
230
|
-
res = filter_hits hit_collection, query_pubid,
|
231
|
-
all_parts: opts[:all_parts], any_types_stages: true
|
232
|
-
return res unless res[:hits].empty?
|
233
|
-
|
234
|
-
missed_years += res[:missed_years]
|
235
|
-
|
236
|
-
if missed_years.any?
|
237
|
-
warn_missing_years(query_pubid, missed_years)
|
238
|
-
end
|
239
|
-
|
240
|
-
res
|
182
|
+
filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
|
241
183
|
end
|
242
184
|
|
243
|
-
#
|
185
|
+
#
|
186
|
+
# Filter hits by query_pubid.
|
187
|
+
#
|
188
|
+
# @param hit_collection [RelatonIso::HitCollection]
|
244
189
|
# @param query_pubid [Pubid::Iso::Identifier]
|
245
190
|
# @param all_parts [Boolean]
|
246
|
-
# @param
|
247
|
-
#
|
248
|
-
|
191
|
+
# @param any_stypes_tages [Boolean]
|
192
|
+
#
|
193
|
+
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
|
194
|
+
#
|
195
|
+
def filter_hits(hit_collection, query_pubid, all_parts, any_stypes_tages) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
249
196
|
# filter out
|
250
197
|
result = hit_collection.select do |i|
|
251
198
|
hit_pubid = i.pubid
|
252
|
-
matches_base?(query_pubid, hit_pubid, any_types_stages:
|
199
|
+
matches_base?(query_pubid, hit_pubid, any_types_stages: any_stypes_tages) &&
|
253
200
|
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
254
201
|
query_pubid.corrigendums == hit_pubid.corrigendums &&
|
255
202
|
query_pubid.amendments == hit_pubid.amendments
|
data/lib/relaton_iso/scrapper.rb
CHANGED
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
@@ -1,6 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "nokogiri"
|
4
|
+
require "net/http"
|
5
|
+
require "logger"
|
6
|
+
require "pubid-iso"
|
7
|
+
require "relaton_iso_bib"
|
3
8
|
require "relaton_iso/version"
|
9
|
+
require "relaton_iso/config"
|
10
|
+
require "relaton_iso/util"
|
11
|
+
require "relaton_iso/hit"
|
4
12
|
require "relaton_iso/iso_bibliography"
|
5
|
-
require "pubid-iso"
|
6
13
|
require "relaton_iso/document_identifier"
|
data/relaton_iso.gemspec
CHANGED
@@ -10,10 +10,10 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.authors = ["Ribose Inc."]
|
11
11
|
spec.email = ["open.source@ribose.com"]
|
12
12
|
|
13
|
-
spec.summary = "RelatonIso: retrieve ISO Standards for bibliographic
|
14
|
-
"using the IsoBibliographicItem model"
|
15
|
-
spec.description = "RelatonIso: retrieve ISO Standards for bibliographic
|
16
|
-
"using the IsoBibliographicItem model"
|
13
|
+
spec.summary = "RelatonIso: retrieve ISO Standards for bibliographic " \
|
14
|
+
"use using the IsoBibliographicItem model"
|
15
|
+
spec.description = "RelatonIso: retrieve ISO Standards for bibliographic " \
|
16
|
+
"use using the IsoBibliographicItem model"
|
17
17
|
|
18
18
|
spec.homepage = "https://github.com/relaton/relaton-iso"
|
19
19
|
spec.license = "BSD-2-Clause"
|
@@ -26,11 +26,8 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.require_paths = ["lib"]
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
28
28
|
|
29
|
-
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
30
|
-
spec.add_development_dependency "rake", "~> 13.0"
|
31
|
-
spec.add_development_dependency "rspec", "~> 3.0"
|
32
|
-
|
33
29
|
spec.add_dependency "algolia", "~> 2.3.0"
|
34
|
-
spec.add_dependency "pubid-iso", "~> 0.
|
30
|
+
spec.add_dependency "pubid-iso", "~> 0.6.0"
|
31
|
+
spec.add_dependency "relaton-bib", "~> 1.14.13"
|
35
32
|
spec.add_dependency "relaton-iso-bib", "~> 1.14.0"
|
36
33
|
end
|
metadata
CHANGED
@@ -1,85 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.15.
|
4
|
+
version: 1.15.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.6'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.6'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '13.0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '13.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rspec
|
14
|
+
name: algolia
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
44
16
|
requirements:
|
45
17
|
- - "~>"
|
46
18
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
type: :
|
19
|
+
version: 2.3.0
|
20
|
+
type: :runtime
|
49
21
|
prerelease: false
|
50
22
|
version_requirements: !ruby/object:Gem::Requirement
|
51
23
|
requirements:
|
52
24
|
- - "~>"
|
53
25
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
26
|
+
version: 2.3.0
|
55
27
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
28
|
+
name: pubid-iso
|
57
29
|
requirement: !ruby/object:Gem::Requirement
|
58
30
|
requirements:
|
59
31
|
- - "~>"
|
60
32
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
33
|
+
version: 0.6.0
|
62
34
|
type: :runtime
|
63
35
|
prerelease: false
|
64
36
|
version_requirements: !ruby/object:Gem::Requirement
|
65
37
|
requirements:
|
66
38
|
- - "~>"
|
67
39
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
40
|
+
version: 0.6.0
|
69
41
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
42
|
+
name: relaton-bib
|
71
43
|
requirement: !ruby/object:Gem::Requirement
|
72
44
|
requirements:
|
73
45
|
- - "~>"
|
74
46
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
47
|
+
version: 1.14.13
|
76
48
|
type: :runtime
|
77
49
|
prerelease: false
|
78
50
|
version_requirements: !ruby/object:Gem::Requirement
|
79
51
|
requirements:
|
80
52
|
- - "~>"
|
81
53
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
54
|
+
version: 1.14.13
|
83
55
|
- !ruby/object:Gem::Dependency
|
84
56
|
name: relaton-iso-bib
|
85
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,19 +107,21 @@ files:
|
|
135
107
|
- bin/setup
|
136
108
|
- bin/thor
|
137
109
|
- lib/relaton_iso.rb
|
110
|
+
- lib/relaton_iso/config.rb
|
138
111
|
- lib/relaton_iso/document_identifier.rb
|
139
112
|
- lib/relaton_iso/hit.rb
|
140
113
|
- lib/relaton_iso/hit_collection.rb
|
141
114
|
- lib/relaton_iso/iso_bibliography.rb
|
142
115
|
- lib/relaton_iso/processor.rb
|
143
116
|
- lib/relaton_iso/scrapper.rb
|
117
|
+
- lib/relaton_iso/util.rb
|
144
118
|
- lib/relaton_iso/version.rb
|
145
119
|
- relaton_iso.gemspec
|
146
120
|
homepage: https://github.com/relaton/relaton-iso
|
147
121
|
licenses:
|
148
122
|
- BSD-2-Clause
|
149
123
|
metadata: {}
|
150
|
-
post_install_message:
|
124
|
+
post_install_message:
|
151
125
|
rdoc_options: []
|
152
126
|
require_paths:
|
153
127
|
- lib
|
@@ -162,8 +136,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
136
|
- !ruby/object:Gem::Version
|
163
137
|
version: '0'
|
164
138
|
requirements: []
|
165
|
-
rubygems_version: 3.
|
166
|
-
signing_key:
|
139
|
+
rubygems_version: 3.3.26
|
140
|
+
signing_key:
|
167
141
|
specification_version: 4
|
168
142
|
summary: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
169
143
|
model'
|