relaton-iso 1.15.4 → 1.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +2 -1
- data/Gemfile +3 -0
- data/README.adoc +42 -62
- data/lib/relaton_iso/config.rb +10 -0
- data/lib/relaton_iso/hit.rb +3 -3
- data/lib/relaton_iso/hit_collection.rb +10 -31
- data/lib/relaton_iso/iso_bibliography.rb +76 -129
- data/lib/relaton_iso/scrapper.rb +0 -5
- data/lib/relaton_iso/util.rb +9 -0
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +8 -1
- data/relaton_iso.gemspec +6 -9
- metadata +18 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fadb60d3f225174438b872a27c8632fcb9dcb685b66ef67071b7ab16dd78f052
|
4
|
+
data.tar.gz: b53c685d0adf0856851f31d1808654f59f16d0ccd8db70b2ef818b22126e505e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a10856c01099b1590b99ab0c2102aaf6dee654bd3fdf0b6a7f0717d3f85881bc58999219168e6ba707846cbd3fc785fbc9951003d7c21ad6c134af2a0fb0d4ea
|
7
|
+
data.tar.gz: a9a4b7a79acf34a0a42218f28979ec06a41c0b6305fcfbb77ebec0cf4881e2b806548ae5603112122420f1c4ed0d365d9be50a2c291fb4d5a00fe68534f2311b
|
@@ -7,7 +7,8 @@ on:
|
|
7
7
|
inputs:
|
8
8
|
next_version:
|
9
9
|
description: |
|
10
|
-
Next release version. Possible values: x.y.z, major, minor, patch or pre|rc|etc
|
10
|
+
Next release version. Possible values: x.y.z, major, minor, patch (or pre|rc|etc).
|
11
|
+
Also, you can pass 'skip' to skip 'git tag' and do 'gem push' for the current version
|
11
12
|
required: true
|
12
13
|
default: 'skip'
|
13
14
|
repository_dispatch:
|
data/Gemfile
CHANGED
data/README.adoc
CHANGED
@@ -31,13 +31,24 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
== Usage
|
33
33
|
|
34
|
-
===
|
34
|
+
=== Configuration
|
35
|
+
|
36
|
+
Configuration is optional. The available option is `logger`, which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with the `Logger::WARN` level. To change the logger level, use the `RelatonIso.configure` block.
|
35
37
|
|
36
38
|
[source,ruby]
|
37
39
|
----
|
38
40
|
require 'relaton_iso'
|
39
41
|
=> true
|
40
42
|
|
43
|
+
RelatonIso.configure do |config|
|
44
|
+
config.logger.level = Logger::DEBUG
|
45
|
+
end
|
46
|
+
----
|
47
|
+
|
48
|
+
=== Search for standards using keywords
|
49
|
+
|
50
|
+
[source,ruby]
|
51
|
+
----
|
41
52
|
hit_collection = RelatonIso::IsoBibliography.search("ISO 19115")
|
42
53
|
=> <RelatonIso::HitCollection:0x007fa5bc847038 @ref=19115 @fetched=false>
|
43
54
|
|
@@ -49,37 +60,11 @@ item = hit_collection[2].fetch
|
|
49
60
|
...
|
50
61
|
|
51
62
|
item.docidentifier
|
52
|
-
=> [#<RelatonIso::DocumentIdentifier:
|
53
|
-
|
54
|
-
#<Pubid::Iso::Identifier:0x00007fa8b7db2a60
|
55
|
-
@base=#<Pubid::Iso::Identifier:0x00007fa8b7db2c68 @edition="1", @number="19115"@4, @part="1", @publisher="ISO", @year=2014>,
|
56
|
-
@number="2"@21,
|
57
|
-
@publisher="ISO",
|
58
|
-
@stage=#<Pubid::Iso::Stage:0x00007fa8b7db20d8 @abbr=nil, @harmonized_code=#<Pubid::Iso::HarmonizedStageCode:0x00007fa8b7db2088 @stages=["60.60"]>>,
|
59
|
-
@typed_stage=#<Pubid::Iso::TypedStage:0x00007fa8b7db2920 @type=#<Pubid::Iso::Type:0x00007fa8b7db26c8 @type=:amd>, @typed_stage=nil>,
|
60
|
-
@year=2020>,
|
61
|
-
@language=nil,
|
62
|
-
@primary=true,
|
63
|
-
@scope=nil,
|
64
|
-
@script=nil,
|
65
|
-
@type="ISO">,
|
66
|
-
#<RelatonIso::DocumentIdentifier:0x00007fa8b7db1e80
|
67
|
-
@id=
|
68
|
-
#<Pubid::Iso::Identifier:0x00007fa8b7db2a60
|
69
|
-
@base=#<Pubid::Iso::Identifier:0x00007fa8b7db2c68 @edition="1", @number="19115"@4, @part="1", @publisher="ISO", @year=2014>,
|
70
|
-
@number="2"@21,
|
71
|
-
@publisher="ISO",
|
72
|
-
@stage=#<Pubid::Iso::Stage:0x00007fa8b7db20d8 @abbr=nil, @harmonized_code=#<Pubid::Iso::HarmonizedStageCode:0x00007fa8b7db2088 @stages=["60.60"]>>,
|
73
|
-
@typed_stage=#<Pubid::Iso::TypedStage:0x00007fa8b7db2920 @type=#<Pubid::Iso::Type:0x00007fa8b7db26c8 @type=:amd>, @typed_stage=nil>,
|
74
|
-
@year=2020>,
|
75
|
-
@language=nil,
|
76
|
-
@primary=nil,
|
77
|
-
@scope=nil,
|
78
|
-
@script=nil,
|
79
|
-
@type="URN">]
|
63
|
+
=> [#<RelatonIso::DocumentIdentifier:0x0000000112a23a88
|
64
|
+
...
|
80
65
|
|
81
66
|
item.docidentifier.detect { |di| di.type == "URN" }.id
|
82
|
-
=> "urn:iso:std:iso:19115:-1:ed-1:amd:2020:v2"
|
67
|
+
=> "urn:iso:std:iso:19115:-1:ed-1:stage-60.60:amd:2020:v2"
|
83
68
|
----
|
84
69
|
|
85
70
|
=== Fetch document by reference and year
|
@@ -93,9 +78,9 @@ item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
|
93
78
|
...
|
94
79
|
|
95
80
|
item = RelatonIso::IsoBibliography.get "ISO 19115", "2003"
|
96
|
-
[relaton-iso] ("ISO 19115")
|
97
|
-
[relaton-iso] ("ISO 19115:2003") Found
|
98
|
-
=> #<RelatonIsoBib::IsoBibliographicItem:
|
81
|
+
[relaton-iso] ("ISO 19115:2003") Fetching from ISO...
|
82
|
+
[relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
|
83
|
+
=> #<RelatonIsoBib::IsoBibliographicItem:0x0000000112c9ca80
|
99
84
|
...
|
100
85
|
|
101
86
|
item.docidentifier[0].id
|
@@ -113,7 +98,7 @@ item = RelatonIso::IsoBibliography.get "ISO 19115"
|
|
113
98
|
...
|
114
99
|
|
115
100
|
item.docidentifier[0].id
|
116
|
-
=> "ISO 19115
|
101
|
+
=> "ISO 19115"
|
117
102
|
----
|
118
103
|
|
119
104
|
=== Fetch a part document
|
@@ -136,13 +121,13 @@ item.docidentifier[0].id
|
|
136
121
|
----
|
137
122
|
item = RelatonIso::IsoBibliography.get "ISO 19115 (all parts)"
|
138
123
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
139
|
-
[relaton-iso] ("ISO 19115") Found
|
124
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
140
125
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8ca216e118
|
141
126
|
...
|
142
127
|
|
143
128
|
item = RelatonIso::IsoBibliography.get "ISO 19115", nil, all_parts: true
|
144
129
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
145
|
-
[relaton-iso] ("ISO 19115") Found
|
130
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
146
131
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c830f3d38
|
147
132
|
...
|
148
133
|
|
@@ -151,12 +136,12 @@ item.docidentifier[0].id
|
|
151
136
|
|
152
137
|
item = RelatonIso::IsoBibliography.get "ISO 19115-1 (all parts)"
|
153
138
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
154
|
-
[relaton-iso] ("ISO 19115") Found
|
139
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
155
140
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c8290e5a0
|
156
141
|
|
157
142
|
item = RelatonIso::IsoBibliography.get "ISO 19115-1", nil, all_parts: true
|
158
143
|
[relaton-iso] ("ISO 19115") Fetching from ISO...
|
159
|
-
[relaton-iso] ("ISO 19115") Found
|
144
|
+
[relaton-iso] ("ISO 19115") Found ("ISO 19115").
|
160
145
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007f8c925355b8
|
161
146
|
...
|
162
147
|
|
@@ -217,35 +202,31 @@ item.to_xml note: [{ text: "Note", type: "note" }]
|
|
217
202
|
[source,ruby]
|
218
203
|
----
|
219
204
|
item.title lang: 'en'
|
220
|
-
=> #<RelatonBib::TypedTitleStringCollection:
|
205
|
+
=> #<RelatonBib::TypedTitleStringCollection:0x0000000112783fd0
|
221
206
|
@array=
|
222
|
-
[#<RelatonBib::TypedTitleString:
|
223
|
-
@title=#<RelatonBib::FormattedString:
|
207
|
+
[#<RelatonBib::TypedTitleString:0x00000001138e2380
|
208
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d496b8 @content="Geographic information", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
224
209
|
@type="title-intro">,
|
225
|
-
#<RelatonBib::TypedTitleString:
|
226
|
-
@title=#<RelatonBib::FormattedString:
|
210
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1f70
|
211
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d495c8 @content="Metadata", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
227
212
|
@type="title-main">,
|
228
|
-
#<RelatonBib::TypedTitleString:
|
213
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1d68
|
229
214
|
@title=
|
230
|
-
#<RelatonBib::FormattedString:
|
231
|
-
@content="Geographic information – Metadata",
|
232
|
-
@format="text/plain",
|
233
|
-
@language=["en"],
|
234
|
-
@script=["Latn"]>,
|
215
|
+
#<RelatonBib::FormattedString:0x0000000112d49488 @content="Geographic information – Metadata", @format="text/plain", @language=["en"], @script=["Latn"]>,
|
235
216
|
@type="main">]>
|
236
217
|
|
237
218
|
item.title lang: 'fr'
|
238
|
-
=> #<RelatonBib::TypedTitleStringCollection:
|
219
|
+
=> #<RelatonBib::TypedTitleStringCollection:0x0000000113067458
|
239
220
|
@array=
|
240
|
-
[#<RelatonBib::TypedTitleString:
|
241
|
-
@title=#<RelatonBib::FormattedString:
|
221
|
+
[#<RelatonBib::TypedTitleString:0x00000001138e1c28
|
222
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d49438 @content="Information géographique", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
242
223
|
@type="title-intro">,
|
243
|
-
#<RelatonBib::TypedTitleString:
|
244
|
-
@title=#<RelatonBib::FormattedString:
|
224
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1b10
|
225
|
+
@title=#<RelatonBib::FormattedString:0x0000000112d49398 @content="Métadonnées", @format="text/plain", @language=["fr"], @script=["Latn"]>,
|
245
226
|
@type="title-main">,
|
246
|
-
#<RelatonBib::TypedTitleString:
|
227
|
+
#<RelatonBib::TypedTitleString:0x00000001138e1908
|
247
228
|
@title=
|
248
|
-
#<RelatonBib::FormattedString:
|
229
|
+
#<RelatonBib::FormattedString:0x0000000112d491b8
|
249
230
|
@content="Information géographique – Métadonnées",
|
250
231
|
@format="text/plain",
|
251
232
|
@language=["fr"],
|
@@ -254,10 +235,9 @@ item.title lang: 'fr'
|
|
254
235
|
|
255
236
|
item = RelatonIso::IsoBibliography.get "ISO 19115:2003"
|
256
237
|
[relaton-iso] ("ISO 19115:2003") Fetching from ISO...
|
257
|
-
[relaton-iso] ("ISO 19115:2003") Found
|
238
|
+
[relaton-iso] ("ISO 19115:2003") Found ("ISO 19115:2003").
|
258
239
|
=> #<RelatonIsoBib::IsoBibliographicItem:0x00007fa8870b69e0
|
259
240
|
|
260
|
-
item.abstract lang: 'en'
|
261
241
|
item.abstract lang: 'en'
|
262
242
|
=> #<RelatonBib::FormattedString:0x00007fa8870b4f78
|
263
243
|
@content=
|
@@ -274,13 +254,13 @@ Each ISO document has `src` type link and optional `obp`, `rss`, and `pub` link
|
|
274
254
|
[source,ruby]
|
275
255
|
----
|
276
256
|
item.link
|
277
|
-
=> [#<RelatonBib::TypedUri:
|
278
|
-
@content=#<Addressable::URI:
|
257
|
+
=> [#<RelatonBib::TypedUri:0x0000000112d66c40
|
258
|
+
@content=#<Addressable::URI:0x93d71c URI:https://www.iso.org/standard/26020.html>,
|
279
259
|
@language=nil,
|
280
260
|
@script=nil,
|
281
261
|
@type="src">,
|
282
|
-
#<RelatonBib::TypedUri:
|
283
|
-
@content=#<Addressable::URI:
|
262
|
+
#<RelatonBib::TypedUri:0x0000000112d66920
|
263
|
+
@content=#<Addressable::URI:0x93d730 URI:https://www.iso.org/contents/data/standard/02/60/26020.detail.rss>,
|
284
264
|
@language=nil,
|
285
265
|
@script=nil,
|
286
266
|
@type="rss">]
|
data/lib/relaton_iso/hit.rb
CHANGED
@@ -42,9 +42,9 @@ module RelatonIso
|
|
42
42
|
# @return [Pubid::Iso::Identifier]
|
43
43
|
def pubid
|
44
44
|
@pubid ||= Pubid::Iso::Identifier.parse_from_title(hit[:title])
|
45
|
-
rescue Pubid::Iso::Errors::WrongTypeError => e
|
46
|
-
warn "
|
47
|
-
warn
|
45
|
+
rescue Pubid::Iso::Errors::WrongTypeError, Pubid::Iso::Errors::ParseError => e
|
46
|
+
Util.warn "unable to find an identifier in `#{hit[:title]}`."
|
47
|
+
Util.warn e.message
|
48
48
|
end
|
49
49
|
end
|
50
50
|
end
|
@@ -6,17 +6,23 @@ require "relaton_iso/hit"
|
|
6
6
|
module RelatonIso
|
7
7
|
# Page of hit collection.
|
8
8
|
class HitCollection < RelatonBib::HitCollection
|
9
|
-
#
|
9
|
+
# @return [Boolean] whether the search was performed on GitHub
|
10
|
+
attr_reader :from_gh
|
10
11
|
|
11
12
|
# @param text [String] reference to search
|
12
13
|
def initialize(text)
|
13
14
|
super
|
14
|
-
@
|
15
|
+
@from_gh = text.match?(/^ISO[\s\/](?:TC\s184\/SC\s?4|IEC\sDIR\s(?:\d|IEC|JTC))/)
|
16
|
+
end
|
17
|
+
|
18
|
+
def fetch
|
19
|
+
@array = from_gh ? fetch_github : fetch_iso
|
20
|
+
self
|
15
21
|
end
|
16
22
|
|
17
23
|
# @param lang [String, NilClass]
|
18
24
|
# @return [RelatonIsoBib::IsoBibliographicItem, nil]
|
19
|
-
def to_all_parts(lang = nil) # rubocop:disable Metrics/
|
25
|
+
def to_all_parts(lang = nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
20
26
|
# parts = @array.reject { |h| h.hit["docPart"]&.empty? }
|
21
27
|
hit = @array.min_by { |h| h.pubid.part.to_i }
|
22
28
|
return @array.first&.fetch lang unless hit
|
@@ -33,7 +39,6 @@ module RelatonIso
|
|
33
39
|
end
|
34
40
|
all_parts_item
|
35
41
|
end
|
36
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
37
42
|
|
38
43
|
private
|
39
44
|
|
@@ -63,23 +68,11 @@ module RelatonIso
|
|
63
68
|
# @return [Array<RelatonIso::Hit>]
|
64
69
|
#
|
65
70
|
def fetch_iso # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
66
|
-
# %r{\s(?<num>\d+)(?:-(?<part>[\d-]+))?} =~ text
|
67
|
-
# http = Net::HTTP.new "www.iso.org", 443
|
68
|
-
# http.use_ssl = true
|
69
|
-
# search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
|
70
|
-
# search << "docNumber=#{num}"
|
71
|
-
# search << "docPartNo=#{part}" if part
|
72
|
-
# q = search.join "&"
|
73
|
-
# resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
|
74
|
-
# "Accept" => "application/json, text/plain, */*")
|
75
71
|
config = Algolia::Search::Config.new(application_id: "JCL49WV5AR", api_key: "dd1b9e1ab383f4d4817d29cd5e96d3f0")
|
76
|
-
client = Algolia::Search::Client.new config, logger:
|
72
|
+
client = Algolia::Search::Client.new config, logger: RelatonIso.configuration.logger
|
77
73
|
index = client.init_index "all_en"
|
78
74
|
resp = index.search text, hitsPerPage: 100, filters: "category:standard"
|
79
|
-
# return [] if resp.body.empty?
|
80
75
|
|
81
|
-
# json = JSON.parse resp.body
|
82
|
-
# json["standards"]
|
83
76
|
resp[:hits].map { |h| Hit.new h, self }.sort! do |a, b|
|
84
77
|
if a.sort_weight == b.sort_weight && b.hit[:year] = a.hit[:year]
|
85
78
|
a.hit[:title] <=> b.hit[:title]
|
@@ -90,19 +83,5 @@ module RelatonIso
|
|
90
83
|
end
|
91
84
|
end
|
92
85
|
end
|
93
|
-
|
94
|
-
# @param hit [Hash]
|
95
|
-
# @return [Date]
|
96
|
-
# def parse_date(hit)
|
97
|
-
# if hit["publicationDate"]
|
98
|
-
# Date.strptime(hit["publicationDate"], "%Y-%m")
|
99
|
-
# elsif %r{:(?<year>\d{4})} =~ hit["docRef"]
|
100
|
-
# Date.strptime(year, "%Y")
|
101
|
-
# elsif hit["newProjectDate"]
|
102
|
-
# Date.parse hit["newProjectDate"]
|
103
|
-
# else
|
104
|
-
# Date.new 0
|
105
|
-
# end
|
106
|
-
# end
|
107
86
|
end
|
108
87
|
end
|
@@ -12,7 +12,7 @@ module RelatonIso
|
|
12
12
|
# @param text [String]
|
13
13
|
# @return [RelatonIso::HitCollection]
|
14
14
|
def search(text)
|
15
|
-
HitCollection.new
|
15
|
+
HitCollection.new(text.gsub("\u2013", "-")).fetch
|
16
16
|
rescue SocketError, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET,
|
17
17
|
EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
|
18
18
|
Net::ProtocolError, OpenSSL::SSL::SSLError, Errno::ETIMEDOUT,
|
@@ -29,7 +29,7 @@ module RelatonIso
|
|
29
29
|
#
|
30
30
|
# @return [RelatonIsoBib::IsoBibliographicItem] Relaton XML serialisation of reference
|
31
31
|
def get(ref, year = nil, opts = {}) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity,Metrics/AbcSize
|
32
|
-
code = ref.gsub(
|
32
|
+
code = ref.gsub("\u2013", "-")
|
33
33
|
|
34
34
|
# parse "all parts" request
|
35
35
|
code.sub! " (all parts)", ""
|
@@ -37,43 +37,24 @@ module RelatonIso
|
|
37
37
|
|
38
38
|
query_pubid = Pubid::Iso::Identifier.parse(code)
|
39
39
|
query_pubid.year = year if year
|
40
|
+
query_pubid.part = nil if opts[:all_parts]
|
41
|
+
Util.warn "(#{query_pubid}) Fetching from ISO..."
|
40
42
|
|
41
|
-
|
42
|
-
|
43
|
-
# Try with ISO/IEC prefix if ISO not found
|
44
|
-
if resp[:hits].empty? && query_pubid.copublisher.nil? &&
|
45
|
-
query_pubid.publisher == "ISO"
|
46
|
-
resp_isoiec = retry_isoiec_prefix(query_pubid, opts)
|
47
|
-
resp = resp_isoiec unless resp_isoiec.nil?
|
48
|
-
end
|
43
|
+
hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
|
44
|
+
tip_ids = look_up_with_any_types_stages(hits, ref, opts)
|
49
45
|
|
50
|
-
|
51
|
-
|
52
|
-
resp[:hits].any? && resp[:hits].first.fetch(opts[:lang])
|
46
|
+
ret = if !opts[:all_parts] || hits.size == 1
|
47
|
+
hits.any? && hits.first.fetch(opts[:lang])
|
53
48
|
else
|
54
|
-
|
49
|
+
hits.to_all_parts(opts[:lang])
|
55
50
|
end
|
56
51
|
|
57
|
-
return fetch_ref_err(query_pubid) unless ret
|
52
|
+
return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
|
58
53
|
|
59
|
-
# puts "xxxxx #{ret.docidentifier.first.id.inspect}"
|
60
54
|
response_docid = ret.docidentifier.first.id.sub(" (all parts)", "")
|
61
55
|
response_pubid = Pubid::Iso::Identifier.parse(response_docid)
|
62
|
-
|
63
|
-
|
64
|
-
if query_pubid.to_s == response_pubid.to_s
|
65
|
-
warn "[relaton-iso] (\"#{query_pubid}\") Found exact match."
|
66
|
-
elsif matches_base?(query_pubid, response_pubid)
|
67
|
-
warn "[relaton-iso] (\"#{query_pubid}\") " \
|
68
|
-
"Found (\"#{response_pubid}\")."
|
69
|
-
elsif matches_base?(query_pubid, response_pubid, any_types_stages: true)
|
70
|
-
warn "[relaton-iso] (\"#{query_pubid}\") TIP: " \
|
71
|
-
"Found with different type/stage, " \
|
72
|
-
"please amend to (\"#{response_pubid}\")."
|
73
|
-
else
|
74
|
-
# when there are all parts
|
75
|
-
warn "[relaton-iso] (\"#{query_pubid}\") Found (\"#{response_pubid}\")."
|
76
|
-
end
|
56
|
+
|
57
|
+
Util.warn "(#{query_pubid}) Found `#{response_pubid}`."
|
77
58
|
|
78
59
|
get_all = (
|
79
60
|
(query_pubid.year && opts[:keep_year].nil?) ||
|
@@ -83,9 +64,9 @@ module RelatonIso
|
|
83
64
|
return ret if get_all
|
84
65
|
|
85
66
|
ret.to_most_recent_reference
|
86
|
-
|
87
67
|
rescue Pubid::Core::Errors::ParseError
|
88
|
-
warn "
|
68
|
+
Util.warn "(#{code}) is not recognized as a standards identifier."
|
69
|
+
nil
|
89
70
|
end
|
90
71
|
|
91
72
|
# @param query_pubid [Pubid::Iso::Identifier]
|
@@ -109,147 +90,113 @@ module RelatonIso
|
|
109
90
|
# @return [<Type>] <description>
|
110
91
|
#
|
111
92
|
def matches_base?(query_pubid, pubid, any_types_stages: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics?PerceivedComplexity
|
112
|
-
return unless pubid.respond_to?(:publisher)
|
93
|
+
return false unless pubid.respond_to?(:publisher)
|
113
94
|
|
114
95
|
query_pubid.publisher == pubid.publisher &&
|
115
96
|
query_pubid.number == pubid.number &&
|
116
97
|
query_pubid.copublisher == pubid.copublisher &&
|
117
|
-
(
|
118
|
-
(
|
98
|
+
(any_types_stages || query_pubid.stage == pubid.stage) &&
|
99
|
+
(any_types_stages || query_pubid.is_a?(pubid.class))
|
119
100
|
end
|
120
101
|
|
121
102
|
# @param hit_collection [RelatonIso::HitCollection]
|
122
103
|
# @param year [String]
|
123
|
-
# @return [RelatonIso::HitCollection]
|
124
|
-
def filter_hits_by_year(hit_collection, year)
|
125
|
-
|
126
|
-
return
|
104
|
+
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
|
105
|
+
def filter_hits_by_year(hit_collection, year)
|
106
|
+
missed_year_ids = Set.new
|
107
|
+
return [hit_collection, missed_year_ids] if year.nil?
|
127
108
|
|
128
109
|
# filter by year
|
129
110
|
hits = hit_collection.select do |hit|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
hit.pubid.year = year
|
136
|
-
true
|
137
|
-
else
|
138
|
-
missed_year = (hit.pubid.year || hit.hit[:year]).to_s
|
139
|
-
if missed_year && !missed_year.empty? && !missed_years.include?(missed_year)
|
140
|
-
missed_years << missed_year
|
141
|
-
end
|
142
|
-
false
|
143
|
-
end
|
111
|
+
hit.pubid.year ||= hit.hit[:year]
|
112
|
+
next true if check_year(year, hit)
|
113
|
+
|
114
|
+
missed_year_ids << hit.pubid.to_s if hit.pubid.year
|
115
|
+
false
|
144
116
|
end
|
145
117
|
|
146
|
-
|
118
|
+
[hits, missed_year_ids]
|
147
119
|
end
|
148
120
|
|
149
121
|
private
|
150
122
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
123
|
+
def check_year(year, hit) # rubocop:disable Metrics/AbcSize
|
124
|
+
(hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) ||
|
125
|
+
(!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) ||
|
126
|
+
(!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s)
|
127
|
+
end
|
156
128
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
129
|
+
# @param pubid [Pubid::Iso::Identifier] PubID with no results
|
130
|
+
def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
131
|
+
Util.warn "(#{pubid}) Not found."
|
132
|
+
|
133
|
+
if missed_year_ids.any?
|
134
|
+
ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
|
135
|
+
Util.warn "(#{pubid}) TIP: No match for edition year " \
|
136
|
+
"#{pubid.year}, but matches exist for #{ids}."
|
164
137
|
end
|
165
138
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
"deliverable type abbreviation (TS, TR, PAS, Guide)."
|
139
|
+
if tip_ids.any?
|
140
|
+
ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
|
141
|
+
Util.warn "(#{pubid}) TIP: Matches exist for #{ids}."
|
170
142
|
end
|
171
143
|
|
172
|
-
|
173
|
-
|
144
|
+
if pubid.part
|
145
|
+
Util.warn "(#{pubid}) TIP: If it cannot be found, " \
|
146
|
+
"the document may no longer be published in parts."
|
147
|
+
else
|
148
|
+
Util.warn "(#{pubid}) TIP: If you wish to cite " \
|
149
|
+
"all document parts for the reference, use " \
|
150
|
+
"`#{pubid.to_s(format: :ref_undated)} (all parts)`."
|
151
|
+
end
|
174
152
|
|
175
|
-
|
176
|
-
# @param missed_years [Array<String>]
|
177
|
-
def warn_missing_years(pubid, missed_years)
|
178
|
-
warn "[relaton-iso] (\"#{pubid}\") TIP: " \
|
179
|
-
"No match for edition year #{pubid.year}, " \
|
180
|
-
"but matches exist for #{missed_years.uniq.join(', ')}."
|
153
|
+
nil
|
181
154
|
end
|
182
155
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
# @param opts [Hash]
|
187
|
-
# @return [Array<RelatonIso::Hit>]
|
188
|
-
def retry_isoiec_prefix(old_pubid, opts) # rubocop:disable Metrics/MethodLength
|
189
|
-
return nil unless old_pubid.copublisher.nil? && old_pubid.publisher == "ISO"
|
190
|
-
|
191
|
-
pubid = old_pubid.dup
|
192
|
-
pubid.copublisher = "IEC"
|
193
|
-
warn "[relaton-iso] (\"#{old_pubid}\") Not found, trying with ISO/IEC prefix (\"#{pubid}\")..."
|
194
|
-
resp_isoiec = isobib_search_filter(pubid, opts)
|
195
|
-
|
196
|
-
if resp_isoiec[:hits].empty?
|
197
|
-
warn "[relaton-iso] (\"#{pubid}\") Not found. "
|
198
|
-
return nil
|
199
|
-
end
|
200
|
-
|
201
|
-
warn "[relaton-iso] (\"#{pubid}\") TIP: Found with ISO/IEC prefix, " \
|
202
|
-
"please amend to (\"#{pubid}\")."
|
156
|
+
def look_up_with_any_types_stages(hits, ref, opts) # rubocop:disable Metrics/MethodLength
|
157
|
+
found_ids = []
|
158
|
+
return found_ids if hits.from_gh || hits.any? || !ref.match?(/^ISO[\/\s][A-Z]/)
|
203
159
|
|
204
|
-
|
160
|
+
ref_no_type_stage = ref.sub(/^ISO[\/\s][A-Z]+/, "ISO")
|
161
|
+
pubid = Pubid::Iso::Identifier.parse(ref_no_type_stage)
|
162
|
+
resp, = isobib_search_filter(pubid, opts, any_types_stages: true)
|
163
|
+
resp.map &:pubid
|
205
164
|
end
|
206
165
|
|
166
|
+
#
|
207
167
|
# Search for hits. If none are found, then try missed stages.
|
208
168
|
#
|
209
169
|
# @param query_pubid [Pubid::Iso::Identifier] reference without correction
|
210
170
|
# @param opts [Hash]
|
211
|
-
# @
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
# fetch hits collection
|
171
|
+
# @param any_types_stages [Boolean] match with any stages
|
172
|
+
#
|
173
|
+
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
|
174
|
+
#
|
175
|
+
def isobib_search_filter(query_pubid, opts, any_types_stages: false) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
218
176
|
query_pubid_without_year = query_pubid.dup
|
219
177
|
# remove year for query
|
220
178
|
query_pubid_without_year.year = nil
|
221
179
|
hit_collection = search(query_pubid_without_year.to_s)
|
222
180
|
|
223
181
|
# filter only matching hits
|
224
|
-
|
225
|
-
return res unless res[:hits].empty?
|
226
|
-
|
227
|
-
missed_years += res[:missed_years]
|
228
|
-
|
229
|
-
# lookup for documents with stages when no match without stage
|
230
|
-
res = filter_hits hit_collection, query_pubid,
|
231
|
-
all_parts: opts[:all_parts], any_types_stages: true
|
232
|
-
return res unless res[:hits].empty?
|
233
|
-
|
234
|
-
missed_years += res[:missed_years]
|
235
|
-
|
236
|
-
if missed_years.any?
|
237
|
-
warn_missing_years(query_pubid, missed_years)
|
238
|
-
end
|
239
|
-
|
240
|
-
res
|
182
|
+
filter_hits hit_collection, query_pubid, opts[:all_parts], any_types_stages
|
241
183
|
end
|
242
184
|
|
243
|
-
#
|
185
|
+
#
|
186
|
+
# Filter hits by query_pubid.
|
187
|
+
#
|
188
|
+
# @param hit_collection [RelatonIso::HitCollection]
|
244
189
|
# @param query_pubid [Pubid::Iso::Identifier]
|
245
190
|
# @param all_parts [Boolean]
|
246
|
-
# @param
|
247
|
-
#
|
248
|
-
|
191
|
+
# @param any_stypes_tages [Boolean]
|
192
|
+
#
|
193
|
+
# @return [Array<RelatonIso::HitCollection, Array<String>>] hits and missed year IDs
|
194
|
+
#
|
195
|
+
def filter_hits(hit_collection, query_pubid, all_parts, any_stypes_tages) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
249
196
|
# filter out
|
250
197
|
result = hit_collection.select do |i|
|
251
198
|
hit_pubid = i.pubid
|
252
|
-
matches_base?(query_pubid, hit_pubid, any_types_stages:
|
199
|
+
matches_base?(query_pubid, hit_pubid, any_types_stages: any_stypes_tages) &&
|
253
200
|
matches_parts?(query_pubid, hit_pubid, all_parts: all_parts) &&
|
254
201
|
query_pubid.corrigendums == hit_pubid.corrigendums &&
|
255
202
|
query_pubid.amendments == hit_pubid.amendments
|
data/lib/relaton_iso/scrapper.rb
CHANGED
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
@@ -1,6 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "nokogiri"
|
4
|
+
require "net/http"
|
5
|
+
require "logger"
|
6
|
+
require "pubid-iso"
|
7
|
+
require "relaton_iso_bib"
|
3
8
|
require "relaton_iso/version"
|
9
|
+
require "relaton_iso/config"
|
10
|
+
require "relaton_iso/util"
|
11
|
+
require "relaton_iso/hit"
|
4
12
|
require "relaton_iso/iso_bibliography"
|
5
|
-
require "pubid-iso"
|
6
13
|
require "relaton_iso/document_identifier"
|
data/relaton_iso.gemspec
CHANGED
@@ -10,10 +10,10 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.authors = ["Ribose Inc."]
|
11
11
|
spec.email = ["open.source@ribose.com"]
|
12
12
|
|
13
|
-
spec.summary = "RelatonIso: retrieve ISO Standards for bibliographic
|
14
|
-
"using the IsoBibliographicItem model"
|
15
|
-
spec.description = "RelatonIso: retrieve ISO Standards for bibliographic
|
16
|
-
"using the IsoBibliographicItem model"
|
13
|
+
spec.summary = "RelatonIso: retrieve ISO Standards for bibliographic " \
|
14
|
+
"use using the IsoBibliographicItem model"
|
15
|
+
spec.description = "RelatonIso: retrieve ISO Standards for bibliographic " \
|
16
|
+
"use using the IsoBibliographicItem model"
|
17
17
|
|
18
18
|
spec.homepage = "https://github.com/relaton/relaton-iso"
|
19
19
|
spec.license = "BSD-2-Clause"
|
@@ -26,11 +26,8 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.require_paths = ["lib"]
|
27
27
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
28
28
|
|
29
|
-
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
30
|
-
spec.add_development_dependency "rake", "~> 13.0"
|
31
|
-
spec.add_development_dependency "rspec", "~> 3.0"
|
32
|
-
|
33
29
|
spec.add_dependency "algolia", "~> 2.3.0"
|
34
|
-
spec.add_dependency "pubid-iso", "~> 0.
|
30
|
+
spec.add_dependency "pubid-iso", "~> 0.6.0"
|
31
|
+
spec.add_dependency "relaton-bib", "~> 1.14.13"
|
35
32
|
spec.add_dependency "relaton-iso-bib", "~> 1.14.0"
|
36
33
|
end
|
metadata
CHANGED
@@ -1,85 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.15.
|
4
|
+
version: 1.15.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.6'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.6'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '13.0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '13.0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rspec
|
14
|
+
name: algolia
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
44
16
|
requirements:
|
45
17
|
- - "~>"
|
46
18
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
type: :
|
19
|
+
version: 2.3.0
|
20
|
+
type: :runtime
|
49
21
|
prerelease: false
|
50
22
|
version_requirements: !ruby/object:Gem::Requirement
|
51
23
|
requirements:
|
52
24
|
- - "~>"
|
53
25
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
26
|
+
version: 2.3.0
|
55
27
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
28
|
+
name: pubid-iso
|
57
29
|
requirement: !ruby/object:Gem::Requirement
|
58
30
|
requirements:
|
59
31
|
- - "~>"
|
60
32
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
33
|
+
version: 0.6.0
|
62
34
|
type: :runtime
|
63
35
|
prerelease: false
|
64
36
|
version_requirements: !ruby/object:Gem::Requirement
|
65
37
|
requirements:
|
66
38
|
- - "~>"
|
67
39
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
40
|
+
version: 0.6.0
|
69
41
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
42
|
+
name: relaton-bib
|
71
43
|
requirement: !ruby/object:Gem::Requirement
|
72
44
|
requirements:
|
73
45
|
- - "~>"
|
74
46
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
47
|
+
version: 1.14.13
|
76
48
|
type: :runtime
|
77
49
|
prerelease: false
|
78
50
|
version_requirements: !ruby/object:Gem::Requirement
|
79
51
|
requirements:
|
80
52
|
- - "~>"
|
81
53
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
54
|
+
version: 1.14.13
|
83
55
|
- !ruby/object:Gem::Dependency
|
84
56
|
name: relaton-iso-bib
|
85
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -135,19 +107,21 @@ files:
|
|
135
107
|
- bin/setup
|
136
108
|
- bin/thor
|
137
109
|
- lib/relaton_iso.rb
|
110
|
+
- lib/relaton_iso/config.rb
|
138
111
|
- lib/relaton_iso/document_identifier.rb
|
139
112
|
- lib/relaton_iso/hit.rb
|
140
113
|
- lib/relaton_iso/hit_collection.rb
|
141
114
|
- lib/relaton_iso/iso_bibliography.rb
|
142
115
|
- lib/relaton_iso/processor.rb
|
143
116
|
- lib/relaton_iso/scrapper.rb
|
117
|
+
- lib/relaton_iso/util.rb
|
144
118
|
- lib/relaton_iso/version.rb
|
145
119
|
- relaton_iso.gemspec
|
146
120
|
homepage: https://github.com/relaton/relaton-iso
|
147
121
|
licenses:
|
148
122
|
- BSD-2-Clause
|
149
123
|
metadata: {}
|
150
|
-
post_install_message:
|
124
|
+
post_install_message:
|
151
125
|
rdoc_options: []
|
152
126
|
require_paths:
|
153
127
|
- lib
|
@@ -162,8 +136,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
162
136
|
- !ruby/object:Gem::Version
|
163
137
|
version: '0'
|
164
138
|
requirements: []
|
165
|
-
rubygems_version: 3.
|
166
|
-
signing_key:
|
139
|
+
rubygems_version: 3.3.26
|
140
|
+
signing_key:
|
167
141
|
specification_version: 4
|
168
142
|
summary: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
169
143
|
model'
|