relaton-iso 1.18.4 → 1.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +6 -13
- data/bin/{rdebug-ide → pubid-nist} +5 -7
- data/lib/relaton_iso/data_fetcher.rb +6 -8
- data/lib/relaton_iso/document_identifier.rb +1 -1
- data/lib/relaton_iso/hit_collection.rb +1 -2
- data/lib/relaton_iso/iso_bibliography.rb +9 -9
- data/lib/relaton_iso/scrapper.rb +3 -2
- data/lib/relaton_iso/util.rb +1 -4
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +0 -1
- data/relaton_iso.gemspec +1 -1
- metadata +5 -7
- data/lib/relaton_iso/config.rb +0 -10
- data/lib/relaton_iso/index.rb +0 -132
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a66fbf3c95df32417a9bdaea666de8a154ec75d52ed1ef7a7db4a92925a57893
|
4
|
+
data.tar.gz: 05dbd444b033089d02f6334e82b54372c291b6d34cb7b3722b8bf0122abf62bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70e8782fb0c925a56c329a9ff1a19de812f3055d072a113f1d0af390c7ef51904991bb615f37f912ca079f68ca5105542905f10102969c4aa5296e57b2fae8f7
|
7
|
+
data.tar.gz: 7e404e058db5336e2225428512da3e3d6e507baa6ae6c19a0ba5cde0aa39c5a5326c3dca63e20448c1baafa5beadf7e8193aaa11fdff6ccec6b9ed9789d0b69e
|
data/README.adoc
CHANGED
@@ -31,24 +31,13 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
== Usage
|
33
33
|
|
34
|
-
===
|
35
|
-
|
36
|
-
Configuration is optional. The available option is `logger` which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with `Logger::WARN` level. To change the logger level, use `RelatonIso.configure` block.
|
34
|
+
=== Search for standards using keywords
|
37
35
|
|
38
36
|
[source,ruby]
|
39
37
|
----
|
40
38
|
require 'relaton_iso'
|
41
39
|
=> true
|
42
40
|
|
43
|
-
RelatonIso.configure do |config|
|
44
|
-
config.logger.level = Logger::DEBUG
|
45
|
-
end
|
46
|
-
----
|
47
|
-
|
48
|
-
=== Search for standards using keywords
|
49
|
-
|
50
|
-
[source,ruby]
|
51
|
-
----
|
52
41
|
hit_collection = RelatonIso::IsoBibliography.search("ISO 19115")
|
53
42
|
=> <RelatonIso::HitCollection:0x007fa5bc847038 @ref=19115 @fetched=false>
|
54
43
|
|
@@ -277,7 +266,7 @@ item.to_xml bibdata: true
|
|
277
266
|
<fetched>2022-12-04</fetched>
|
278
267
|
<title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
|
279
268
|
...
|
280
|
-
<ext schema-version="v1.0.
|
269
|
+
<ext schema-version="v1.0.3">
|
281
270
|
<doctype>international-standard</doctype>
|
282
271
|
...
|
283
272
|
</ext>
|
@@ -360,6 +349,10 @@ item.link
|
|
360
349
|
@type="rss">]
|
361
350
|
----
|
362
351
|
|
352
|
+
=== Logging
|
353
|
+
|
354
|
+
RelatonIso uses the relaton-logger gem for logging. By default, it logs to STDOUT. To change the log levels and add other loggers, read the https://github.com/relaton/relaton-logger#usage[relaton-logger] documentation.
|
355
|
+
|
363
356
|
== Development
|
364
357
|
|
365
358
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -4,18 +4,16 @@
|
|
4
4
|
#
|
5
5
|
# This file was generated by Bundler.
|
6
6
|
#
|
7
|
-
# The application '
|
7
|
+
# The application 'pubid-nist' is installed as part of a gem, and
|
8
8
|
# this file is here to facilitate running it.
|
9
9
|
#
|
10
10
|
|
11
|
-
|
12
|
-
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
-
Pathname.new(__FILE__).realpath)
|
11
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
|
14
12
|
|
15
|
-
bundle_binstub = File.expand_path("
|
13
|
+
bundle_binstub = File.expand_path("bundle", __dir__)
|
16
14
|
|
17
15
|
if File.file?(bundle_binstub)
|
18
|
-
if File.read(bundle_binstub, 300)
|
16
|
+
if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
|
19
17
|
load(bundle_binstub)
|
20
18
|
else
|
21
19
|
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
@@ -26,4 +24,4 @@ end
|
|
26
24
|
require "rubygems"
|
27
25
|
require "bundler/setup"
|
28
26
|
|
29
|
-
load Gem.bin_path("
|
27
|
+
load Gem.bin_path("pubid-nist", "pubid-nist")
|
@@ -95,12 +95,11 @@ module RelatonIso
|
|
95
95
|
uri = URI(Scrapper::DOMAIN + path)
|
96
96
|
begin
|
97
97
|
get_response uri
|
98
|
-
rescue Net::OpenTimeout, Net::ReadTimeout => e
|
98
|
+
rescue Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNREFUSED => e
|
99
99
|
try += 1
|
100
100
|
retry if check_try try, uri
|
101
101
|
|
102
|
-
|
103
|
-
warn e.message
|
102
|
+
Util.error "Error fetching #{uri}, #{e.message}"
|
104
103
|
end
|
105
104
|
end
|
106
105
|
|
@@ -111,7 +110,7 @@ module RelatonIso
|
|
111
110
|
|
112
111
|
def check_try(try, uri)
|
113
112
|
if try < 3
|
114
|
-
warn "Timeout fetching #{uri}, retrying..."
|
113
|
+
Util.warn "Timeout fetching #{uri}, retrying..."
|
115
114
|
sleep 1
|
116
115
|
true
|
117
116
|
end
|
@@ -137,9 +136,8 @@ module RelatonIso
|
|
137
136
|
doc = Scrapper.parse_page docpath
|
138
137
|
@mutex.synchronize { save_doc doc, docpath }
|
139
138
|
rescue StandardError => e
|
140
|
-
|
141
|
-
|
142
|
-
warn e.backtrace
|
139
|
+
Util.error "Error fetching document: #{Scrapper::DOMAIN}#{docpath}\n" \
|
140
|
+
"#{e.message}\n#{e.backtrace}"
|
143
141
|
end
|
144
142
|
|
145
143
|
# def compare_docids(id1, id2)
|
@@ -158,7 +156,7 @@ module RelatonIso
|
|
158
156
|
file_name = docid.id.gsub(/[\s\/:]+/, "-").downcase
|
159
157
|
file = File.join @output, "#{file_name}.#{@ext}"
|
160
158
|
if @files.include? file
|
161
|
-
warn "Duplicate file #{file} for #{docid.id} from #{Scrapper::DOMAIN}#{docpath}"
|
159
|
+
Util.warn "Duplicate file #{file} for #{docid.id} from #{Scrapper::DOMAIN}#{docpath}"
|
162
160
|
else
|
163
161
|
@files << file
|
164
162
|
index.add_or_update docid.to_h, file
|
@@ -11,7 +11,7 @@ module RelatonIso
|
|
11
11
|
end
|
12
12
|
type == "URN" ? @id.urn.to_s : id_str
|
13
13
|
rescue Pubid::Iso::Errors::NoEditionError => e
|
14
|
-
Util.warn "
|
14
|
+
Util.warn "#{type} identifier can't be generated for `#{@id}`: #{e.message}"
|
15
15
|
end
|
16
16
|
|
17
17
|
def to_h
|
@@ -38,7 +38,7 @@ module RelatonIso
|
|
38
38
|
|
39
39
|
query_pubid = Pubid::Iso::Identifier.parse(code)
|
40
40
|
query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
|
41
|
-
Util.
|
41
|
+
Util.info "Fetching from Relaton repository ...", key: query_pubid.to_s
|
42
42
|
|
43
43
|
hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
|
44
44
|
tip_ids = look_up_with_any_types_stages(hits, ref, opts)
|
@@ -46,13 +46,13 @@ module RelatonIso
|
|
46
46
|
return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
|
47
47
|
|
48
48
|
response_pubid = ret.docidentifier.first.id # .sub(" (all parts)", "")
|
49
|
-
Util.
|
49
|
+
Util.info "Found: `#{response_pubid}`", key: query_pubid.to_s
|
50
50
|
get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
|
51
51
|
return ret if get_all
|
52
52
|
|
53
53
|
ret.to_most_recent_reference
|
54
54
|
rescue Pubid::Core::Errors::ParseError
|
55
|
-
Util.
|
55
|
+
Util.info "Is not recognized as a standards identifier.", key: code
|
56
56
|
nil
|
57
57
|
end
|
58
58
|
|
@@ -115,23 +115,23 @@ module RelatonIso
|
|
115
115
|
|
116
116
|
# @param pubid [Pubid::Iso::Identifier] PubID with no results
|
117
117
|
def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
118
|
-
Util.
|
118
|
+
Util.info "Not found.", key: pubid.to_s
|
119
119
|
|
120
120
|
if missed_year_ids.any?
|
121
121
|
ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
|
122
|
-
Util.
|
122
|
+
Util.info "TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}.", key: pubid.to_s
|
123
123
|
end
|
124
124
|
|
125
125
|
if tip_ids.any?
|
126
126
|
ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
|
127
|
-
Util.
|
127
|
+
Util.info "TIP: Matches exist for #{ids}.", key: pubid.to_s
|
128
128
|
end
|
129
129
|
|
130
130
|
if pubid.part
|
131
|
-
Util.
|
131
|
+
Util.info "TIP: If it cannot be found, the document may no longer be published in parts.", key: pubid.to_s
|
132
132
|
else
|
133
|
-
Util.
|
134
|
-
"use `#{pubid.to_s(format: :ref_undated)} (all parts)`."
|
133
|
+
Util.info "TIP: If you wish to cite all document parts for the reference, " \
|
134
|
+
"use `#{pubid.to_s(format: :ref_undated)} (all parts)`.", key: pubid.to_s
|
135
135
|
end
|
136
136
|
|
137
137
|
nil
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -218,8 +218,9 @@ module RelatonIso
|
|
218
218
|
case resp.code
|
219
219
|
when "200" then [resp, uri]
|
220
220
|
when "301" then get_redirection(resp["location"])
|
221
|
+
when "404" then raise RelatonBib::RequestError, "#{uri} not found."
|
221
222
|
else
|
222
|
-
sleep
|
223
|
+
sleep (2**try)
|
223
224
|
get_response uri, try + 1
|
224
225
|
end
|
225
226
|
end
|
@@ -279,7 +280,7 @@ module RelatonIso
|
|
279
280
|
# @return [String, nil] ID
|
280
281
|
#
|
281
282
|
def item_ref(doc)
|
282
|
-
doc.at("//main//section/div/div/div//h1/span[1]")&.text
|
283
|
+
doc.at("//main//section/div/div/div//h1/span[1]")&.text&.strip
|
283
284
|
end
|
284
285
|
|
285
286
|
# Fetch status.
|
data/lib/relaton_iso/util.rb
CHANGED
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pubid
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.
|
47
|
+
version: 1.19.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.
|
54
|
+
version: 1.19.0
|
55
55
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
56
56
|
model'
|
57
57
|
email:
|
@@ -81,10 +81,10 @@ files:
|
|
81
81
|
- bin/ldiff
|
82
82
|
- bin/nokogiri
|
83
83
|
- bin/pry
|
84
|
+
- bin/pubid-nist
|
84
85
|
- bin/racc
|
85
86
|
- bin/rackup
|
86
87
|
- bin/rake
|
87
|
-
- bin/rdebug-ide
|
88
88
|
- bin/rspec
|
89
89
|
- bin/rubocop
|
90
90
|
- bin/ruby-parse
|
@@ -93,13 +93,11 @@ files:
|
|
93
93
|
- bin/setup
|
94
94
|
- bin/thor
|
95
95
|
- lib/relaton_iso.rb
|
96
|
-
- lib/relaton_iso/config.rb
|
97
96
|
- lib/relaton_iso/data_fetcher.rb
|
98
97
|
- lib/relaton_iso/document_identifier.rb
|
99
98
|
- lib/relaton_iso/hash_converter.rb
|
100
99
|
- lib/relaton_iso/hit.rb
|
101
100
|
- lib/relaton_iso/hit_collection.rb
|
102
|
-
- lib/relaton_iso/index.rb
|
103
101
|
- lib/relaton_iso/iso_bibliography.rb
|
104
102
|
- lib/relaton_iso/processor.rb
|
105
103
|
- lib/relaton_iso/queue.rb
|
data/lib/relaton_iso/config.rb
DELETED
data/lib/relaton_iso/index.rb
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
module RelatonIso
|
2
|
-
# Index.
|
3
|
-
class Index
|
4
|
-
#
|
5
|
-
# Initialise index. If file path is given, read index from file. If file is not
|
6
|
-
# given, look for it in a `/home/USER/.relaton/iso` directory. If file
|
7
|
-
# doesn't exist, or is outdated then fetch index from GitHub.
|
8
|
-
#
|
9
|
-
# @param [String, nil] file path to index file.
|
10
|
-
#
|
11
|
-
def initialize(file = nil)
|
12
|
-
@file = file
|
13
|
-
end
|
14
|
-
|
15
|
-
#
|
16
|
-
# Create index.
|
17
|
-
#
|
18
|
-
# @return [Array<Hash>] index
|
19
|
-
#
|
20
|
-
def index
|
21
|
-
@index ||= read_index || read_from_user_dir || fetch_index
|
22
|
-
end
|
23
|
-
|
24
|
-
#
|
25
|
-
# Add or update index entry.
|
26
|
-
#
|
27
|
-
# @param [RelatonIsoBib::IsoBibliographicItem] item document
|
28
|
-
#
|
29
|
-
# @return [void]
|
30
|
-
#
|
31
|
-
def <<(item)
|
32
|
-
id = item.docidentifier.detect(&:primary).id
|
33
|
-
row = self[id] || begin
|
34
|
-
r = { id: id }
|
35
|
-
index << r
|
36
|
-
r
|
37
|
-
end
|
38
|
-
row[:title] = item.title.first.title.content
|
39
|
-
end
|
40
|
-
|
41
|
-
#
|
42
|
-
# Fetch document from index by ID.
|
43
|
-
#
|
44
|
-
# @param [String] id document ID
|
45
|
-
#
|
46
|
-
# @return [Hash] index entry
|
47
|
-
#
|
48
|
-
def [](id)
|
49
|
-
index.detect { |i| i[:id] == id }
|
50
|
-
end
|
51
|
-
|
52
|
-
#
|
53
|
-
# Save index to file.
|
54
|
-
#
|
55
|
-
# @return [void]
|
56
|
-
#
|
57
|
-
def save
|
58
|
-
serialize_and_save index
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
#
|
64
|
-
# Serialize index and save to file.
|
65
|
-
#
|
66
|
-
# @param [Array<Hash>] idx index
|
67
|
-
#
|
68
|
-
# @return [void]
|
69
|
-
#
|
70
|
-
def serialize_and_save(idx)
|
71
|
-
File.open(@file, "w:UTF-8") do |f|
|
72
|
-
f.puts "---"
|
73
|
-
idx.each do |i|
|
74
|
-
f.puts i.transform_keys(&:to_s).to_yaml.sub("---\n", "")
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
#
|
80
|
-
# Read index from file. If file doesn't exist, create empty index.
|
81
|
-
#
|
82
|
-
# @return [Array<Hash>, nil] index
|
83
|
-
#
|
84
|
-
def read_index
|
85
|
-
if @file && File.exist?(@file) then read_file
|
86
|
-
elsif @file then []
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
#
|
91
|
-
# Read index from `/home/USER/.relaton/iso` or fetch it from GitHub,
|
92
|
-
# if file doesn't exist, or is outdated.
|
93
|
-
#
|
94
|
-
# @return [Array<Hash>] index
|
95
|
-
#
|
96
|
-
def read_from_user_dir
|
97
|
-
@file = File.join(Dir.home, "index.yml")
|
98
|
-
read_file if File.exist?(@file) && !outdated?
|
99
|
-
end
|
100
|
-
|
101
|
-
def read_file
|
102
|
-
yaml = File.read @file, encoding: "UTF-8"
|
103
|
-
RelatonBib.parse_yaml yaml, [], symbolize_names: true
|
104
|
-
end
|
105
|
-
|
106
|
-
#
|
107
|
-
# Check if index file is outdated.
|
108
|
-
#
|
109
|
-
# @return [Boolean] true if older than 24 hours
|
110
|
-
#
|
111
|
-
def outdated?
|
112
|
-
(Time.now - File.mtime(@file)) / 3600 > 24
|
113
|
-
end
|
114
|
-
|
115
|
-
#
|
116
|
-
# Fetch index from GitHub.
|
117
|
-
#
|
118
|
-
# @return [Array<Hash>] index
|
119
|
-
#
|
120
|
-
def fetch_index
|
121
|
-
url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/master/iso/index.zip"
|
122
|
-
zip = Zip::InputStream.new URI(url).open
|
123
|
-
yaml = zip.get_next_entry.get_input_stream.read
|
124
|
-
idx = RelatonBib.parse_yaml yaml, [], symbolize_names: true
|
125
|
-
serialize_and_save idx
|
126
|
-
idx
|
127
|
-
rescue OpenURI::HTTPError => e
|
128
|
-
warn "[relaton-iso] WARNING: failed to fetch index: #{e.message}"
|
129
|
-
[]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|