relaton-iso 1.18.4 → 1.19.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +6 -13
- data/bin/{rdebug-ide → pubid-nist} +5 -7
- data/lib/relaton_iso/data_fetcher.rb +6 -8
- data/lib/relaton_iso/document_identifier.rb +1 -1
- data/lib/relaton_iso/hit_collection.rb +1 -2
- data/lib/relaton_iso/iso_bibliography.rb +9 -9
- data/lib/relaton_iso/scrapper.rb +3 -2
- data/lib/relaton_iso/util.rb +1 -4
- data/lib/relaton_iso/version.rb +1 -1
- data/lib/relaton_iso.rb +0 -1
- data/relaton_iso.gemspec +1 -1
- metadata +5 -7
- data/lib/relaton_iso/config.rb +0 -10
- data/lib/relaton_iso/index.rb +0 -132
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 05ea6b24502f419ac918fa4128bbde3fead57788c976a9a45aeff911a9d0487e
|
4
|
+
data.tar.gz: dd524e78977ef203a13560590e4d4167a91f6cd07fbe3c5bee419699c0fe0b40
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2d5b69b515b3dc68a0cc27c0a8a59e6298849b4df54dfbf45a8cb7d52d347863eeea7adb9d44df56e4cab08ec04e3d12d1d47eb8dd5358567b38f214afbc380
|
7
|
+
data.tar.gz: 114c12c049bf9da2d03e94905ac68017e4bbf61087c261bcbc21a92cee94eac7371a015ea01592def5dee256add9d814c1f3ed75dcf78ac434c29e76d26a06ca
|
data/README.adoc
CHANGED
@@ -31,24 +31,13 @@ Or install it yourself as:
|
|
31
31
|
|
32
32
|
== Usage
|
33
33
|
|
34
|
-
===
|
35
|
-
|
36
|
-
Configuration is optional. The available option is `logger` which is a `Logger` instance. By default, the logger is `Logger.new($stderr)` with `Logger::WARN` level. To change the logger level, use `RelatonIso.configure` block.
|
34
|
+
=== Search for standards using keywords
|
37
35
|
|
38
36
|
[source,ruby]
|
39
37
|
----
|
40
38
|
require 'relaton_iso'
|
41
39
|
=> true
|
42
40
|
|
43
|
-
RelatonIso.configure do |config|
|
44
|
-
config.logger.level = Logger::DEBUG
|
45
|
-
end
|
46
|
-
----
|
47
|
-
|
48
|
-
=== Search for standards using keywords
|
49
|
-
|
50
|
-
[source,ruby]
|
51
|
-
----
|
52
41
|
hit_collection = RelatonIso::IsoBibliography.search("ISO 19115")
|
53
42
|
=> <RelatonIso::HitCollection:0x007fa5bc847038 @ref=19115 @fetched=false>
|
54
43
|
|
@@ -277,7 +266,7 @@ item.to_xml bibdata: true
|
|
277
266
|
<fetched>2022-12-04</fetched>
|
278
267
|
<title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
|
279
268
|
...
|
280
|
-
<ext schema-version="v1.0.
|
269
|
+
<ext schema-version="v1.0.3">
|
281
270
|
<doctype>international-standard</doctype>
|
282
271
|
...
|
283
272
|
</ext>
|
@@ -360,6 +349,10 @@ item.link
|
|
360
349
|
@type="rss">]
|
361
350
|
----
|
362
351
|
|
352
|
+
=== Logging
|
353
|
+
|
354
|
+
RelatonIso uses the relaton-logger gem for logging. By default, it logs to STDOUT. To change the log levels and add other loggers, read the https://github.com/relaton/relaton-logger#usage[relaton-logger] documentation.
|
355
|
+
|
363
356
|
== Development
|
364
357
|
|
365
358
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -4,18 +4,16 @@
|
|
4
4
|
#
|
5
5
|
# This file was generated by Bundler.
|
6
6
|
#
|
7
|
-
# The application '
|
7
|
+
# The application 'pubid-nist' is installed as part of a gem, and
|
8
8
|
# this file is here to facilitate running it.
|
9
9
|
#
|
10
10
|
|
11
|
-
|
12
|
-
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
-
Pathname.new(__FILE__).realpath)
|
11
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
|
14
12
|
|
15
|
-
bundle_binstub = File.expand_path("
|
13
|
+
bundle_binstub = File.expand_path("bundle", __dir__)
|
16
14
|
|
17
15
|
if File.file?(bundle_binstub)
|
18
|
-
if File.read(bundle_binstub, 300)
|
16
|
+
if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
|
19
17
|
load(bundle_binstub)
|
20
18
|
else
|
21
19
|
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
@@ -26,4 +24,4 @@ end
|
|
26
24
|
require "rubygems"
|
27
25
|
require "bundler/setup"
|
28
26
|
|
29
|
-
load Gem.bin_path("
|
27
|
+
load Gem.bin_path("pubid-nist", "pubid-nist")
|
@@ -95,12 +95,11 @@ module RelatonIso
|
|
95
95
|
uri = URI(Scrapper::DOMAIN + path)
|
96
96
|
begin
|
97
97
|
get_response uri
|
98
|
-
rescue Net::OpenTimeout, Net::ReadTimeout => e
|
98
|
+
rescue Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNREFUSED => e
|
99
99
|
try += 1
|
100
100
|
retry if check_try try, uri
|
101
101
|
|
102
|
-
|
103
|
-
warn e.message
|
102
|
+
Util.error "Error fetching #{uri}, #{e.message}"
|
104
103
|
end
|
105
104
|
end
|
106
105
|
|
@@ -111,7 +110,7 @@ module RelatonIso
|
|
111
110
|
|
112
111
|
def check_try(try, uri)
|
113
112
|
if try < 3
|
114
|
-
warn "Timeout fetching #{uri}, retrying..."
|
113
|
+
Util.warn "Timeout fetching #{uri}, retrying..."
|
115
114
|
sleep 1
|
116
115
|
true
|
117
116
|
end
|
@@ -137,9 +136,8 @@ module RelatonIso
|
|
137
136
|
doc = Scrapper.parse_page docpath
|
138
137
|
@mutex.synchronize { save_doc doc, docpath }
|
139
138
|
rescue StandardError => e
|
140
|
-
|
141
|
-
|
142
|
-
warn e.backtrace
|
139
|
+
Util.error "Error fetching document: #{Scrapper::DOMAIN}#{docpath}\n" \
|
140
|
+
"#{e.message}\n#{e.backtrace}"
|
143
141
|
end
|
144
142
|
|
145
143
|
# def compare_docids(id1, id2)
|
@@ -158,7 +156,7 @@ module RelatonIso
|
|
158
156
|
file_name = docid.id.gsub(/[\s\/:]+/, "-").downcase
|
159
157
|
file = File.join @output, "#{file_name}.#{@ext}"
|
160
158
|
if @files.include? file
|
161
|
-
warn "Duplicate file #{file} for #{docid.id} from #{Scrapper::DOMAIN}#{docpath}"
|
159
|
+
Util.warn "Duplicate file #{file} for #{docid.id} from #{Scrapper::DOMAIN}#{docpath}"
|
162
160
|
else
|
163
161
|
@files << file
|
164
162
|
index.add_or_update docid.to_h, file
|
@@ -11,7 +11,7 @@ module RelatonIso
|
|
11
11
|
end
|
12
12
|
type == "URN" ? @id.urn.to_s : id_str
|
13
13
|
rescue Pubid::Iso::Errors::NoEditionError => e
|
14
|
-
Util.warn "
|
14
|
+
Util.warn "#{type} identifier can't be generated for `#{@id}`: #{e.message}"
|
15
15
|
end
|
16
16
|
|
17
17
|
def to_h
|
@@ -38,7 +38,7 @@ module RelatonIso
|
|
38
38
|
|
39
39
|
query_pubid = Pubid::Iso::Identifier.parse(code)
|
40
40
|
query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
|
41
|
-
Util.
|
41
|
+
Util.info "Fetching from Relaton repository ...", key: query_pubid.to_s
|
42
42
|
|
43
43
|
hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
|
44
44
|
tip_ids = look_up_with_any_types_stages(hits, ref, opts)
|
@@ -46,13 +46,13 @@ module RelatonIso
|
|
46
46
|
return fetch_ref_err(query_pubid, missed_year_ids, tip_ids) unless ret
|
47
47
|
|
48
48
|
response_pubid = ret.docidentifier.first.id # .sub(" (all parts)", "")
|
49
|
-
Util.
|
49
|
+
Util.info "Found: `#{response_pubid}`", key: query_pubid.to_s
|
50
50
|
get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts]
|
51
51
|
return ret if get_all
|
52
52
|
|
53
53
|
ret.to_most_recent_reference
|
54
54
|
rescue Pubid::Core::Errors::ParseError
|
55
|
-
Util.warn "
|
55
|
+
Util.warn "Is not recognized as a standards identifier.", key: code
|
56
56
|
nil
|
57
57
|
end
|
58
58
|
|
@@ -115,23 +115,23 @@ module RelatonIso
|
|
115
115
|
|
116
116
|
# @param pubid [Pubid::Iso::Identifier] PubID with no results
|
117
117
|
def fetch_ref_err(pubid, missed_year_ids, tip_ids) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
|
118
|
-
Util.
|
118
|
+
Util.info "Not found.", key: pubid.to_s
|
119
119
|
|
120
120
|
if missed_year_ids.any?
|
121
121
|
ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
|
122
|
-
Util.
|
122
|
+
Util.info "TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}.", key: pubid.to_s
|
123
123
|
end
|
124
124
|
|
125
125
|
if tip_ids.any?
|
126
126
|
ids = tip_ids.map { |i| "`#{i}`" }.join(", ")
|
127
|
-
Util.
|
127
|
+
Util.info "TIP: Matches exist for #{ids}.", key: pubid.to_s
|
128
128
|
end
|
129
129
|
|
130
130
|
if pubid.part
|
131
|
-
Util.
|
131
|
+
Util.info "TIP: If it cannot be found, the document may no longer be published in parts.", key: pubid.to_s
|
132
132
|
else
|
133
|
-
Util.
|
134
|
-
"use `#{pubid.to_s(format: :ref_undated)} (all parts)`."
|
133
|
+
Util.info "TIP: If you wish to cite all document parts for the reference, " \
|
134
|
+
"use `#{pubid.to_s(format: :ref_undated)} (all parts)`.", key: pubid.to_s
|
135
135
|
end
|
136
136
|
|
137
137
|
nil
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -218,8 +218,9 @@ module RelatonIso
|
|
218
218
|
case resp.code
|
219
219
|
when "200" then [resp, uri]
|
220
220
|
when "301" then get_redirection(resp["location"])
|
221
|
+
when "404" then raise RelatonBib::RequestError, "#{uri} not found."
|
221
222
|
else
|
222
|
-
sleep
|
223
|
+
sleep (2**try)
|
223
224
|
get_response uri, try + 1
|
224
225
|
end
|
225
226
|
end
|
@@ -279,7 +280,7 @@ module RelatonIso
|
|
279
280
|
# @return [String, nil] ID
|
280
281
|
#
|
281
282
|
def item_ref(doc)
|
282
|
-
doc.at("//main//section/div/div/div//h1/span[1]")&.text
|
283
|
+
doc.at("//main//section/div/div/div//h1/span[1]")&.text&.strip
|
283
284
|
end
|
284
285
|
|
285
286
|
# Fetch status.
|
data/lib/relaton_iso/util.rb
CHANGED
data/lib/relaton_iso/version.rb
CHANGED
data/lib/relaton_iso.rb
CHANGED
data/relaton_iso.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.19.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pubid
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 1.
|
47
|
+
version: 1.19.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.
|
54
|
+
version: 1.19.0
|
55
55
|
description: 'RelatonIso: retrieve ISO Standards for bibliographic use using the IsoBibliographicItem
|
56
56
|
model'
|
57
57
|
email:
|
@@ -81,10 +81,10 @@ files:
|
|
81
81
|
- bin/ldiff
|
82
82
|
- bin/nokogiri
|
83
83
|
- bin/pry
|
84
|
+
- bin/pubid-nist
|
84
85
|
- bin/racc
|
85
86
|
- bin/rackup
|
86
87
|
- bin/rake
|
87
|
-
- bin/rdebug-ide
|
88
88
|
- bin/rspec
|
89
89
|
- bin/rubocop
|
90
90
|
- bin/ruby-parse
|
@@ -93,13 +93,11 @@ files:
|
|
93
93
|
- bin/setup
|
94
94
|
- bin/thor
|
95
95
|
- lib/relaton_iso.rb
|
96
|
-
- lib/relaton_iso/config.rb
|
97
96
|
- lib/relaton_iso/data_fetcher.rb
|
98
97
|
- lib/relaton_iso/document_identifier.rb
|
99
98
|
- lib/relaton_iso/hash_converter.rb
|
100
99
|
- lib/relaton_iso/hit.rb
|
101
100
|
- lib/relaton_iso/hit_collection.rb
|
102
|
-
- lib/relaton_iso/index.rb
|
103
101
|
- lib/relaton_iso/iso_bibliography.rb
|
104
102
|
- lib/relaton_iso/processor.rb
|
105
103
|
- lib/relaton_iso/queue.rb
|
data/lib/relaton_iso/config.rb
DELETED
data/lib/relaton_iso/index.rb
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
module RelatonIso
|
2
|
-
# Index.
|
3
|
-
class Index
|
4
|
-
#
|
5
|
-
# Initialise index. If file path is given, read index from file. If file is not
|
6
|
-
# given, look for it in a `/home/USER/.relaton/iso` directory. If file
|
7
|
-
# doesn't exist, or is outdated then fetch index from GitHub.
|
8
|
-
#
|
9
|
-
# @param [String, nil] file path to index file.
|
10
|
-
#
|
11
|
-
def initialize(file = nil)
|
12
|
-
@file = file
|
13
|
-
end
|
14
|
-
|
15
|
-
#
|
16
|
-
# Create index.
|
17
|
-
#
|
18
|
-
# @return [Array<Hash>] index
|
19
|
-
#
|
20
|
-
def index
|
21
|
-
@index ||= read_index || read_from_user_dir || fetch_index
|
22
|
-
end
|
23
|
-
|
24
|
-
#
|
25
|
-
# Add or update index entry.
|
26
|
-
#
|
27
|
-
# @param [RelatonIsoBib::IsoBibliographicItem] item document
|
28
|
-
#
|
29
|
-
# @return [void]
|
30
|
-
#
|
31
|
-
def <<(item)
|
32
|
-
id = item.docidentifier.detect(&:primary).id
|
33
|
-
row = self[id] || begin
|
34
|
-
r = { id: id }
|
35
|
-
index << r
|
36
|
-
r
|
37
|
-
end
|
38
|
-
row[:title] = item.title.first.title.content
|
39
|
-
end
|
40
|
-
|
41
|
-
#
|
42
|
-
# Fetch document from index by ID.
|
43
|
-
#
|
44
|
-
# @param [String] id document ID
|
45
|
-
#
|
46
|
-
# @return [Hash] index entry
|
47
|
-
#
|
48
|
-
def [](id)
|
49
|
-
index.detect { |i| i[:id] == id }
|
50
|
-
end
|
51
|
-
|
52
|
-
#
|
53
|
-
# Save index to file.
|
54
|
-
#
|
55
|
-
# @return [void]
|
56
|
-
#
|
57
|
-
def save
|
58
|
-
serialize_and_save index
|
59
|
-
end
|
60
|
-
|
61
|
-
private
|
62
|
-
|
63
|
-
#
|
64
|
-
# Serialize index and save to file.
|
65
|
-
#
|
66
|
-
# @param [Array<Hash>] idx index
|
67
|
-
#
|
68
|
-
# @return [void]
|
69
|
-
#
|
70
|
-
def serialize_and_save(idx)
|
71
|
-
File.open(@file, "w:UTF-8") do |f|
|
72
|
-
f.puts "---"
|
73
|
-
idx.each do |i|
|
74
|
-
f.puts i.transform_keys(&:to_s).to_yaml.sub("---\n", "")
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
#
|
80
|
-
# Read index from file. If file doesn't exist, create empty index.
|
81
|
-
#
|
82
|
-
# @return [Array<Hash>, nil] index
|
83
|
-
#
|
84
|
-
def read_index
|
85
|
-
if @file && File.exist?(@file) then read_file
|
86
|
-
elsif @file then []
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
#
|
91
|
-
# Read index from `/home/USER/.relaton/iso` or fetch it from GitHub,
|
92
|
-
# if file doesn't exist, or is outdated.
|
93
|
-
#
|
94
|
-
# @return [Array<Hash>] index
|
95
|
-
#
|
96
|
-
def read_from_user_dir
|
97
|
-
@file = File.join(Dir.home, "index.yml")
|
98
|
-
read_file if File.exist?(@file) && !outdated?
|
99
|
-
end
|
100
|
-
|
101
|
-
def read_file
|
102
|
-
yaml = File.read @file, encoding: "UTF-8"
|
103
|
-
RelatonBib.parse_yaml yaml, [], symbolize_names: true
|
104
|
-
end
|
105
|
-
|
106
|
-
#
|
107
|
-
# Check if index file is outdated.
|
108
|
-
#
|
109
|
-
# @return [Boolean] true if older than 24 hours
|
110
|
-
#
|
111
|
-
def outdated?
|
112
|
-
(Time.now - File.mtime(@file)) / 3600 > 24
|
113
|
-
end
|
114
|
-
|
115
|
-
#
|
116
|
-
# Fetch index from GitHub.
|
117
|
-
#
|
118
|
-
# @return [Array<Hash>] index
|
119
|
-
#
|
120
|
-
def fetch_index
|
121
|
-
url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/master/iso/index.zip"
|
122
|
-
zip = Zip::InputStream.new URI(url).open
|
123
|
-
yaml = zip.get_next_entry.get_input_stream.read
|
124
|
-
idx = RelatonBib.parse_yaml yaml, [], symbolize_names: true
|
125
|
-
serialize_and_save idx
|
126
|
-
idx
|
127
|
-
rescue OpenURI::HTTPError => e
|
128
|
-
warn "[relaton-iso] WARNING: failed to fetch index: #{e.message}"
|
129
|
-
[]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|