relaton-iso 1.19.2 → 2.0.0.pre.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile +1 -0
- data/README.adoc +134 -131
- data/bin/console +1 -1
- data/grammars/basicdoc.rng +2110 -0
- data/grammars/biblio-standoc.rng +287 -0
- data/grammars/biblio.rng +2097 -0
- data/grammars/relaton-iso-compile.rng +11 -0
- data/grammars/relaton-iso.rng +214 -0
- data/lib/relaton/iso/bibliography.rb +206 -0
- data/lib/relaton/iso/data_fetcher.rb +227 -0
- data/lib/relaton/iso/hash_parser_v1.rb +121 -0
- data/lib/relaton/iso/hit.rb +62 -0
- data/lib/relaton/iso/hit_collection.rb +117 -0
- data/lib/relaton/iso/item_data.rb +49 -0
- data/lib/relaton/iso/model/bibdata.rb +9 -0
- data/lib/relaton/iso/model/bibitem.rb +7 -0
- data/lib/relaton/iso/model/contributor.rb +7 -0
- data/lib/relaton/iso/model/contributor_info.rb +9 -0
- data/lib/relaton/iso/model/docidentifier.rb +128 -0
- data/lib/relaton/iso/model/doctype.rb +13 -0
- data/lib/relaton/iso/model/ext.rb +47 -0
- data/lib/relaton/iso/model/iso_project_group.rb +21 -0
- data/lib/relaton/iso/model/item.rb +17 -0
- data/lib/relaton/iso/model/item_base.rb +19 -0
- data/lib/relaton/iso/model/organization.rb +9 -0
- data/lib/relaton/iso/model/project_number.rb +22 -0
- data/lib/relaton/iso/model/relation.rb +9 -0
- data/lib/relaton/iso/model/stagename.rb +14 -0
- data/lib/relaton/iso/model/structured_identifier.rb +31 -0
- data/lib/relaton/iso/processor.rb +78 -0
- data/lib/relaton/iso/queue.rb +63 -0
- data/lib/relaton/iso/scraper.rb +591 -0
- data/lib/relaton/iso/util.rb +8 -0
- data/lib/relaton/iso/version.rb +7 -0
- data/lib/relaton/iso.rb +17 -0
- data/relaton_iso.gemspec +9 -7
- metadata +79 -49
- data/bin/bundle +0 -109
- data/bin/byebug +0 -27
- data/bin/coderay +0 -27
- data/bin/gdb_wrapper +0 -29
- data/bin/htmldiff +0 -27
- data/bin/httpclient +0 -29
- data/bin/ldiff +0 -27
- data/bin/nokogiri +0 -27
- data/bin/pry +0 -27
- data/bin/pubid-nist +0 -27
- data/bin/racc +0 -27
- data/bin/rackup +0 -29
- data/bin/rake +0 -27
- data/bin/rubocop +0 -27
- data/bin/ruby-parse +0 -27
- data/bin/ruby-rewrite +0 -27
- data/bin/safe_yaml +0 -29
- data/bin/thor +0 -27
- data/lib/relaton_iso/data_fetcher.rb +0 -246
- data/lib/relaton_iso/document_identifier.rb +0 -46
- data/lib/relaton_iso/hash_converter.rb +0 -15
- data/lib/relaton_iso/hit.rb +0 -59
- data/lib/relaton_iso/hit_collection.rb +0 -100
- data/lib/relaton_iso/iso_bibliography.rb +0 -202
- data/lib/relaton_iso/processor.rb +0 -67
- data/lib/relaton_iso/queue.rb +0 -61
- data/lib/relaton_iso/scrapper.rb +0 -553
- data/lib/relaton_iso/util.rb +0 -6
- data/lib/relaton_iso/version.rb +0 -5
- data/lib/relaton_iso.rb +0 -17
data/bin/ruby-parse
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
#
|
5
|
-
# This file was generated by Bundler.
|
6
|
-
#
|
7
|
-
# The application 'ruby-parse' is installed as part of a gem, and
|
8
|
-
# this file is here to facilitate running it.
|
9
|
-
#
|
10
|
-
|
11
|
-
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
|
12
|
-
|
13
|
-
bundle_binstub = File.expand_path("bundle", __dir__)
|
14
|
-
|
15
|
-
if File.file?(bundle_binstub)
|
16
|
-
if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
|
17
|
-
load(bundle_binstub)
|
18
|
-
else
|
19
|
-
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
20
|
-
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
require "rubygems"
|
25
|
-
require "bundler/setup"
|
26
|
-
|
27
|
-
load Gem.bin_path("parser", "ruby-parse")
|
data/bin/ruby-rewrite
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
#
|
5
|
-
# This file was generated by Bundler.
|
6
|
-
#
|
7
|
-
# The application 'ruby-rewrite' is installed as part of a gem, and
|
8
|
-
# this file is here to facilitate running it.
|
9
|
-
#
|
10
|
-
|
11
|
-
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
|
12
|
-
|
13
|
-
bundle_binstub = File.expand_path("bundle", __dir__)
|
14
|
-
|
15
|
-
if File.file?(bundle_binstub)
|
16
|
-
if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
|
17
|
-
load(bundle_binstub)
|
18
|
-
else
|
19
|
-
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
20
|
-
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
require "rubygems"
|
25
|
-
require "bundler/setup"
|
26
|
-
|
27
|
-
load Gem.bin_path("parser", "ruby-rewrite")
|
data/bin/safe_yaml
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
#
|
5
|
-
# This file was generated by Bundler.
|
6
|
-
#
|
7
|
-
# The application 'safe_yaml' is installed as part of a gem, and
|
8
|
-
# this file is here to facilitate running it.
|
9
|
-
#
|
10
|
-
|
11
|
-
require "pathname"
|
12
|
-
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
-
Pathname.new(__FILE__).realpath)
|
14
|
-
|
15
|
-
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
-
|
17
|
-
if File.file?(bundle_binstub)
|
18
|
-
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
-
load(bundle_binstub)
|
20
|
-
else
|
21
|
-
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
-
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
require "rubygems"
|
27
|
-
require "bundler/setup"
|
28
|
-
|
29
|
-
load Gem.bin_path("safe_yaml", "safe_yaml")
|
data/bin/thor
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
#
|
5
|
-
# This file was generated by Bundler.
|
6
|
-
#
|
7
|
-
# The application 'thor' is installed as part of a gem, and
|
8
|
-
# this file is here to facilitate running it.
|
9
|
-
#
|
10
|
-
|
11
|
-
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
|
12
|
-
|
13
|
-
bundle_binstub = File.expand_path("bundle", __dir__)
|
14
|
-
|
15
|
-
if File.file?(bundle_binstub)
|
16
|
-
if File.read(bundle_binstub, 300).include?("This file was generated by Bundler")
|
17
|
-
load(bundle_binstub)
|
18
|
-
else
|
19
|
-
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
20
|
-
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
require "rubygems"
|
25
|
-
require "bundler/setup"
|
26
|
-
|
27
|
-
load Gem.bin_path("thor", "thor")
|
@@ -1,246 +0,0 @@
|
|
1
|
-
module RelatonIso
|
2
|
-
# Fetch all the documents from ISO website.
|
3
|
-
class DataFetcher
|
4
|
-
#
|
5
|
-
# Initialize data fetcher.
|
6
|
-
#
|
7
|
-
# @param [String] output output directory
|
8
|
-
# @param [String] format format of output files (yaml, bibxml, xml)
|
9
|
-
#
|
10
|
-
def initialize(output, format) # rubocop:disable Metrics/AbcSize
|
11
|
-
@output = output
|
12
|
-
@format = format
|
13
|
-
@ext = format.sub(/^bib/, "")
|
14
|
-
@files = Set.new
|
15
|
-
@queue = ::Queue.new
|
16
|
-
@mutex = Mutex.new
|
17
|
-
@gh_issue = Relaton::Logger::Channels::GhIssue.new "relaton/relaton-iso", "Error fetching ISO documents"
|
18
|
-
Relaton.logger_pool[:gh_issue] = Relaton::Logger::Log.new(@gh_issue, levels: [:error])
|
19
|
-
@errors = Hash.new(true)
|
20
|
-
end
|
21
|
-
|
22
|
-
def index
|
23
|
-
@index ||= Relaton::Index.find_or_create :iso, file: HitCollection::INDEXFILE
|
24
|
-
end
|
25
|
-
|
26
|
-
def iso_queue
|
27
|
-
@iso_queue ||= RelatonIso::Queue.new
|
28
|
-
end
|
29
|
-
|
30
|
-
#
|
31
|
-
# Initialize data fetcher and fetch data.
|
32
|
-
#
|
33
|
-
# @param [String] output output directory (default: "data")
|
34
|
-
# @param [String] format format of output files. Allowed: yaml (default), bibxml, xml
|
35
|
-
#
|
36
|
-
# @return [void]
|
37
|
-
#
|
38
|
-
def self.fetch(output: "data", format: "yaml")
|
39
|
-
t1 = Time.now
|
40
|
-
Util.info "Started at: #{t1}"
|
41
|
-
FileUtils.mkdir_p output
|
42
|
-
new(output, format).fetch
|
43
|
-
t2 = Time.now
|
44
|
-
Util.info "Stopped at: #{t2}"
|
45
|
-
Util.info "Done in: #{(t2 - t1).round} sec."
|
46
|
-
end
|
47
|
-
|
48
|
-
#
|
49
|
-
# Go through all ICS and fetch all documents.
|
50
|
-
#
|
51
|
-
# @return [void]
|
52
|
-
#
|
53
|
-
def fetch # rubocop:disable Metrics/AbcSize
|
54
|
-
Util.info "Scrapping ICS pages..."
|
55
|
-
fetch_ics
|
56
|
-
Util.info "(#{Time.now}) Scrapping documents..."
|
57
|
-
fetch_docs
|
58
|
-
iso_queue.save
|
59
|
-
# index.sort! { |a, b| compare_docids a, b }
|
60
|
-
index.save
|
61
|
-
repot_errors
|
62
|
-
end
|
63
|
-
|
64
|
-
def repot_errors
|
65
|
-
@errors.select { |_, v| v }.each_key do |k|
|
66
|
-
Util.error "Failed to fetch #{k}"
|
67
|
-
end
|
68
|
-
@gh_issue.create_issue
|
69
|
-
end
|
70
|
-
|
71
|
-
#
|
72
|
-
# Fetch ICS page recursively and store all the links to documents in the iso_queue.
|
73
|
-
#
|
74
|
-
# @param [String] path path to ICS page
|
75
|
-
#
|
76
|
-
def fetch_ics
|
77
|
-
threads = Array.new(3) { thread { |path| fetch_ics_page(path) } }
|
78
|
-
fetch_ics_page "/standards-catalogue/browse-by-ics.html"
|
79
|
-
sleep(1) until @queue.empty?
|
80
|
-
threads.size.times { @queue << :END }
|
81
|
-
threads.each(&:join)
|
82
|
-
end
|
83
|
-
|
84
|
-
def fetch_ics_page(path)
|
85
|
-
resp = get_redirection path
|
86
|
-
unless resp
|
87
|
-
Util.error "Failed fetching ICS page #{url(path)}"
|
88
|
-
return
|
89
|
-
end
|
90
|
-
|
91
|
-
page = Nokogiri::HTML(resp.body)
|
92
|
-
parse_doc_links page
|
93
|
-
parse_ics_links page
|
94
|
-
end
|
95
|
-
|
96
|
-
def parse_doc_links(page)
|
97
|
-
doc_links = page.xpath "//td[@data-title='Standard and/or project']/div/div/a"
|
98
|
-
@errors[:doc_links] &&= doc_links.empty?
|
99
|
-
doc_links.each { |item| iso_queue.add_first item[:href].split("?").first }
|
100
|
-
end
|
101
|
-
|
102
|
-
def parse_ics_links(page)
|
103
|
-
ics_links = page.xpath("//td[@data-title='ICS']/a")
|
104
|
-
@errors[:ics_links] &&= ics_links.empty?
|
105
|
-
ics_links.each { |item| @queue << item[:href] }
|
106
|
-
end
|
107
|
-
|
108
|
-
def url(path)
|
109
|
-
Scrapper::DOMAIN + path
|
110
|
-
end
|
111
|
-
|
112
|
-
#
|
113
|
-
# Get the page from the given path. If the page is redirected, get the
|
114
|
-
# page from the new path.
|
115
|
-
#
|
116
|
-
# @param [String] path path to the page
|
117
|
-
#
|
118
|
-
# @return [Net::HTTPOK, nil] HTTP response
|
119
|
-
#
|
120
|
-
def get_redirection(path) # rubocop:disable Metrics/MethodLength
|
121
|
-
try = 0
|
122
|
-
uri = URI url(path)
|
123
|
-
begin
|
124
|
-
get_response uri
|
125
|
-
rescue Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNREFUSED => e
|
126
|
-
try += 1
|
127
|
-
retry if check_try try, uri
|
128
|
-
|
129
|
-
Util.warn "Failed fetching #{uri}, #{e.message}"
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
def get_response(uri)
|
134
|
-
resp = Net::HTTP.get_response(uri)
|
135
|
-
resp.code == "302" ? get_redirection(resp["location"]) : resp
|
136
|
-
end
|
137
|
-
|
138
|
-
def check_try(try, uri)
|
139
|
-
if try < 3
|
140
|
-
Util.warn "Timeout fetching #{uri}, retrying..."
|
141
|
-
sleep 1
|
142
|
-
true
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
def fetch_docs
|
147
|
-
threads = Array.new(3) { thread { |path| fetch_doc(path) } }
|
148
|
-
iso_queue[0..10_000].each { |docpath| @queue << docpath }
|
149
|
-
threads.size.times { @queue << :END }
|
150
|
-
threads.each(&:join)
|
151
|
-
end
|
152
|
-
|
153
|
-
#
|
154
|
-
# Fetch document from ISO website.
|
155
|
-
#
|
156
|
-
# @param [String] docpath document page path
|
157
|
-
#
|
158
|
-
# @return [void]
|
159
|
-
#
|
160
|
-
def fetch_doc(docpath)
|
161
|
-
doc = Scrapper.parse_page docpath, errors: @errors
|
162
|
-
@mutex.synchronize { save_doc doc, docpath }
|
163
|
-
rescue StandardError => e
|
164
|
-
Util.warn "Fail fetching document: #{url(docpath)}\n#{e.message}\n#{e.backtrace}"
|
165
|
-
end
|
166
|
-
|
167
|
-
# def compare_docids(id1, id2)
|
168
|
-
# Pubid::Iso::Identifier.create(**id1).to_s <=> Pubid::Iso::Identifier.create(**id2).to_s
|
169
|
-
# end
|
170
|
-
|
171
|
-
#
|
172
|
-
# save document to file.
|
173
|
-
#
|
174
|
-
# @param [RelatonIsoBib::IsoBibliographicItem] doc document
|
175
|
-
#
|
176
|
-
# @return [void]
|
177
|
-
#
|
178
|
-
def save_doc(doc, docpath) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
179
|
-
docid = doc.docidentifier.detect(&:primary)
|
180
|
-
file_name = docid.id.gsub(/[\s\/:]+/, "-").downcase
|
181
|
-
file = File.join @output, "#{file_name}.#{@ext}"
|
182
|
-
if File.exist?(file)
|
183
|
-
rewrite_with_same_or_newer doc, docid, file, docpath
|
184
|
-
else
|
185
|
-
write_file file, doc, docid
|
186
|
-
end
|
187
|
-
iso_queue.move_last docpath
|
188
|
-
end
|
189
|
-
|
190
|
-
def rewrite_with_same_or_newer(doc, docid, file, docpath)
|
191
|
-
hash = YAML.load_file file
|
192
|
-
item_hash = HashConverter.hash_to_bib hash
|
193
|
-
bib = ::RelatonIsoBib::IsoBibliographicItem.new(**item_hash)
|
194
|
-
if edition_greater?(doc, bib) || replace_substage98?(doc, bib)
|
195
|
-
write_file file, doc, docid
|
196
|
-
elsif @files.include?(file) && !edition_greater?(bib, doc)
|
197
|
-
Util.warn "Duplicate file `#{file}` for `#{docid.id}` from #{url(docpath)}"
|
198
|
-
end
|
199
|
-
end
|
200
|
-
|
201
|
-
def edition_greater?(doc, bib)
|
202
|
-
doc.edition && bib.edition && doc.edition.content.to_i > bib.edition.content.to_i
|
203
|
-
end
|
204
|
-
|
205
|
-
def replace_substage98?(doc, bib) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
206
|
-
doc.edition&.content == bib.edition&.content &&
|
207
|
-
(doc.status&.substage&.value != "98" || bib.status&.substage&.value == "98")
|
208
|
-
end
|
209
|
-
|
210
|
-
def write_file(file, doc, docid)
|
211
|
-
@files << file
|
212
|
-
index.add_or_update docid.to_h, file
|
213
|
-
File.write file, serialize(doc), encoding: "UTF-8"
|
214
|
-
end
|
215
|
-
|
216
|
-
#
|
217
|
-
# Serialize document to string.
|
218
|
-
#
|
219
|
-
# @param [RelatonIsoBib::IsoBibliographicItem] doc document
|
220
|
-
#
|
221
|
-
# @return [String] serialized document
|
222
|
-
#
|
223
|
-
def serialize(doc)
|
224
|
-
case @format
|
225
|
-
when "yaml" then doc.to_hash.to_yaml
|
226
|
-
when "bibxml" then doc.to_bibxml
|
227
|
-
when "xml" then doc.to_xml bibdata: true
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
private
|
232
|
-
|
233
|
-
#
|
234
|
-
# Create thread worker
|
235
|
-
#
|
236
|
-
# @return [Thread] thread
|
237
|
-
#
|
238
|
-
def thread
|
239
|
-
Thread.new do
|
240
|
-
while (path = @queue.pop) != :END
|
241
|
-
yield path
|
242
|
-
end
|
243
|
-
end
|
244
|
-
end
|
245
|
-
end
|
246
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
module RelatonIso
|
2
|
-
class DocumentIdentifier < RelatonBib::DocumentIdentifier
|
3
|
-
def id # rubocop:disable Metrics/MethodLength
|
4
|
-
id_str = @id.to_s.sub(/\sED\d+/, "").squeeze(" ").sub(/^ISO\/\s/, "ISO ") # workarounds for pubid gem bugs
|
5
|
-
if @all_parts
|
6
|
-
if type == "URN"
|
7
|
-
return "#{@id.urn}:ser"
|
8
|
-
else
|
9
|
-
return "#{id_str} (all parts)"
|
10
|
-
end
|
11
|
-
end
|
12
|
-
type == "URN" ? @id.urn.to_s : id_str
|
13
|
-
rescue Pubid::Iso::Errors::NoEditionError => e
|
14
|
-
Util.warn "#{type} identifier can't be generated for `#{@id}`: #{e.message}"
|
15
|
-
end
|
16
|
-
|
17
|
-
def to_h
|
18
|
-
stringify_values(@id.to_h) if @id.respond_to? :to_h
|
19
|
-
end
|
20
|
-
|
21
|
-
def remove_part
|
22
|
-
@id.part = nil
|
23
|
-
end
|
24
|
-
|
25
|
-
def remove_date
|
26
|
-
@id.year = nil
|
27
|
-
end
|
28
|
-
|
29
|
-
def all_parts
|
30
|
-
@all_parts = true
|
31
|
-
end
|
32
|
-
|
33
|
-
def stringify_values(hash)
|
34
|
-
hash.transform_values { |v| stringify(v) }.reject { |_k, v| v.empty? }
|
35
|
-
end
|
36
|
-
|
37
|
-
def stringify(val)
|
38
|
-
case val
|
39
|
-
when Array then val.map { |i| i.is_a?(Hash) ? stringify_values(i) : i.to_s }
|
40
|
-
when Hash then stringify_values(val)
|
41
|
-
when Symbol then val
|
42
|
-
else val.to_s
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
module RelatonIso
|
2
|
-
module HashConverter
|
3
|
-
include RelatonIsoBib::HashConverter
|
4
|
-
extend self
|
5
|
-
|
6
|
-
def create_docid(**args)
|
7
|
-
begin
|
8
|
-
args[:id] = Pubid::Iso::Identifier.parse args[:id] if args[:id].is_a?(String) && args[:primary]
|
9
|
-
rescue StandardError
|
10
|
-
Util.warn "Unable to create a Pubid::Iso::Identifier from `#{args[:id]}`"
|
11
|
-
end
|
12
|
-
DocumentIdentifier.new(**args)
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
data/lib/relaton_iso/hit.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module RelatonIso
|
4
|
-
# Hit.
|
5
|
-
class Hit < RelatonBib::Hit
|
6
|
-
# @return [RelatonIsoBib::IsoBibliographicItem]
|
7
|
-
attr_writer :fetch
|
8
|
-
|
9
|
-
# @return [Pubid::Iso::Identifier] pubid
|
10
|
-
attr_writer :pubid
|
11
|
-
|
12
|
-
# Update edition for pubid when provided in Bibliographic Item
|
13
|
-
# def update_edition(bibliographic_item)
|
14
|
-
# if bibliographic_item.edition
|
15
|
-
# pubid.root.edition = bibliographic_item.edition.content
|
16
|
-
# end
|
17
|
-
# end
|
18
|
-
|
19
|
-
# Parse page.
|
20
|
-
# @param lang [String, nil]
|
21
|
-
# @return [RelatonIso::IsoBibliographicItem]
|
22
|
-
def fetch(_lang = nil)
|
23
|
-
@fetch ||= begin
|
24
|
-
url = "#{HitCollection::ENDPOINT}#{hit[:file]}"
|
25
|
-
resp = Net::HTTP.get_response URI(url)
|
26
|
-
hash = YAML.safe_load resp.body
|
27
|
-
hash["fetched"] = Date.today.to_s
|
28
|
-
RelatonIsoBib::IsoBibliographicItem.from_hash hash
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
# @return [Integer]
|
33
|
-
def sort_weight
|
34
|
-
case hit[:status] # && hit["publicationStatus"]["key"]
|
35
|
-
when "Published" then 0
|
36
|
-
when "Under development" then 1
|
37
|
-
when "Withdrawn" then 2
|
38
|
-
when "Deleted" then 3
|
39
|
-
else 4
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
# @return [Pubid::Iso::Identifier]
|
44
|
-
def pubid
|
45
|
-
return @pubid if defined? @pubid
|
46
|
-
|
47
|
-
create_pubid hit[:id]
|
48
|
-
rescue StandardError
|
49
|
-
Util.warn "Unable to create an identifier from #{hit[:id]}"
|
50
|
-
@pubid = nil
|
51
|
-
end
|
52
|
-
|
53
|
-
private
|
54
|
-
|
55
|
-
def create_pubid(id)
|
56
|
-
@pubid = id.is_a?(Hash) ? Pubid::Iso::Identifier.create(**id) : id
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
@@ -1,100 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "relaton_iso/hit"
|
4
|
-
|
5
|
-
module RelatonIso
|
6
|
-
# Page of hit collection.
|
7
|
-
class HitCollection < RelatonBib::HitCollection
|
8
|
-
INDEXFILE = "index-v1.yaml"
|
9
|
-
ENDPOINT = "https://raw.githubusercontent.com/relaton/relaton-data-iso/main/"
|
10
|
-
|
11
|
-
# @param text [Pubid::Iso::Identifier] reference to search
|
12
|
-
def initialize(pubid, opts = {})
|
13
|
-
super
|
14
|
-
@opts = opts
|
15
|
-
end
|
16
|
-
|
17
|
-
# @return [Pubid::Iso::Identifier]
|
18
|
-
alias ref_pubid text
|
19
|
-
|
20
|
-
def ref_pubid_no_year
|
21
|
-
@ref_pubid_no_year ||= ref_pubid.dup.tap { |r| r.base = r.base.exclude(:year) if r.base }
|
22
|
-
end
|
23
|
-
|
24
|
-
def ref_pubid_excluded
|
25
|
-
@ref_pubid_excluded ||= ref_pubid_no_year.exclude(*excludings)
|
26
|
-
end
|
27
|
-
|
28
|
-
def fetch # rubocop:disable Metrics/AbcSize
|
29
|
-
@array = index.search do |row|
|
30
|
-
row[:id].is_a?(Hash) ? pubid_match?(row[:id]) : ref_pubid.to_s == row[:id]
|
31
|
-
end.map { |row| Hit.new row, self }
|
32
|
-
.sort_by! { |h| h.pubid.to_s }
|
33
|
-
.reverse!
|
34
|
-
self
|
35
|
-
end
|
36
|
-
|
37
|
-
def pubid_match?(id)
|
38
|
-
pubid = create_pubid(id)
|
39
|
-
return false unless pubid
|
40
|
-
|
41
|
-
pubid.base = pubid.base.exclude(:year, :edition) if pubid.base
|
42
|
-
dir_excludings = excludings.dup
|
43
|
-
dir_excludings << :edition unless pubid.typed_stage_abbrev == "DIR"
|
44
|
-
pubid.exclude(*dir_excludings) == ref_pubid_excluded
|
45
|
-
end
|
46
|
-
|
47
|
-
def create_pubid(id)
|
48
|
-
Pubid::Iso::Identifier.create(**id)
|
49
|
-
rescue StandardError => e
|
50
|
-
Util.warn e.message, key: ref_pubid.to_s
|
51
|
-
end
|
52
|
-
|
53
|
-
def excludings
|
54
|
-
return @excludings if defined? @excludings
|
55
|
-
|
56
|
-
excl_parts = %i[year]
|
57
|
-
excl_parts << :part if ref_pubid.root.part.nil? || @opts[:all_parts]
|
58
|
-
if ref_pubid.stage.nil? || @opts[:all_parts]
|
59
|
-
excl_parts << :stage
|
60
|
-
excl_parts << :iteration
|
61
|
-
end
|
62
|
-
# excl_parts << :edition if ref_pubid.root.edition.nil? || all_parts
|
63
|
-
@escludings = excl_parts
|
64
|
-
end
|
65
|
-
|
66
|
-
def index
|
67
|
-
@index ||= Relaton::Index.find_or_create :iso, url: "#{ENDPOINT}index-v1.zip", file: INDEXFILE
|
68
|
-
end
|
69
|
-
|
70
|
-
def fetch_doc
|
71
|
-
if !@opts[:all_parts] || size == 1
|
72
|
-
any? && first.fetch(@opts[:lang])
|
73
|
-
else
|
74
|
-
to_all_parts(@opts[:lang])
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
# @param lang [String, nil]
|
79
|
-
# @return [RelatonIsoBib::IsoBibliographicItem, nil]
|
80
|
-
def to_all_parts(lang = nil) # rubocop:disable Metrics/AbcSize
|
81
|
-
hit = @array.min_by { |h| h.pubid.part.to_i }
|
82
|
-
return @array.first&.fetch lang unless hit
|
83
|
-
|
84
|
-
bibitem = hit.fetch(lang)
|
85
|
-
all_parts_item = bibitem.to_all_parts
|
86
|
-
@array.reject { |h| h.pubid.part == hit.pubid.part }.each do |hi|
|
87
|
-
all_parts_item.relation << create_relation(hi)
|
88
|
-
end
|
89
|
-
all_parts_item
|
90
|
-
end
|
91
|
-
|
92
|
-
def create_relation(hit)
|
93
|
-
docid = DocumentIdentifier.new(id: hit.pubid, type: "ISO", primary: true)
|
94
|
-
isobib = RelatonIsoBib::IsoBibliographicItem.new(
|
95
|
-
formattedref: RelatonBib::FormattedRef.new(content: hit.pubid.to_s), docid: [docid],
|
96
|
-
)
|
97
|
-
RelatonBib::DocumentRelation.new(type: "instanceOf", bibitem: isobib)
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|