relaton-iso 2.1.5 → 2.2.0.pre.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -0
- data/Rakefile +2 -2
- data/lib/relaton/iso/bibliography.rb +50 -18
- data/lib/relaton/iso/data_fetcher.rb +60 -12
- data/lib/relaton/iso/data_parser.rb +9 -11
- data/lib/relaton/iso/hit.rb +1 -1
- data/lib/relaton/iso/hit_collection.rb +85 -32
- data/lib/relaton/iso/item_data.rb +1 -1
- data/lib/relaton/iso/model/docidentifier.rb +18 -12
- data/lib/relaton/iso/model/doctype.rb +1 -1
- data/lib/relaton/iso/scraper.rb +5 -6
- data/lib/relaton/iso/type/pubid.rb +1 -1
- data/lib/relaton/iso/version.rb +1 -1
- data/lib/relaton/iso.rb +2 -2
- data/relaton-iso.gemspec +5 -5
- metadata +12 -13
- data/.rubocop.yml +0 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 162f2a0536cbb7c7d32b893c2acd75049c9297660fc8a4873459a60f36b0fbff
|
|
4
|
+
data.tar.gz: 344fae9765adff7581674eb8e03b64a1402667989771306290e650e5f408d1f0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 285cdea0029f25e27cac57b01fa54f35a5a1fe50d1cd16c8537e0a04832bce769ea6fe66ab1ae0b576b7ed0fc005e7f755a065dc0b110212ba4921446f75dd5f
|
|
7
|
+
data.tar.gz: e27586d77e4862bfb98847e03d9087f51931de94c88b46bc9f3c757c3e0740fac6848b95e949806cb9b1f4749a02cd677bb250ca97f38d346b8c3a4f312a8806
|
data/Gemfile
CHANGED
|
@@ -5,6 +5,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
|
|
5
5
|
# Specify your gem's dependencies in relaton-iso.gemspec
|
|
6
6
|
gemspec
|
|
7
7
|
|
|
8
|
+
# Use local monorepo sibling gems where available.
|
|
9
|
+
Dir["../*/"].each do |dir|
|
|
10
|
+
name = File.basename(dir)
|
|
11
|
+
next if name == File.basename(__dir__)
|
|
12
|
+
next unless File.exist?(File.join(dir, "#{name}.gemspec"))
|
|
13
|
+
gem name, path: dir
|
|
14
|
+
end
|
|
8
15
|
|
|
9
16
|
gem "byebug"
|
|
10
17
|
gem "equivalent-xml"
|
data/Rakefile
CHANGED
|
@@ -11,8 +11,8 @@ namespace :spec do
|
|
|
11
11
|
require "net/http"
|
|
12
12
|
require "uri"
|
|
13
13
|
|
|
14
|
-
url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/v2/index-
|
|
15
|
-
dest = File.join(__dir__, "spec", "fixtures", "index-
|
|
14
|
+
url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/v2/index-v2.zip"
|
|
15
|
+
dest = File.join(__dir__, "spec", "fixtures", "index-v2.zip")
|
|
16
16
|
|
|
17
17
|
puts "Downloading #{url} ..."
|
|
18
18
|
uri = URI.parse(url)
|
|
@@ -40,8 +40,10 @@ module Relaton
|
|
|
40
40
|
# opts[:all_parts] ||= $~ && opts[:all_parts].nil?
|
|
41
41
|
|
|
42
42
|
query_pubid = ::Pubid::Iso::Identifier.parse(code)
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
if year&.respond_to?(:to_i)
|
|
44
|
+
query_pubid.root.date = ::Pubid::Components::Date.new(year: year.to_s)
|
|
45
|
+
end
|
|
46
|
+
query_pubid.root.all_parts = opts[:all_parts] if opts[:all_parts]
|
|
45
47
|
Util.info "Fetching from Relaton repository ...", key: query_pubid.to_s
|
|
46
48
|
|
|
47
49
|
hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
|
|
@@ -57,7 +59,7 @@ module Relaton
|
|
|
57
59
|
|
|
58
60
|
response_pubid = ret.docidentifier.find(&:primary) # .sub(" (all parts)", "")
|
|
59
61
|
Util.info "Found: `#{response_pubid}`", key: query_pubid.to_s
|
|
60
|
-
get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts] ||
|
|
62
|
+
get_all = (query_pubid.root.date&.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts] ||
|
|
61
63
|
opts[:publication_date_before] || opts[:publication_date_after]
|
|
62
64
|
if get_all
|
|
63
65
|
filter_item_by_date(ret, opts) if date_filter
|
|
@@ -65,7 +67,7 @@ module Relaton
|
|
|
65
67
|
end
|
|
66
68
|
|
|
67
69
|
ret.to_most_recent_reference
|
|
68
|
-
rescue ::
|
|
70
|
+
rescue Parslet::ParseFailed
|
|
69
71
|
Util.warn "Is not recognized as a standards identifier.", key: code
|
|
70
72
|
nil
|
|
71
73
|
end
|
|
@@ -95,7 +97,7 @@ module Relaton
|
|
|
95
97
|
|
|
96
98
|
query_pubid.publisher == pubid.publisher &&
|
|
97
99
|
query_pubid.number == pubid.number &&
|
|
98
|
-
query_pubid.
|
|
100
|
+
query_pubid.copublishers == pubid.copublishers &&
|
|
99
101
|
(any_types_stages || query_pubid.stage == pubid.stage) &&
|
|
100
102
|
(any_types_stages || query_pubid.is_a?(pubid.class))
|
|
101
103
|
end
|
|
@@ -109,10 +111,12 @@ module Relaton
|
|
|
109
111
|
|
|
110
112
|
# filter by year
|
|
111
113
|
hit_collection.select! do |hit|
|
|
112
|
-
hit.pubid.year
|
|
114
|
+
if hit.pubid.date&.year.nil? && hit.hit[:year]
|
|
115
|
+
hit.pubid.date = ::Pubid::Components::Date.new(year: hit.hit[:year].to_s)
|
|
116
|
+
end
|
|
113
117
|
next true if check_year(year, hit)
|
|
114
118
|
|
|
115
|
-
missed_year_ids << hit.pubid.to_s if hit.pubid.year
|
|
119
|
+
missed_year_ids << hit.pubid.to_s if hit.pubid.date&.year
|
|
116
120
|
false
|
|
117
121
|
end
|
|
118
122
|
|
|
@@ -195,7 +199,7 @@ module Relaton
|
|
|
195
199
|
# @param hit [Relaton::Iso::Hit]
|
|
196
200
|
# @return [Integer]
|
|
197
201
|
def hit_year(hit)
|
|
198
|
-
yr = hit.pubid&.year || hit.hit[:year] || hit.pubid&.root&.year
|
|
202
|
+
yr = hit.pubid&.date&.year || hit.hit[:year] || hit.pubid&.root&.date&.year
|
|
199
203
|
yr.to_i
|
|
200
204
|
end
|
|
201
205
|
|
|
@@ -253,9 +257,14 @@ module Relaton
|
|
|
253
257
|
end
|
|
254
258
|
|
|
255
259
|
def check_year(year, hit) # rubocop:disable Metrics/AbcSize
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
260
|
+
pub = hit.pubid
|
|
261
|
+
own_year = pub.date&.year.to_s
|
|
262
|
+
base_year = pub.base_identifier&.date&.year.to_s
|
|
263
|
+
if pub.base_identifier.nil?
|
|
264
|
+
own_year == year.to_s
|
|
265
|
+
else
|
|
266
|
+
base_year == year.to_s || own_year == year.to_s
|
|
267
|
+
end
|
|
259
268
|
end
|
|
260
269
|
|
|
261
270
|
# @param pubid [Pubid::Iso::Identifier] PubID with no results
|
|
@@ -264,7 +273,7 @@ module Relaton
|
|
|
264
273
|
|
|
265
274
|
if missed_year_ids.any?
|
|
266
275
|
ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
|
|
267
|
-
Util.info "TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}.", key: pubid.to_s
|
|
276
|
+
Util.info "TIP: No match for edition year #{pubid.date&.year}, but matches exist for #{ids}.", key: pubid.to_s
|
|
268
277
|
end
|
|
269
278
|
|
|
270
279
|
if tip_ids.any?
|
|
@@ -276,7 +285,7 @@ module Relaton
|
|
|
276
285
|
Util.info "TIP: If it cannot be found, the document may no longer be published in parts.", key: pubid.to_s
|
|
277
286
|
else
|
|
278
287
|
Util.info "TIP: If you wish to cite all document parts for the reference, " \
|
|
279
|
-
"use `#{pubid.
|
|
288
|
+
"use `#{pubid.exclude(:date)} (all parts)`.", key: pubid.to_s
|
|
280
289
|
end
|
|
281
290
|
|
|
282
291
|
nil
|
|
@@ -326,12 +335,17 @@ module Relaton
|
|
|
326
335
|
!(query_pubid.root.all_parts && i.pubid.part.nil?)
|
|
327
336
|
end
|
|
328
337
|
|
|
329
|
-
filter_hits_by_year(hit_collection, query_pubid.root.year)
|
|
338
|
+
filter_hits_by_year(hit_collection, query_pubid.root.date&.year)
|
|
330
339
|
end
|
|
331
340
|
|
|
332
341
|
def build_excludings(all_parts, any_types_stages)
|
|
333
|
-
|
|
334
|
-
|
|
342
|
+
# 2.x attribute names: :year → :date, :iteration → :stage_iteration.
|
|
343
|
+
# Always exclude :typed_stage: parse fills the default-published
|
|
344
|
+
# typed_stage with original_abbr="" while .create leaves it nil,
|
|
345
|
+
# so equality would never hold against indexed/created rows
|
|
346
|
+
# otherwise.
|
|
347
|
+
excludings = %i[date edition all_parts typed_stage]
|
|
348
|
+
excludings += %i[type stage stage_iteration] if any_types_stages
|
|
335
349
|
excludings << :part if all_parts
|
|
336
350
|
excludings
|
|
337
351
|
end
|
|
@@ -340,10 +354,28 @@ module Relaton
|
|
|
340
354
|
if pubid.is_a? String then pubid == query_pubid.to_s
|
|
341
355
|
else
|
|
342
356
|
pubid = pubid.dup
|
|
343
|
-
pubid.
|
|
344
|
-
pubid.exclude(*excludings) == no_year_ref
|
|
357
|
+
pubid.base_identifier = pubid.base_identifier.exclude(:date, :edition) if pubid.base_identifier
|
|
358
|
+
normalize_compound_part(pubid.exclude(*excludings)) == no_year_ref
|
|
345
359
|
end
|
|
346
360
|
end
|
|
361
|
+
|
|
362
|
+
# @TODO TEMP WORKAROUND (pubid 2.x migration): the v1-generated index
|
|
363
|
+
# stores a compound part such as "5-1-3" in :part with no :subpart, and
|
|
364
|
+
# Relaton::Index builds each row via Pubid::Iso::Identifier.from_hash(id),
|
|
365
|
+
# which keeps it as part="5-1-3" subpart=nil. A parsed query (no_year_ref)
|
|
366
|
+
# splits it (part="5", subpart="1-3"), so the two never compare equal.
|
|
367
|
+
# Re-split the candidate's compound part on the first dash to mirror parse
|
|
368
|
+
# before comparing. `exclude` returns a fresh instance, so mutating this
|
|
369
|
+
# copy is safe. Remove once pubid create() splits compound parts itself.
|
|
370
|
+
def normalize_compound_part(pubid)
|
|
371
|
+
num = pubid.part&.value.to_s
|
|
372
|
+
return pubid unless pubid.subpart.nil? && num.include?("-")
|
|
373
|
+
|
|
374
|
+
head, tail = num.split("-", 2)
|
|
375
|
+
pubid.part = ::Pubid::Iso::Components::Code.new(value: head)
|
|
376
|
+
pubid.subpart = ::Pubid::Iso::Components::Code.new(value: tail)
|
|
377
|
+
pubid
|
|
378
|
+
end
|
|
347
379
|
end
|
|
348
380
|
end
|
|
349
381
|
end
|
|
@@ -12,11 +12,21 @@ module Relaton
|
|
|
12
12
|
# (see https://www.iso.org/open-data.html) and write each one as a YAML
|
|
13
13
|
# file under `@output`.
|
|
14
14
|
#
|
|
15
|
-
#
|
|
15
|
+
# The upstream feed has no delta API, so any run that proceeds re-downloads
|
|
16
|
+
# and re-ingests the whole feed. There is therefore no value in a partial
|
|
17
|
+
# update: a run either skips entirely or does a full replace. `source` modes
|
|
18
|
+
# (matching the `Relaton::Core::DataFetcher.fetch` arg):
|
|
16
19
|
#
|
|
17
|
-
# * `"iso-open-data"` (default) - skip the
|
|
18
|
-
# `
|
|
19
|
-
# * `"iso-open-data-all"` -
|
|
20
|
+
# * `"iso-open-data"` (default) - skip when the feed's `Last-Modified` is
|
|
21
|
+
# unchanged; otherwise wipe `@output` + index and rebuild from scratch.
|
|
22
|
+
# * `"iso-open-data-all"` - the same full rebuild, but ignore the
|
|
23
|
+
# `Last-Modified` short-circuit and always run.
|
|
24
|
+
#
|
|
25
|
+
# Wiping happens here, after the short-circuit decision, so `@output` and the
|
|
26
|
+
# index always mirror the current feed (records that have left it don't
|
|
27
|
+
# linger as stale files or dangling index entries) without risking an empty
|
|
28
|
+
# tree on a skipped run. `#fetch` returns true when it rebuilt, false when
|
|
29
|
+
# it skipped, so callers can chain follow-up work (e.g. the pubid-v1 index).
|
|
20
30
|
#
|
|
21
31
|
class DataFetcher < Core::DataFetcher
|
|
22
32
|
OPEN_DATA_URL = "https://isopublicstorageprod.blob.core.windows.net/" \
|
|
@@ -45,9 +55,9 @@ module Relaton
|
|
|
45
55
|
|
|
46
56
|
Util.info "Fetching ISO Open Data (mode: #{@source})..."
|
|
47
57
|
last_modified = fetch_last_modified
|
|
48
|
-
return if up_to_date?(last_modified)
|
|
58
|
+
return false if up_to_date?(last_modified)
|
|
49
59
|
|
|
50
|
-
|
|
60
|
+
reset_output
|
|
51
61
|
jsonl_path = download_dataset
|
|
52
62
|
ref_index, amend_index, date_index = build_ref_index(jsonl_path)
|
|
53
63
|
tc_index = build_tc_index
|
|
@@ -57,6 +67,7 @@ module Relaton
|
|
|
57
67
|
index.save
|
|
58
68
|
save_last_modified(last_modified)
|
|
59
69
|
report_errors
|
|
70
|
+
true
|
|
60
71
|
rescue StandardError => e
|
|
61
72
|
Util.error "#{e.message}\n#{e.backtrace.join("\n")}"
|
|
62
73
|
raise
|
|
@@ -103,8 +114,13 @@ module Relaton
|
|
|
103
114
|
File.write(LAST_MODIFIED_FILE, last_modified, encoding: "UTF-8")
|
|
104
115
|
end
|
|
105
116
|
|
|
106
|
-
|
|
107
|
-
|
|
117
|
+
# Reset the data tree and the index together so the rebuild is a clean
|
|
118
|
+
# mirror of the feed. Called only after the short-circuit, so a skipped run
|
|
119
|
+
# never strands an empty tree. `Core::DataFetcher.fetch` recreates the
|
|
120
|
+
# directory before ingest writes into it.
|
|
121
|
+
def reset_output
|
|
122
|
+
FileUtils.rm_rf(@output)
|
|
123
|
+
index.remove_all
|
|
108
124
|
FileUtils.mkdir_p(@output)
|
|
109
125
|
end
|
|
110
126
|
|
|
@@ -174,9 +190,9 @@ module Relaton
|
|
|
174
190
|
|
|
175
191
|
def amend_base(ref)
|
|
176
192
|
pubid = ::Pubid::Iso::Identifier.parse(ref)
|
|
177
|
-
return nil unless pubid.
|
|
193
|
+
return nil unless pubid.base_identifier
|
|
178
194
|
|
|
179
|
-
pubid.
|
|
195
|
+
pubid.base_identifier.to_s
|
|
180
196
|
rescue StandardError
|
|
181
197
|
nil
|
|
182
198
|
end
|
|
@@ -260,10 +276,42 @@ module Relaton
|
|
|
260
276
|
|
|
261
277
|
def write_file(file, doc, docid)
|
|
262
278
|
@files << file
|
|
263
|
-
|
|
279
|
+
index_primary(docid, file)
|
|
264
280
|
File.write(file, serialize(doc), encoding: "UTF-8")
|
|
265
281
|
end
|
|
266
282
|
|
|
283
|
+
# Add a document's primary id to the index. With pubid 2.x every ISO id
|
|
284
|
+
# is expected to parse; if one does not (`docid.pubid` is nil) record it
|
|
285
|
+
# so `report_errors` raises a tracked GitHub issue at the end, and skip
|
|
286
|
+
# the index entry rather than indexing a raw string (which would crash
|
|
287
|
+
# the index sort: `get_id_number` calls `.number` on the id). The data
|
|
288
|
+
# file is still written, so the document is not lost — only unindexed
|
|
289
|
+
# until its id parses.
|
|
290
|
+
def index_primary(docid, file)
|
|
291
|
+
unless docid.pubid
|
|
292
|
+
unparseable_ids << [docid.content.to_s, file]
|
|
293
|
+
return
|
|
294
|
+
end
|
|
295
|
+
index.add_or_update(docid.pubid, file)
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def unparseable_ids
|
|
299
|
+
@unparseable_ids ||= []
|
|
300
|
+
end
|
|
301
|
+
|
|
302
|
+
# Surface unparseable primary ids through the shared error-reporting
|
|
303
|
+
# machinery (an "Error fetching documents" GitHub issue in CI) so they are
|
|
304
|
+
# visible and tracked, not silently dropped in the action log. The
|
|
305
|
+
# gh_issue logger channel is registered inside `report_errors`, so emit
|
|
306
|
+
# these at :error after it is set up and before `super` creates the issue.
|
|
307
|
+
def report_errors
|
|
308
|
+
gh_issue
|
|
309
|
+
unparseable_ids.each do |content, file|
|
|
310
|
+
log_error "Unparseable primary id `#{content}` was not indexed (#{file})"
|
|
311
|
+
end
|
|
312
|
+
super
|
|
313
|
+
end
|
|
314
|
+
|
|
267
315
|
# --- static merge -----------------------------------------------------
|
|
268
316
|
|
|
269
317
|
def merge_static_files
|
|
@@ -274,7 +322,7 @@ module Relaton
|
|
|
274
322
|
did = item.docidentifier.detect(&:primary)
|
|
275
323
|
next unless did
|
|
276
324
|
|
|
277
|
-
|
|
325
|
+
index_primary(did, f)
|
|
278
326
|
end
|
|
279
327
|
end
|
|
280
328
|
|
|
@@ -37,7 +37,6 @@ module Relaton
|
|
|
37
37
|
"Cor" => "technical-corrigendum",
|
|
38
38
|
"Add" => "addendum",
|
|
39
39
|
"Suppl" => "supplement",
|
|
40
|
-
"Ext" => "extract",
|
|
41
40
|
}.freeze
|
|
42
41
|
|
|
43
42
|
DOC_URL = "https://www.iso.org/standard/%d.html"
|
|
@@ -123,8 +122,9 @@ module Relaton
|
|
|
123
122
|
end
|
|
124
123
|
|
|
125
124
|
def iso_reference_pubid
|
|
126
|
-
|
|
127
|
-
|
|
125
|
+
pubid.dup.tap do |id|
|
|
126
|
+
id.languages = [::Pubid::Components::Language.new(code: "en", original_code: "E")]
|
|
127
|
+
end
|
|
128
128
|
rescue StandardError
|
|
129
129
|
nil
|
|
130
130
|
end
|
|
@@ -133,12 +133,10 @@ module Relaton
|
|
|
133
133
|
return @urn_pubid if defined?(@urn_pubid)
|
|
134
134
|
|
|
135
135
|
@urn_pubid = begin
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
end
|
|
141
|
-
dup_pubid
|
|
136
|
+
# Override stage even when the parsed pubid carries the default
|
|
137
|
+
# "published" stage — relaton's currentStage (e.g. 9092 = Withdrawn)
|
|
138
|
+
# is the authoritative source for URN stage.
|
|
139
|
+
stage_dotted ? pubid.with_harmonized_stage(stage_dotted) : pubid.dup
|
|
142
140
|
rescue StandardError
|
|
143
141
|
nil
|
|
144
142
|
end
|
|
@@ -386,9 +384,9 @@ module Relaton
|
|
|
386
384
|
end
|
|
387
385
|
|
|
388
386
|
def base_relation
|
|
389
|
-
return [] unless pubid&.
|
|
387
|
+
return [] unless pubid&.base_identifier
|
|
390
388
|
|
|
391
|
-
[relation_for(pubid.
|
|
389
|
+
[relation_for(pubid.base_identifier.to_s, "updates")]
|
|
392
390
|
end
|
|
393
391
|
|
|
394
392
|
def relation_for(ref, type)
|
data/lib/relaton/iso/hit.rb
CHANGED
|
@@ -12,15 +12,35 @@ module Relaton
|
|
|
12
12
|
@opts ||= {}
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
# Maps the legacy 1.x exclude symbols to their pubid 2.x attribute
|
|
16
|
+
# names. The public excludings API still uses :year/:iteration for
|
|
17
|
+
# backwards compatibility with existing call sites and specs.
|
|
18
|
+
LEGACY_EXCLUDE_MAP = { year: :date, iteration: :stage_iteration }.freeze
|
|
19
|
+
private_constant :LEGACY_EXCLUDE_MAP
|
|
20
|
+
|
|
21
|
+
def translate_excludings(attrs)
|
|
22
|
+
out = attrs.map { |a| LEGACY_EXCLUDE_MAP[a] || a }
|
|
23
|
+
# Excluding :stage implies excluding :typed_stage too — the two
|
|
24
|
+
# carry overlapping data and their default-published values can
|
|
25
|
+
# differ in trivia (e.g. original_abbr "" vs nil), so leaving
|
|
26
|
+
# typed_stage in the comparison breaks otherwise-equal matches.
|
|
27
|
+
out << :typed_stage if out.include?(:stage) && !out.include?(:typed_stage)
|
|
28
|
+
out
|
|
29
|
+
end
|
|
30
|
+
|
|
15
31
|
def ref_pubid_no_year
|
|
16
|
-
@ref_pubid_no_year ||=
|
|
32
|
+
@ref_pubid_no_year ||=
|
|
33
|
+
if ref.base_identifier
|
|
34
|
+
ref.dup.tap { |r| r.base_identifier = r.base_identifier.exclude(:date) }
|
|
35
|
+
else
|
|
36
|
+
ref.exclude(:date)
|
|
37
|
+
end
|
|
17
38
|
end
|
|
18
39
|
|
|
19
40
|
def ref_pubid_excluded
|
|
20
41
|
return @ref_pubid_excluded if defined? @ref_pubid_excluded
|
|
21
42
|
|
|
22
|
-
ref_excludings = excludings
|
|
23
|
-
ref_excludings << :all_parts
|
|
43
|
+
ref_excludings = translate_excludings(excludings) + [:all_parts]
|
|
24
44
|
@ref_pubid_excluded ||= ref_pubid_no_year.exclude(*ref_excludings)
|
|
25
45
|
end
|
|
26
46
|
|
|
@@ -30,38 +50,68 @@ module Relaton
|
|
|
30
50
|
# @return [Array<Relaton::Iso::Hit>] hits
|
|
31
51
|
#
|
|
32
52
|
def find # rubocop:disable Metrics/AbcSize
|
|
33
|
-
|
|
34
|
-
|
|
53
|
+
# Pass `ref` (a Pubid::Identifier, not a String) so the index can
|
|
54
|
+
# narrow candidates by number via binary search before applying the
|
|
55
|
+
# block, instead of a full O(n) scan of every row. Every row's `:id`
|
|
56
|
+
# is already a Pubid::Identifier — Relaton::Index deserialized it via
|
|
57
|
+
# the `pubid_class` passed in `#index` — so `pubid_match?` compares
|
|
58
|
+
# Pubid to Pubid directly.
|
|
59
|
+
@array = index.search(ref) do |row|
|
|
60
|
+
pubid_match?(row[:id])
|
|
35
61
|
end.map { |row| Hit.new row, self }
|
|
36
|
-
|
|
37
|
-
|
|
62
|
+
# An all-parts query drops :part from the match, so multiple rows can
|
|
63
|
+
# resolve to the same pubid; collapse them so each part appears once.
|
|
64
|
+
@array.uniq! { |h| h.pubid.to_s } if ref.root.all_parts
|
|
65
|
+
# Most-recent first (pubid string desc ~ year desc), then float
|
|
66
|
+
# published-stage ids above drafts. An undated query excludes :stage
|
|
67
|
+
# when matching, so a future draft (e.g. ISO/AWI) matches alongside the
|
|
68
|
+
# published edition; without this the draft would sort first lexically
|
|
69
|
+
# ("ISO/AWI …" > "ISO …") and be returned by fetch_doc's `first`. The
|
|
70
|
+
# index id carries no lifecycle status, so the parsed stage is the only
|
|
71
|
+
# signal available here. partition is stable, preserving the year order
|
|
72
|
+
# within each group.
|
|
73
|
+
@array.sort_by! { |h| h.pubid.to_s }.reverse!
|
|
74
|
+
published, drafts = @array.partition do |h|
|
|
75
|
+
h.pubid && default_published_stage?(h.pubid)
|
|
76
|
+
end
|
|
77
|
+
@array = published + drafts
|
|
38
78
|
self
|
|
39
79
|
end
|
|
40
80
|
|
|
41
|
-
def pubid_match?(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
exclude_id_attrs(
|
|
81
|
+
def pubid_match?(pubid)
|
|
82
|
+
match_excludings = translate_excludings(excludings) + [:all_parts]
|
|
83
|
+
match_excludings << :edition unless pubid.typed_stage&.abbr&.include?("DIR")
|
|
84
|
+
# Only the candidate is built via .create (from the index) and so may
|
|
85
|
+
# carry a compound part; `ref_pubid_no_year` is always a parsed pubid,
|
|
86
|
+
# already split, so it needs no normalization.
|
|
87
|
+
cand = normalize_compound_part(exclude_id_attrs(pubid, *match_excludings))
|
|
88
|
+
cand == exclude_id_attrs(ref_pubid_no_year, *match_excludings)
|
|
49
89
|
end
|
|
50
90
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
91
|
+
# @TODO TEMP WORKAROUND (pubid 2.x migration): the v1-generated index
|
|
92
|
+
# stores a compound part such as "5-1-3" in :part with no :subpart, and
|
|
93
|
+
# Relaton::Index builds each row via Pubid::Iso::Identifier.from_hash(id),
|
|
94
|
+
# which keeps it as part="5-1-3" subpart=nil. A parsed query splits it
|
|
95
|
+
# (part="5", subpart="1-3"), so the two never compare equal. Re-split the
|
|
96
|
+
# compound part on the first dash to mirror parse before comparing.
|
|
97
|
+
# `exclude` returns a fresh instance, so mutating this copy is safe.
|
|
98
|
+
# Remove once pubid create() splits compound parts itself.
|
|
99
|
+
def normalize_compound_part(pubid)
|
|
100
|
+
num = pubid.part&.value.to_s
|
|
101
|
+
return pubid unless pubid.subpart.nil? && num.include?("-")
|
|
102
|
+
|
|
103
|
+
head, tail = num.split("-", 2)
|
|
104
|
+
pubid.part = ::Pubid::Iso::Components::Code.new(value: head)
|
|
105
|
+
pubid.subpart = ::Pubid::Iso::Components::Code.new(value: tail)
|
|
106
|
+
pubid
|
|
57
107
|
end
|
|
58
108
|
|
|
59
109
|
def exclude_id_attrs(pubid, *attrs)
|
|
60
110
|
xid = pubid.exclude(*attrs)
|
|
61
111
|
curr = xid
|
|
62
|
-
while curr.
|
|
63
|
-
curr.
|
|
64
|
-
curr = curr.
|
|
112
|
+
while curr.base_identifier
|
|
113
|
+
curr.base_identifier = curr.base_identifier.exclude(*attrs)
|
|
114
|
+
curr = curr.base_identifier
|
|
65
115
|
end
|
|
66
116
|
xid
|
|
67
117
|
end
|
|
@@ -71,23 +121,26 @@ module Relaton
|
|
|
71
121
|
|
|
72
122
|
excl_attrs = %i[year]
|
|
73
123
|
excl_attrs << :part if ref.root.part.nil? || ref.root.all_parts
|
|
74
|
-
if ref
|
|
124
|
+
if default_published_stage?(ref) || ref.root.all_parts
|
|
75
125
|
excl_attrs << :stage
|
|
76
126
|
excl_attrs << :iteration
|
|
77
127
|
end
|
|
78
|
-
# excl_parts << :edition if ref.root.edition.nil? || all_parts
|
|
79
128
|
@excludings = excl_attrs
|
|
80
129
|
end
|
|
81
130
|
|
|
131
|
+
# Pubid 2.x auto-populates a published-stage default on parse/.create,
|
|
132
|
+
# so ref.stage is never nil. Treat that default as "no stage specified".
|
|
133
|
+
def default_published_stage?(pubid)
|
|
134
|
+
return true if pubid.typed_stage.nil?
|
|
135
|
+
|
|
136
|
+
pubid.typed_stage.stage_code.to_s == "published"
|
|
137
|
+
end
|
|
138
|
+
|
|
82
139
|
def index
|
|
83
140
|
@index ||= Relaton::Index.find_or_create(
|
|
84
141
|
:iso,
|
|
85
142
|
url: "#{ENDPOINT}#{INDEXFILE}.zip",
|
|
86
143
|
file: "#{INDEXFILE}.yaml",
|
|
87
|
-
id_keys: %i[publisher number copublisher part year edition type stage
|
|
88
|
-
iteration joint_document tctype sctype wgtype tcnumber
|
|
89
|
-
scnumber wgnumber dirtype base supplements addendum
|
|
90
|
-
jtc_dir month amendments corrigendums language],
|
|
91
144
|
pubid_class: ::Pubid::Iso::Identifier,
|
|
92
145
|
)
|
|
93
146
|
end
|
|
@@ -107,9 +160,9 @@ module Relaton
|
|
|
107
160
|
def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
|
108
161
|
parts = @array.select { |h| h.pubid.part }
|
|
109
162
|
if opts[:publication_date_before] || opts[:publication_date_after]
|
|
110
|
-
parts = parts.select { |h| Bibliography.send(:year_in_range?, (h.pubid.year || h.hit[:year]).to_i, opts) }
|
|
163
|
+
parts = parts.select { |h| Bibliography.send(:year_in_range?, (h.pubid.date&.year || h.hit[:year]).to_i, opts) }
|
|
111
164
|
end
|
|
112
|
-
hit = parts.min_by { |h| h.pubid.part.to_i }
|
|
165
|
+
hit = parts.min_by { |h| h.pubid.part.value.to_i }
|
|
113
166
|
return @array.first&.item unless hit
|
|
114
167
|
|
|
115
168
|
bibitem = hit.item
|
|
@@ -27,7 +27,7 @@ module Relaton
|
|
|
27
27
|
else
|
|
28
28
|
parsed =
|
|
29
29
|
case value
|
|
30
|
-
when ::Pubid::Iso::Identifier
|
|
30
|
+
when ::Pubid::Iso::Identifier then value
|
|
31
31
|
when String
|
|
32
32
|
begin
|
|
33
33
|
::Pubid::Iso::Identifier.parse(value)
|
|
@@ -42,6 +42,13 @@ module Relaton
|
|
|
42
42
|
|
|
43
43
|
if parsed
|
|
44
44
|
@pubid = parsed
|
|
45
|
+
# TC committee documents have a canonical spelling ("… N1110")
|
|
46
|
+
# that pubid renders with a space ("… N 1110"). Preserve the
|
|
47
|
+
# source string (same intent as the iso-tc bypass) while keeping
|
|
48
|
+
# the parsed pubid for any structural operations.
|
|
49
|
+
if value.is_a?(String) && parsed.is_a?(::Pubid::Iso::Identifiers::TcDocument)
|
|
50
|
+
@raw_content = value
|
|
51
|
+
end
|
|
45
52
|
elsif value.is_a?(String)
|
|
46
53
|
@raw_content = value
|
|
47
54
|
end
|
|
@@ -80,18 +87,18 @@ module Relaton
|
|
|
80
87
|
end
|
|
81
88
|
|
|
82
89
|
def remove_date!
|
|
83
|
-
remove_attr!(:
|
|
90
|
+
remove_attr!(:date)
|
|
84
91
|
end
|
|
85
92
|
|
|
86
93
|
def exclude_year
|
|
87
94
|
return @raw_content if @raw_content
|
|
88
95
|
return nil unless @pubid
|
|
89
96
|
|
|
90
|
-
pubid = @pubid.exclude(:
|
|
97
|
+
pubid = @pubid.exclude(:date)
|
|
91
98
|
current = pubid
|
|
92
|
-
while current.
|
|
93
|
-
current.
|
|
94
|
-
current = current.
|
|
99
|
+
while current.base_identifier
|
|
100
|
+
current.base_identifier = current.base_identifier.exclude(:date)
|
|
101
|
+
current = current.base_identifier
|
|
95
102
|
end
|
|
96
103
|
pubid
|
|
97
104
|
end
|
|
@@ -100,11 +107,10 @@ module Relaton
|
|
|
100
107
|
|
|
101
108
|
def render_pubid(pubid)
|
|
102
109
|
case type
|
|
103
|
-
when "URN" then pubid.
|
|
104
|
-
when "
|
|
105
|
-
pubid.to_s(format: :ref_num_short, with_prf: true)
|
|
110
|
+
when "URN" then pubid.to_urn
|
|
111
|
+
when "ISO" then pubid.exclude(:languages).to_s
|
|
106
112
|
else
|
|
107
|
-
pubid.to_s
|
|
113
|
+
pubid.to_s
|
|
108
114
|
end
|
|
109
115
|
end
|
|
110
116
|
|
|
@@ -112,10 +118,10 @@ module Relaton
|
|
|
112
118
|
return unless @pubid
|
|
113
119
|
|
|
114
120
|
@pubid.send("#{attr}=", nil)
|
|
115
|
-
base = @pubid.
|
|
121
|
+
base = @pubid.base_identifier
|
|
116
122
|
while base
|
|
117
123
|
base.send("#{attr}=", nil)
|
|
118
|
-
base = base.
|
|
124
|
+
base = base.base_identifier
|
|
119
125
|
end
|
|
120
126
|
refresh_content!
|
|
121
127
|
end
|
|
@@ -4,7 +4,7 @@ module Relaton
|
|
|
4
4
|
TYPES = %w[
|
|
5
5
|
international-standard technical-specification technical-report publicly-available-specification
|
|
6
6
|
international-workshop-agreement guide recommendation amendment technical-corrigendum directive
|
|
7
|
-
committee-document addendum supplement
|
|
7
|
+
committee-document addendum supplement
|
|
8
8
|
].freeze
|
|
9
9
|
|
|
10
10
|
attribute :content, :string, values: TYPES
|
data/lib/relaton/iso/scraper.rb
CHANGED
|
@@ -104,16 +104,14 @@ module Relaton
|
|
|
104
104
|
return @pubid if @pubid
|
|
105
105
|
|
|
106
106
|
@pubid = ::Pubid::Iso::Identifier.parse(id)
|
|
107
|
-
@pubid.root.edition ||= edition.content if @pubid.
|
|
107
|
+
@pubid.root.edition ||= edition.content if @pubid.base_identifier
|
|
108
108
|
@pubid
|
|
109
109
|
rescue StandardError => e
|
|
110
110
|
Util.error "Failed to parse pubid from #{id}: #{e.message}"
|
|
111
111
|
end
|
|
112
112
|
|
|
113
113
|
def urn
|
|
114
|
-
|
|
115
|
-
pubid_dup.stage ||= ::Pubid::Iso::Identifier.parse_stage(stage_code)
|
|
116
|
-
pubid_dup
|
|
114
|
+
pubid.with_harmonized_stage(stage_code)
|
|
117
115
|
end
|
|
118
116
|
|
|
119
117
|
def edition
|
|
@@ -143,8 +141,9 @@ module Relaton
|
|
|
143
141
|
# @return [String] English reference identifier
|
|
144
142
|
#
|
|
145
143
|
def isoref
|
|
146
|
-
|
|
147
|
-
|
|
144
|
+
pubid.dup.tap do |id|
|
|
145
|
+
id.languages = [::Pubid::Components::Language.new(code: "en", original_code: "E")]
|
|
146
|
+
end.to_s
|
|
148
147
|
end
|
|
149
148
|
|
|
150
149
|
private
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
module Relaton
|
|
2
2
|
module Iso
|
|
3
3
|
module Type
|
|
4
|
-
# Lutaml-model attribute type that preserves `Pubid::Iso::Identifier
|
|
4
|
+
# Lutaml-model attribute type that preserves `Pubid::Iso::Identifier`
|
|
5
5
|
# instances on the way in and stringifies them on the way out.
|
|
6
6
|
#
|
|
7
7
|
# The default `:string` type calls `.to_s` during `cast`, which loses the
|
data/lib/relaton/iso/version.rb
CHANGED
data/lib/relaton/iso.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "net/http"
|
|
4
|
-
require "pubid
|
|
4
|
+
require "pubid"
|
|
5
5
|
require "relaton/index"
|
|
6
6
|
require "isoics"
|
|
7
7
|
require "relaton/bib"
|
|
@@ -18,7 +18,7 @@ require_relative "iso/bibliography"
|
|
|
18
18
|
|
|
19
19
|
module Relaton
|
|
20
20
|
module Iso
|
|
21
|
-
INDEXFILE = "index-
|
|
21
|
+
INDEXFILE = "index-v2"
|
|
22
22
|
|
|
23
23
|
def self.grammar_hash
|
|
24
24
|
# gem_path = File.expand_path "..", __dir__
|
data/relaton-iso.gemspec
CHANGED
|
@@ -24,11 +24,11 @@ Gem::Specification.new do |spec|
|
|
|
24
24
|
spec.bindir = "exe"
|
|
25
25
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
26
26
|
spec.require_paths = ["lib"]
|
|
27
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 3.
|
|
27
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0")
|
|
28
28
|
|
|
29
29
|
spec.add_dependency "isoics", "~> 0.1.6"
|
|
30
|
-
spec.add_dependency "pubid
|
|
31
|
-
spec.add_dependency "relaton-bib", "~> 2.
|
|
32
|
-
spec.add_dependency "relaton-core", "~>
|
|
33
|
-
spec.add_dependency "relaton-index", "~>
|
|
30
|
+
spec.add_dependency "pubid", "~> 2.0.0.pre.alpha.3"
|
|
31
|
+
spec.add_dependency "relaton-bib", "~> 2.2.0.pre.alpha.1"
|
|
32
|
+
spec.add_dependency "relaton-core", "~> 2.2.0.pre.alpha.1"
|
|
33
|
+
spec.add_dependency "relaton-index", "~> 2.2.0.pre.alpha.1"
|
|
34
34
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-iso
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.1
|
|
4
|
+
version: 2.2.0.pre.alpha.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: isoics
|
|
@@ -25,61 +25,61 @@ dependencies:
|
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: 0.1.6
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name: pubid
|
|
28
|
+
name: pubid
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - "~>"
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version:
|
|
33
|
+
version: 2.0.0.pre.alpha.3
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - "~>"
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version:
|
|
40
|
+
version: 2.0.0.pre.alpha.3
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: relaton-bib
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
45
|
- - "~>"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: 2.
|
|
47
|
+
version: 2.2.0.pre.alpha.1
|
|
48
48
|
type: :runtime
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: 2.
|
|
54
|
+
version: 2.2.0.pre.alpha.1
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: relaton-core
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
58
58
|
requirements:
|
|
59
59
|
- - "~>"
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version:
|
|
61
|
+
version: 2.2.0.pre.alpha.1
|
|
62
62
|
type: :runtime
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version:
|
|
68
|
+
version: 2.2.0.pre.alpha.1
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: relaton-index
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
72
72
|
requirements:
|
|
73
73
|
- - "~>"
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
|
-
version:
|
|
75
|
+
version: 2.2.0.pre.alpha.1
|
|
76
76
|
type: :runtime
|
|
77
77
|
prerelease: false
|
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
79
|
requirements:
|
|
80
80
|
- - "~>"
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
|
-
version:
|
|
82
|
+
version: 2.2.0.pre.alpha.1
|
|
83
83
|
description: 'Relaton::Iso: retrieve ISO Standards for bibliographic use using the
|
|
84
84
|
IsoBibliographicItem model'
|
|
85
85
|
email:
|
|
@@ -93,7 +93,6 @@ files:
|
|
|
93
93
|
- ".gitignore"
|
|
94
94
|
- ".hound.yml"
|
|
95
95
|
- ".rspec"
|
|
96
|
-
- ".rubocop.yml"
|
|
97
96
|
- CLAUDE.md
|
|
98
97
|
- CODE_OF_CONDUCT.md
|
|
99
98
|
- Gemfile
|
|
@@ -143,7 +142,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
143
142
|
requirements:
|
|
144
143
|
- - ">="
|
|
145
144
|
- !ruby/object:Gem::Version
|
|
146
|
-
version: 3.
|
|
145
|
+
version: 3.3.0
|
|
147
146
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
148
147
|
requirements:
|
|
149
148
|
- - ">="
|
data/.rubocop.yml
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# This project follows the Ribose OSS style guide.
|
|
2
|
-
# https://github.com/riboseinc/oss-guides
|
|
3
|
-
# All project-specific additions and overrides should be specified in this file.
|
|
4
|
-
|
|
5
|
-
require: rubocop-rails
|
|
6
|
-
|
|
7
|
-
inherit_from:
|
|
8
|
-
- https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
|
|
9
|
-
AllCops:
|
|
10
|
-
TargetRubyVersion: 3.2
|
|
11
|
-
Rails:
|
|
12
|
-
Enabled: false
|