relaton-iso 2.1.5 → 2.2.0.pre.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 36e773a9fdc9851a0b9efa4fdbbcf9d34c22ad2cb55ac34fb1af86974273a48c
4
- data.tar.gz: 587a6518297383f705e02cf9db43f28fa3824c6129b379e25fd5aaaa24c9db91
3
+ metadata.gz: 162f2a0536cbb7c7d32b893c2acd75049c9297660fc8a4873459a60f36b0fbff
4
+ data.tar.gz: 344fae9765adff7581674eb8e03b64a1402667989771306290e650e5f408d1f0
5
5
  SHA512:
6
- metadata.gz: 58c4fe78b4a8b42272a6d24f3ad72f4e3308d0536913ab910f60b734aeaea2dea7f036d60733923566998bef6e1b2980fa718aea3fcdf7e08d97475876e7a833
7
- data.tar.gz: f269f06947f7051219053f4247a7c1bc664b46cbd430e43567d5a76f6c94371a5224c26ff93102380ceb42100d96b618b168a51319847b0203d3fe875ce5582c
6
+ metadata.gz: 285cdea0029f25e27cac57b01fa54f35a5a1fe50d1cd16c8537e0a04832bce769ea6fe66ab1ae0b576b7ed0fc005e7f755a065dc0b110212ba4921446f75dd5f
7
+ data.tar.gz: e27586d77e4862bfb98847e03d9087f51931de94c88b46bc9f3c757c3e0740fac6848b95e949806cb9b1f4749a02cd677bb250ca97f38d346b8c3a4f312a8806
data/Gemfile CHANGED
@@ -5,6 +5,13 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
5
5
  # Specify your gem's dependencies in relaton_iso.gemspec
6
6
  gemspec
7
7
 
8
+ # Use local monorepo sibling gems where available.
9
+ Dir["../*/"].each do |dir|
10
+ name = File.basename(dir)
11
+ next if name == File.basename(__dir__)
12
+ next unless File.exist?(File.join(dir, "#{name}.gemspec"))
13
+ gem name, path: dir
14
+ end
8
15
 
9
16
  gem "byebug"
10
17
  gem "equivalent-xml"
data/Rakefile CHANGED
@@ -11,8 +11,8 @@ namespace :spec do
11
11
  require "net/http"
12
12
  require "uri"
13
13
 
14
- url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/v2/index-v1.zip"
15
- dest = File.join(__dir__, "spec", "fixtures", "index-v1.zip")
14
+ url = "https://raw.githubusercontent.com/relaton/relaton-data-iso/v2/index-v2.zip"
15
+ dest = File.join(__dir__, "spec", "fixtures", "index-v2.zip")
16
16
 
17
17
  puts "Downloading #{url} ..."
18
18
  uri = URI.parse(url)
@@ -40,8 +40,10 @@ module Relaton
40
40
  # opts[:all_parts] ||= $~ && opts[:all_parts].nil?
41
41
 
42
42
  query_pubid = ::Pubid::Iso::Identifier.parse(code)
43
- query_pubid.root.year = year.to_i if year&.respond_to?(:to_i)
44
- query_pubid.root.all_parts ||= opts[:all_parts]
43
+ if year&.respond_to?(:to_i)
44
+ query_pubid.root.date = ::Pubid::Components::Date.new(year: year.to_s)
45
+ end
46
+ query_pubid.root.all_parts = opts[:all_parts] if opts[:all_parts]
45
47
  Util.info "Fetching from Relaton repository ...", key: query_pubid.to_s
46
48
 
47
49
  hits, missed_year_ids = isobib_search_filter(query_pubid, opts)
@@ -57,7 +59,7 @@ module Relaton
57
59
 
58
60
  response_pubid = ret.docidentifier.find(&:primary) # .sub(" (all parts)", "")
59
61
  Util.info "Found: `#{response_pubid}`", key: query_pubid.to_s
60
- get_all = (query_pubid.root.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts] ||
62
+ get_all = (query_pubid.root.date&.year && opts[:keep_year].nil?) || opts[:keep_year] || opts[:all_parts] ||
61
63
  opts[:publication_date_before] || opts[:publication_date_after]
62
64
  if get_all
63
65
  filter_item_by_date(ret, opts) if date_filter
@@ -65,7 +67,7 @@ module Relaton
65
67
  end
66
68
 
67
69
  ret.to_most_recent_reference
68
- rescue ::Pubid::Core::Errors::ParseError
70
+ rescue Parslet::ParseFailed
69
71
  Util.warn "Is not recognized as a standards identifier.", key: code
70
72
  nil
71
73
  end
@@ -95,7 +97,7 @@ module Relaton
95
97
 
96
98
  query_pubid.publisher == pubid.publisher &&
97
99
  query_pubid.number == pubid.number &&
98
- query_pubid.copublisher == pubid.copublisher &&
100
+ query_pubid.copublishers == pubid.copublishers &&
99
101
  (any_types_stages || query_pubid.stage == pubid.stage) &&
100
102
  (any_types_stages || query_pubid.is_a?(pubid.class))
101
103
  end
@@ -109,10 +111,12 @@ module Relaton
109
111
 
110
112
  # filter by year
111
113
  hit_collection.select! do |hit|
112
- hit.pubid.year ||= hit.hit[:year]
114
+ if hit.pubid.date&.year.nil? && hit.hit[:year]
115
+ hit.pubid.date = ::Pubid::Components::Date.new(year: hit.hit[:year].to_s)
116
+ end
113
117
  next true if check_year(year, hit)
114
118
 
115
- missed_year_ids << hit.pubid.to_s if hit.pubid.year
119
+ missed_year_ids << hit.pubid.to_s if hit.pubid.date&.year
116
120
  false
117
121
  end
118
122
 
@@ -195,7 +199,7 @@ module Relaton
195
199
  # @param hit [Relaton::Iso::Hit]
196
200
  # @return [Integer]
197
201
  def hit_year(hit)
198
- yr = hit.pubid&.year || hit.hit[:year] || hit.pubid&.root&.year
202
+ yr = hit.pubid&.date&.year || hit.hit[:year] || hit.pubid&.root&.date&.year
199
203
  yr.to_i
200
204
  end
201
205
 
@@ -253,9 +257,14 @@ module Relaton
253
257
  end
254
258
 
255
259
  def check_year(year, hit) # rubocop:disable Metrics/AbcSize
256
- (hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s) ||
257
- (!hit.pubid.base.nil? && hit.pubid.base.year.to_s == year.to_s) ||
258
- (!hit.pubid.base.nil? && hit.pubid.year.to_s == year.to_s)
260
+ pub = hit.pubid
261
+ own_year = pub.date&.year.to_s
262
+ base_year = pub.base_identifier&.date&.year.to_s
263
+ if pub.base_identifier.nil?
264
+ own_year == year.to_s
265
+ else
266
+ base_year == year.to_s || own_year == year.to_s
267
+ end
259
268
  end
260
269
 
261
270
  # @param pubid [Pubid::Iso::Identifier] PubID with no results
@@ -264,7 +273,7 @@ module Relaton
264
273
 
265
274
  if missed_year_ids.any?
266
275
  ids = missed_year_ids.map { |i| "`#{i}`" }.join(", ")
267
- Util.info "TIP: No match for edition year #{pubid.year}, but matches exist for #{ids}.", key: pubid.to_s
276
+ Util.info "TIP: No match for edition year #{pubid.date&.year}, but matches exist for #{ids}.", key: pubid.to_s
268
277
  end
269
278
 
270
279
  if tip_ids.any?
@@ -276,7 +285,7 @@ module Relaton
276
285
  Util.info "TIP: If it cannot be found, the document may no longer be published in parts.", key: pubid.to_s
277
286
  else
278
287
  Util.info "TIP: If you wish to cite all document parts for the reference, " \
279
- "use `#{pubid.to_s(format: :ref_undated)} (all parts)`.", key: pubid.to_s
288
+ "use `#{pubid.exclude(:date)} (all parts)`.", key: pubid.to_s
280
289
  end
281
290
 
282
291
  nil
@@ -326,12 +335,17 @@ module Relaton
326
335
  !(query_pubid.root.all_parts && i.pubid.part.nil?)
327
336
  end
328
337
 
329
- filter_hits_by_year(hit_collection, query_pubid.root.year)
338
+ filter_hits_by_year(hit_collection, query_pubid.root.date&.year)
330
339
  end
331
340
 
332
341
  def build_excludings(all_parts, any_types_stages)
333
- excludings = %i[year edition all_parts]
334
- excludings += %i[type stage iteration] if any_types_stages
342
+ # 2.x attribute names: :year → :date, :iteration → :stage_iteration.
343
+ # Always exclude :typed_stage: parse fills the default-published
344
+ # typed_stage with original_abbr="" while .create leaves it nil,
345
+ # so equality would never hold against indexed/created rows
346
+ # otherwise.
347
+ excludings = %i[date edition all_parts typed_stage]
348
+ excludings += %i[type stage stage_iteration] if any_types_stages
335
349
  excludings << :part if all_parts
336
350
  excludings
337
351
  end
@@ -340,10 +354,28 @@ module Relaton
340
354
  if pubid.is_a? String then pubid == query_pubid.to_s
341
355
  else
342
356
  pubid = pubid.dup
343
- pubid.base = pubid.base.exclude(:year, :edition) if pubid.base
344
- pubid.exclude(*excludings) == no_year_ref
357
+ pubid.base_identifier = pubid.base_identifier.exclude(:date, :edition) if pubid.base_identifier
358
+ normalize_compound_part(pubid.exclude(*excludings)) == no_year_ref
345
359
  end
346
360
  end
361
+
362
+ # @TODO TEMP WORKAROUND (pubid 2.x migration): the v1-generated index
363
+ # stores a compound part such as "5-1-3" in :part with no :subpart, and
364
+ # Relaton::Index builds each row via Pubid::Iso::Identifier.from_hash(id),
365
+ # which keeps it as part="5-1-3" subpart=nil. A parsed query (no_year_ref)
366
+ # splits it (part="5", subpart="1-3"), so the two never compare equal.
367
+ # Re-split the candidate's compound part on the first dash to mirror parse
368
+ # before comparing. `exclude` returns a fresh instance, so mutating this
369
+ # copy is safe. Remove once pubid create() splits compound parts itself.
370
+ def normalize_compound_part(pubid)
371
+ num = pubid.part&.value.to_s
372
+ return pubid unless pubid.subpart.nil? && num.include?("-")
373
+
374
+ head, tail = num.split("-", 2)
375
+ pubid.part = ::Pubid::Iso::Components::Code.new(value: head)
376
+ pubid.subpart = ::Pubid::Iso::Components::Code.new(value: tail)
377
+ pubid
378
+ end
347
379
  end
348
380
  end
349
381
  end
@@ -12,11 +12,21 @@ module Relaton
12
12
  # (see https://www.iso.org/open-data.html) and write each one as a YAML
13
13
  # file under `@output`.
14
14
  #
15
- # `source` modes (matching the `Relaton::Core::DataFetcher.fetch` arg):
15
+ # The upstream feed has no delta API, so any run that proceeds re-downloads
16
+ # and re-ingests the whole feed. There is therefore no value in a partial
17
+ # update: a run either skips entirely or does a full replace. `source` modes
18
+ # (matching the `Relaton::Core::DataFetcher.fetch` arg):
16
19
  #
17
- # * `"iso-open-data"` (default) - skip the run if the upstream
18
- # `Last-Modified` header matches `LAST_MODIFIED_FILE`.
19
- # * `"iso-open-data-all"` - clear `@output` and re-emit every record.
20
+ # * `"iso-open-data"` (default) - skip when the feed's `Last-Modified` is
21
+ # unchanged; otherwise wipe `@output` + index and rebuild from scratch.
22
+ # * `"iso-open-data-all"` - the same full rebuild, but ignore the
23
+ # `Last-Modified` short-circuit and always run.
24
+ #
25
+ # Wiping happens here, after the short-circuit decision, so `@output` and the
26
+ # index always mirror the current feed (records that have left it don't
27
+ # linger as stale files or dangling index entries) without risking an empty
28
+ # tree on a skipped run. `#fetch` returns true when it rebuilt, false when
29
+ # it skipped, so callers can chain follow-up work (e.g. the pubid-v1 index).
20
30
  #
21
31
  class DataFetcher < Core::DataFetcher
22
32
  OPEN_DATA_URL = "https://isopublicstorageprod.blob.core.windows.net/" \
@@ -45,9 +55,9 @@ module Relaton
45
55
 
46
56
  Util.info "Fetching ISO Open Data (mode: #{@source})..."
47
57
  last_modified = fetch_last_modified
48
- return if up_to_date?(last_modified)
58
+ return false if up_to_date?(last_modified)
49
59
 
50
- prepare_output
60
+ reset_output
51
61
  jsonl_path = download_dataset
52
62
  ref_index, amend_index, date_index = build_ref_index(jsonl_path)
53
63
  tc_index = build_tc_index
@@ -57,6 +67,7 @@ module Relaton
57
67
  index.save
58
68
  save_last_modified(last_modified)
59
69
  report_errors
70
+ true
60
71
  rescue StandardError => e
61
72
  Util.error "#{e.message}\n#{e.backtrace.join("\n")}"
62
73
  raise
@@ -103,8 +114,13 @@ module Relaton
103
114
  File.write(LAST_MODIFIED_FILE, last_modified, encoding: "UTF-8")
104
115
  end
105
116
 
106
- def prepare_output
107
- FileUtils.rm_rf(@output) if @full_refresh
117
+ # Reset the data tree and the index together so the rebuild is a clean
118
+ # mirror of the feed. Called only after the short-circuit, so a skipped run
119
+ # never strands an empty tree. `Core::DataFetcher.fetch` recreates the
120
+ # directory before ingest writes into it.
121
+ def reset_output
122
+ FileUtils.rm_rf(@output)
123
+ index.remove_all
108
124
  FileUtils.mkdir_p(@output)
109
125
  end
110
126
 
@@ -174,9 +190,9 @@ module Relaton
174
190
 
175
191
  def amend_base(ref)
176
192
  pubid = ::Pubid::Iso::Identifier.parse(ref)
177
- return nil unless pubid.respond_to?(:base) && pubid.base
193
+ return nil unless pubid.base_identifier
178
194
 
179
- pubid.base.to_s
195
+ pubid.base_identifier.to_s
180
196
  rescue StandardError
181
197
  nil
182
198
  end
@@ -260,10 +276,42 @@ module Relaton
260
276
 
261
277
  def write_file(file, doc, docid)
262
278
  @files << file
263
- index.add_or_update(docid.pubid || docid.content.to_s, file)
279
+ index_primary(docid, file)
264
280
  File.write(file, serialize(doc), encoding: "UTF-8")
265
281
  end
266
282
 
283
+ # Add a document's primary id to the index. With pubid 2.x every ISO id
284
+ # is expected to parse; if one does not (`docid.pubid` is nil) record it
285
+ # so `report_errors` raises a tracked GitHub issue at the end, and skip
286
+ # the index entry rather than indexing a raw string (which would crash
287
+ # the index sort: `get_id_number` calls `.number` on the id). The data
288
+ # file is still written, so the document is not lost — only unindexed
289
+ # until its id parses.
290
+ def index_primary(docid, file)
291
+ unless docid.pubid
292
+ unparseable_ids << [docid.content.to_s, file]
293
+ return
294
+ end
295
+ index.add_or_update(docid.pubid, file)
296
+ end
297
+
298
+ def unparseable_ids
299
+ @unparseable_ids ||= []
300
+ end
301
+
302
+ # Surface unparseable primary ids through the shared error-reporting
303
+ # machinery (an "Error fetching documents" GitHub issue in CI) so they are
304
+ # visible and tracked, not silently dropped in the action log. The
305
+ # gh_issue logger channel is registered inside `report_errors`, so emit
306
+ # these at :error after it is set up and before `super` creates the issue.
307
+ def report_errors
308
+ gh_issue
309
+ unparseable_ids.each do |content, file|
310
+ log_error "Unparseable primary id `#{content}` was not indexed (#{file})"
311
+ end
312
+ super
313
+ end
314
+
267
315
  # --- static merge -----------------------------------------------------
268
316
 
269
317
  def merge_static_files
@@ -274,7 +322,7 @@ module Relaton
274
322
  did = item.docidentifier.detect(&:primary)
275
323
  next unless did
276
324
 
277
- index.add_or_update(did.pubid || did.content.to_s, f)
325
+ index_primary(did, f)
278
326
  end
279
327
  end
280
328
 
@@ -37,7 +37,6 @@ module Relaton
37
37
  "Cor" => "technical-corrigendum",
38
38
  "Add" => "addendum",
39
39
  "Suppl" => "supplement",
40
- "Ext" => "extract",
41
40
  }.freeze
42
41
 
43
42
  DOC_URL = "https://www.iso.org/standard/%d.html"
@@ -123,8 +122,9 @@ module Relaton
123
122
  end
124
123
 
125
124
  def iso_reference_pubid
126
- params = pubid.to_h.except(:typed_stage)
127
- ::Pubid::Iso::Identifier.create(language: "en", **params)
125
+ pubid.dup.tap do |id|
126
+ id.languages = [::Pubid::Components::Language.new(code: "en", original_code: "E")]
127
+ end
128
128
  rescue StandardError
129
129
  nil
130
130
  end
@@ -133,12 +133,10 @@ module Relaton
133
133
  return @urn_pubid if defined?(@urn_pubid)
134
134
 
135
135
  @urn_pubid = begin
136
- dup_pubid = pubid.dup
137
- if dup_pubid.respond_to?(:stage=) && stage_dotted &&
138
- dup_pubid.respond_to?(:stage) && dup_pubid.stage.nil?
139
- dup_pubid.stage = ::Pubid::Iso::Identifier.parse_stage(stage_dotted)
140
- end
141
- dup_pubid
136
+ # Override stage even when the parsed pubid carries the default
137
+ # "published" stage relaton's currentStage (e.g. 9092 = Withdrawn)
138
+ # is the authoritative source for URN stage.
139
+ stage_dotted ? pubid.with_harmonized_stage(stage_dotted) : pubid.dup
142
140
  rescue StandardError
143
141
  nil
144
142
  end
@@ -386,9 +384,9 @@ module Relaton
386
384
  end
387
385
 
388
386
  def base_relation
389
- return [] unless pubid&.respond_to?(:base) && pubid.base
387
+ return [] unless pubid&.base_identifier
390
388
 
391
- [relation_for(pubid.base.to_s, "updates")]
389
+ [relation_for(pubid.base_identifier.to_s, "updates")]
392
390
  end
393
391
 
394
392
  def relation_for(ref, type)
@@ -47,7 +47,7 @@ module Relaton
47
47
 
48
48
  def create_pubid(id)
49
49
  if id.is_a?(Hash)
50
- ::Pubid::Iso::Identifier.create(**id)
50
+ ::Pubid::Iso::Identifier.from_hash(id)
51
51
  else
52
52
  id
53
53
  end
@@ -12,15 +12,35 @@ module Relaton
12
12
  @opts ||= {}
13
13
  end
14
14
 
15
+ # Maps the legacy 1.x exclude symbols to their pubid 2.x attribute
16
+ # names. The public excludings API still uses :year/:iteration for
17
+ # backwards compatibility with existing call sites and specs.
18
+ LEGACY_EXCLUDE_MAP = { year: :date, iteration: :stage_iteration }.freeze
19
+ private_constant :LEGACY_EXCLUDE_MAP
20
+
21
+ def translate_excludings(attrs)
22
+ out = attrs.map { |a| LEGACY_EXCLUDE_MAP[a] || a }
23
+ # Excluding :stage implies excluding :typed_stage too — the two
24
+ # carry overlapping data and their default-published values can
25
+ # differ in trivia (e.g. original_abbr "" vs nil), so leaving
26
+ # typed_stage in the comparison breaks otherwise-equal matches.
27
+ out << :typed_stage if out.include?(:stage) && !out.include?(:typed_stage)
28
+ out
29
+ end
30
+
15
31
  def ref_pubid_no_year
16
- @ref_pubid_no_year ||= ref.base ? ref.dup.tap { |r| r.base = r.base.exclude(:year) } : ref.exclude(:year)
32
+ @ref_pubid_no_year ||=
33
+ if ref.base_identifier
34
+ ref.dup.tap { |r| r.base_identifier = r.base_identifier.exclude(:date) }
35
+ else
36
+ ref.exclude(:date)
37
+ end
17
38
  end
18
39
 
19
40
  def ref_pubid_excluded
20
41
  return @ref_pubid_excluded if defined? @ref_pubid_excluded
21
42
 
22
- ref_excludings = excludings.dup
23
- ref_excludings << :all_parts
43
+ ref_excludings = translate_excludings(excludings) + [:all_parts]
24
44
  @ref_pubid_excluded ||= ref_pubid_no_year.exclude(*ref_excludings)
25
45
  end
26
46
 
@@ -30,38 +50,68 @@ module Relaton
30
50
  # @return [Array<Relaton::Iso::Hit>] hits
31
51
  #
32
52
  def find # rubocop:disable Metrics/AbcSize
33
- @array = index.search do |row|
34
- row[:id].is_a?(Hash) || row[:id].is_a?(::Pubid::Core::Identifier::Base) ? pubid_match?(row[:id]) : ref.to_s(with_prf: true) == row[:id]
53
+ # Pass `ref` (a Pubid::Identifier, not a String) so the index can
54
+ # narrow candidates by number via binary search before applying the
55
+ # block, instead of a full O(n) scan of every row. Every row's `:id`
56
+ # is already a Pubid::Identifier — Relaton::Index deserialized it via
57
+ # the `pubid_class` passed in `#index` — so `pubid_match?` compares
58
+ # Pubid to Pubid directly.
59
+ @array = index.search(ref) do |row|
60
+ pubid_match?(row[:id])
35
61
  end.map { |row| Hit.new row, self }
36
- .sort_by! { |h| h.pubid.to_s }
37
- .reverse!
62
+ # An all-parts query drops :part from the match, so multiple rows can
63
+ # resolve to the same pubid; collapse them so each part appears once.
64
+ @array.uniq! { |h| h.pubid.to_s } if ref.root.all_parts
65
+ # Most-recent first (pubid string desc ~ year desc), then float
66
+ # published-stage ids above drafts. An undated query excludes :stage
67
+ # when matching, so a future draft (e.g. ISO/AWI) matches alongside the
68
+ # published edition; without this the draft would sort first lexically
69
+ # ("ISO/AWI …" > "ISO …") and be returned by fetch_doc's `first`. The
70
+ # index id carries no lifecycle status, so the parsed stage is the only
71
+ # signal available here. partition is stable, preserving the year order
72
+ # within each group.
73
+ @array.sort_by! { |h| h.pubid.to_s }.reverse!
74
+ published, drafts = @array.partition do |h|
75
+ h.pubid && default_published_stage?(h.pubid)
76
+ end
77
+ @array = published + drafts
38
78
  self
39
79
  end
40
80
 
41
- def pubid_match?(id)
42
- pubid = create_pubid(id)
43
- return false unless pubid
44
-
45
- # pubid.base = pubid.base.exclude(:year, :edition) if pubid.base
46
- dir_excludings = excludings.dup
47
- dir_excludings << :edition unless pubid.typed_stage_abbrev == "DIR"
48
- exclude_id_attrs(pubid, *dir_excludings) == ref_pubid_excluded
81
+ def pubid_match?(pubid)
82
+ match_excludings = translate_excludings(excludings) + [:all_parts]
83
+ match_excludings << :edition unless pubid.typed_stage&.abbr&.include?("DIR")
84
+ # Only the candidate is built via .create (from the index) and so may
85
+ # carry a compound part; `ref_pubid_no_year` is always a parsed pubid,
86
+ # already split, so it needs no normalization.
87
+ cand = normalize_compound_part(exclude_id_attrs(pubid, *match_excludings))
88
+ cand == exclude_id_attrs(ref_pubid_no_year, *match_excludings)
49
89
  end
50
90
 
51
- def create_pubid(id)
52
- return id if id.is_a?(::Pubid::Core::Identifier::Base)
53
-
54
- ::Pubid::Iso::Identifier.create(**id)
55
- rescue StandardError => e
56
- Util.warn e.message, key: ref.to_s
91
+ # @TODO TEMP WORKAROUND (pubid 2.x migration): the v1-generated index
92
+ # stores a compound part such as "5-1-3" in :part with no :subpart, and
93
+ # Relaton::Index builds each row via Pubid::Iso::Identifier.from_hash(id),
94
+ # which keeps it as part="5-1-3" subpart=nil. A parsed query splits it
95
+ # (part="5", subpart="1-3"), so the two never compare equal. Re-split the
96
+ # compound part on the first dash to mirror parse before comparing.
97
+ # `exclude` returns a fresh instance, so mutating this copy is safe.
98
+ # Remove once pubid create() splits compound parts itself.
99
+ def normalize_compound_part(pubid)
100
+ num = pubid.part&.value.to_s
101
+ return pubid unless pubid.subpart.nil? && num.include?("-")
102
+
103
+ head, tail = num.split("-", 2)
104
+ pubid.part = ::Pubid::Iso::Components::Code.new(value: head)
105
+ pubid.subpart = ::Pubid::Iso::Components::Code.new(value: tail)
106
+ pubid
57
107
  end
58
108
 
59
109
  def exclude_id_attrs(pubid, *attrs)
60
110
  xid = pubid.exclude(*attrs)
61
111
  curr = xid
62
- while curr.base
63
- curr.base = curr.base.exclude(*attrs)
64
- curr = curr.base
112
+ while curr.base_identifier
113
+ curr.base_identifier = curr.base_identifier.exclude(*attrs)
114
+ curr = curr.base_identifier
65
115
  end
66
116
  xid
67
117
  end
@@ -71,23 +121,26 @@ module Relaton
71
121
 
72
122
  excl_attrs = %i[year]
73
123
  excl_attrs << :part if ref.root.part.nil? || ref.root.all_parts
74
- if ref.stage.nil? || ref.root.all_parts
124
+ if default_published_stage?(ref) || ref.root.all_parts
75
125
  excl_attrs << :stage
76
126
  excl_attrs << :iteration
77
127
  end
78
- # excl_parts << :edition if ref.root.edition.nil? || all_parts
79
128
  @excludings = excl_attrs
80
129
  end
81
130
 
131
+ # Pubid 2.x auto-populates a published-stage default on parse/.create,
132
+ # so ref.stage is never nil. Treat that default as "no stage specified".
133
+ def default_published_stage?(pubid)
134
+ return true if pubid.typed_stage.nil?
135
+
136
+ pubid.typed_stage.stage_code.to_s == "published"
137
+ end
138
+
82
139
  def index
83
140
  @index ||= Relaton::Index.find_or_create(
84
141
  :iso,
85
142
  url: "#{ENDPOINT}#{INDEXFILE}.zip",
86
143
  file: "#{INDEXFILE}.yaml",
87
- id_keys: %i[publisher number copublisher part year edition type stage
88
- iteration joint_document tctype sctype wgtype tcnumber
89
- scnumber wgnumber dirtype base supplements addendum
90
- jtc_dir month amendments corrigendums language],
91
144
  pubid_class: ::Pubid::Iso::Identifier,
92
145
  )
93
146
  end
@@ -107,9 +160,9 @@ module Relaton
107
160
  def to_all_parts # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
108
161
  parts = @array.select { |h| h.pubid.part }
109
162
  if opts[:publication_date_before] || opts[:publication_date_after]
110
- parts = parts.select { |h| Bibliography.send(:year_in_range?, (h.pubid.year || h.hit[:year]).to_i, opts) }
163
+ parts = parts.select { |h| Bibliography.send(:year_in_range?, (h.pubid.date&.year || h.hit[:year]).to_i, opts) }
111
164
  end
112
- hit = parts.min_by { |h| h.pubid.part.to_i }
165
+ hit = parts.min_by { |h| h.pubid.part.value.to_i }
113
166
  return @array.first&.item unless hit
114
167
 
115
168
  bibitem = hit.item
@@ -21,7 +21,7 @@ module Relaton
21
21
 
22
22
  def create_id_from_pubid(content, without_date)
23
23
  pubid = without_date ? content.exclude(:year) : content
24
- self.id = pubid.to_s(with_prf: true).gsub(/\W+/, "")
24
+ self.id = pubid.to_s.gsub(/\W+/, "")
25
25
  end
26
26
  end
27
27
  end
@@ -27,7 +27,7 @@ module Relaton
27
27
  else
28
28
  parsed =
29
29
  case value
30
- when ::Pubid::Iso::Identifier::Base then value
30
+ when ::Pubid::Iso::Identifier then value
31
31
  when String
32
32
  begin
33
33
  ::Pubid::Iso::Identifier.parse(value)
@@ -42,6 +42,13 @@ module Relaton
42
42
 
43
43
  if parsed
44
44
  @pubid = parsed
45
+ # TC committee documents have a canonical spelling ("… N1110")
46
+ # that pubid renders with a space ("… N 1110"). Preserve the
47
+ # source string (same intent as the iso-tc bypass) while keeping
48
+ # the parsed pubid for any structural operations.
49
+ if value.is_a?(String) && parsed.is_a?(::Pubid::Iso::Identifiers::TcDocument)
50
+ @raw_content = value
51
+ end
45
52
  elsif value.is_a?(String)
46
53
  @raw_content = value
47
54
  end
@@ -80,18 +87,18 @@ module Relaton
80
87
  end
81
88
 
82
89
  def remove_date!
83
- remove_attr!(:year)
90
+ remove_attr!(:date)
84
91
  end
85
92
 
86
93
  def exclude_year
87
94
  return @raw_content if @raw_content
88
95
  return nil unless @pubid
89
96
 
90
- pubid = @pubid.exclude(:year)
97
+ pubid = @pubid.exclude(:date)
91
98
  current = pubid
92
- while current.base
93
- current.base = current.base.exclude(:year)
94
- current = current.base
99
+ while current.base_identifier
100
+ current.base_identifier = current.base_identifier.exclude(:date)
101
+ current = current.base_identifier
95
102
  end
96
103
  pubid
97
104
  end
@@ -100,11 +107,10 @@ module Relaton
100
107
 
101
108
  def render_pubid(pubid)
102
109
  case type
103
- when "URN" then pubid.urn
104
- when "iso-reference", "iso-with-lang"
105
- pubid.to_s(format: :ref_num_short, with_prf: true)
110
+ when "URN" then pubid.to_urn
111
+ when "ISO" then pubid.exclude(:languages).to_s
106
112
  else
107
- pubid.to_s(with_prf: true)
113
+ pubid.to_s
108
114
  end
109
115
  end
110
116
 
@@ -112,10 +118,10 @@ module Relaton
112
118
  return unless @pubid
113
119
 
114
120
  @pubid.send("#{attr}=", nil)
115
- base = @pubid.base
121
+ base = @pubid.base_identifier
116
122
  while base
117
123
  base.send("#{attr}=", nil)
118
- base = base.base
124
+ base = base.base_identifier
119
125
  end
120
126
  refresh_content!
121
127
  end
@@ -4,7 +4,7 @@ module Relaton
4
4
  TYPES = %w[
5
5
  international-standard technical-specification technical-report publicly-available-specification
6
6
  international-workshop-agreement guide recommendation amendment technical-corrigendum directive
7
- committee-document addendum supplement extract
7
+ committee-document addendum supplement
8
8
  ].freeze
9
9
 
10
10
  attribute :content, :string, values: TYPES
@@ -104,16 +104,14 @@ module Relaton
104
104
  return @pubid if @pubid
105
105
 
106
106
  @pubid = ::Pubid::Iso::Identifier.parse(id)
107
- @pubid.root.edition ||= edition.content if @pubid.base
107
+ @pubid.root.edition ||= edition.content if @pubid.base_identifier
108
108
  @pubid
109
109
  rescue StandardError => e
110
110
  Util.error "Failed to parse pubid from #{id}: #{e.message}"
111
111
  end
112
112
 
113
113
  def urn
114
- pubid_dup = pubid.dup
115
- pubid_dup.stage ||= ::Pubid::Iso::Identifier.parse_stage(stage_code)
116
- pubid_dup
114
+ pubid.with_harmonized_stage(stage_code)
117
115
  end
118
116
 
119
117
  def edition
@@ -143,8 +141,9 @@ module Relaton
143
141
  # @return [String] English reference identifier
144
142
  #
145
143
  def isoref
146
- params = pubid.to_h.except(:typed_stage)
147
- ::Pubid::Iso::Identifier.create(language: "en", **params).to_s(format: :ref_num_short)
144
+ pubid.dup.tap do |id|
145
+ id.languages = [::Pubid::Components::Language.new(code: "en", original_code: "E")]
146
+ end.to_s
148
147
  end
149
148
 
150
149
  private
@@ -1,7 +1,7 @@
1
1
  module Relaton
2
2
  module Iso
3
3
  module Type
4
- # Lutaml-model attribute type that preserves `Pubid::Iso::Identifier::Base`
4
+ # Lutaml-model attribute type that preserves `Pubid::Iso::Identifier`
5
5
  # instances on the way in and stringifies them on the way out.
6
6
  #
7
7
  # The default `:string` type calls `.to_s` during `cast`, which loses the
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Relaton
4
4
  module Iso
5
- VERSION = "2.1.5"
5
+ VERSION = "2.2.0.pre.alpha.1"
6
6
  end
7
7
  end
data/lib/relaton/iso.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "net/http"
4
- require "pubid/iso"
4
+ require "pubid"
5
5
  require "relaton/index"
6
6
  require "isoics"
7
7
  require "relaton/bib"
@@ -18,7 +18,7 @@ require_relative "iso/bibliography"
18
18
 
19
19
  module Relaton
20
20
  module Iso
21
- INDEXFILE = "index-v1"
21
+ INDEXFILE = "index-v2"
22
22
 
23
23
  def self.grammar_hash
24
24
  # gem_path = File.expand_path "..", __dir__
data/relaton-iso.gemspec CHANGED
@@ -24,11 +24,11 @@ Gem::Specification.new do |spec|
24
24
  spec.bindir = "exe"
25
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
26
  spec.require_paths = ["lib"]
27
- spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
27
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0")
28
28
 
29
29
  spec.add_dependency "isoics", "~> 0.1.6"
30
- spec.add_dependency "pubid-iso", "~> 1.15.20"
31
- spec.add_dependency "relaton-bib", "~> 2.1.0"
32
- spec.add_dependency "relaton-core", "~> 0.0.12"
33
- spec.add_dependency "relaton-index", "~> 0.2.12"
30
+ spec.add_dependency "pubid", "~> 2.0.0.pre.alpha.3"
31
+ spec.add_dependency "relaton-bib", "~> 2.2.0.pre.alpha.1"
32
+ spec.add_dependency "relaton-core", "~> 2.2.0.pre.alpha.1"
33
+ spec.add_dependency "relaton-index", "~> 2.2.0.pre.alpha.1"
34
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.5
4
+ version: 2.2.0.pre.alpha.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-06-27 00:00:00.000000000 Z
11
+ date: 2026-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: isoics
@@ -25,61 +25,61 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.1.6
27
27
  - !ruby/object:Gem::Dependency
28
- name: pubid-iso
28
+ name: pubid
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 1.15.20
33
+ version: 2.0.0.pre.alpha.3
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 1.15.20
40
+ version: 2.0.0.pre.alpha.3
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: relaton-bib
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.1.0
47
+ version: 2.2.0.pre.alpha.1
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.1.0
54
+ version: 2.2.0.pre.alpha.1
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: relaton-core
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 0.0.12
61
+ version: 2.2.0.pre.alpha.1
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 0.0.12
68
+ version: 2.2.0.pre.alpha.1
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: relaton-index
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 0.2.12
75
+ version: 2.2.0.pre.alpha.1
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 0.2.12
82
+ version: 2.2.0.pre.alpha.1
83
83
  description: 'Relaton::Iso: retrieve ISO Standards for bibliographic use using the
84
84
  IsoBibliographicItem model'
85
85
  email:
@@ -93,7 +93,6 @@ files:
93
93
  - ".gitignore"
94
94
  - ".hound.yml"
95
95
  - ".rspec"
96
- - ".rubocop.yml"
97
96
  - CLAUDE.md
98
97
  - CODE_OF_CONDUCT.md
99
98
  - Gemfile
@@ -143,7 +142,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
143
142
  requirements:
144
143
  - - ">="
145
144
  - !ruby/object:Gem::Version
146
- version: 3.2.0
145
+ version: 3.3.0
147
146
  required_rubygems_version: !ruby/object:Gem::Requirement
148
147
  requirements:
149
148
  - - ">="
data/.rubocop.yml DELETED
@@ -1,12 +0,0 @@
1
- # This project follows the Ribose OSS style guide.
2
- # https://github.com/riboseinc/oss-guides
3
- # All project-specific additions and overrides should be specified in this file.
4
-
5
- require: rubocop-rails
6
-
7
- inherit_from:
8
- - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
9
- AllCops:
10
- TargetRubyVersion: 3.2
11
- Rails:
12
- Enabled: false