relaton-index 0.2.21 → 2.2.0.pre.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f2e7462982813b35a76cc8549bad25b97ad125cf3b9986426889aecae65bb513
4
- data.tar.gz: '0338bedb98265e0fbe2efac2db854b7105a3c281ee0275507ca80c35d71dbbc9'
3
+ metadata.gz: e05bc6da371d0f9f3346ed3df0fa82c69c13f6af75148b8a36e1992f82b34394
4
+ data.tar.gz: 457df757d72b7213e6ac726a7ced8a452b5341d6d9b8d52a52bdf34e717d5776
5
5
  SHA512:
6
- metadata.gz: 754adaf38cb24cf06987a73b82c544aee61952cbd895cd2c5b2acba69544b5453264680f117d6788b9b3f4cdfab87d1a25c757e046947a4903bbe02be3b8a6bd
7
- data.tar.gz: f0e5b98bd26b5ef53519167370c0a0a2754f4ea482ad7e0b77c52b23b5073c57649db0b158a7d482a6a78fb2ad11143be5278e4c5a13f9114d13e71d31dfcd2f
6
+ metadata.gz: efcc97baaa47640770274043611e99dc2f1b4442d0205da3c6a375b735e9ad67480112fc1063ef6f271e14110076a845b9072f4e53fc7745ef844122f6eb7841
7
+ data.tar.gz: b4793790e433dc5bac23a1f3a937abe3cc2168e57e6cbcefdabcf10748200776fbb2db96229517ee20a092da386dbf38532052474d66954c397cd839f7525e8b
data/Gemfile CHANGED
@@ -5,6 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in relaton-index.gemspec
6
6
  gemspec
7
7
 
8
+ # Use local monorepo sibling gems where available.
9
+ Dir["../*/"].each do |dir|
10
+ name = File.basename(dir)
11
+ next if name == File.basename(__dir__)
12
+ next unless File.exist?(File.join(dir, "#{name}.gemspec"))
13
+ gem name, path: dir
14
+ end
15
+
8
16
  gem "rake", "~> 13.0"
9
17
 
10
18
  gem "rspec", "~> 3.0"
@@ -6,6 +6,13 @@ module Relaton
6
6
  # In index mode url should be nil.
7
7
  #
8
8
  class FileIO
9
+ include IdNumber
10
+
11
+ # Raised internally when a deserialized id cannot be parsed or is not
12
+ # understood by the pubid class; `#load_index` rescues it to trigger the
13
+ # wrong-structure handling (re-download, or stop and log).
14
+ class InvalidIndexError < StandardError; end
15
+
9
16
  attr_reader :url, :pubid_class
10
17
  attr_accessor :sorted
11
18
 
@@ -21,13 +28,16 @@ module Relaton
21
28
  # and save it to the storage (if not exists, or older than 24 hours)
22
29
  # if true then the index is read from the storage (used to remove index file)
23
30
  # if nil then the filename is used to read and write the file (used to create the index in GH Actions)
24
- # @param [Pubid::Core::Identifier::Base] pubid class for deserialization
31
+ # @param [Pubid::Identifier] pubid class for deserialization
25
32
  #
26
- def initialize(dir, url, filename, id_keys, pubid_class = nil)
33
+ # `id_keys` is accepted for backward compatibility but no longer used: the
34
+ # index format is now validated by round-tripping a sample of ids through
35
+ # the pubid class (see #check_serialization), which understands the pubid
36
+ # v2 (lutaml) `_type` serialization that the old key-allowlist could not.
37
+ def initialize(dir, url, filename, _id_keys = nil, pubid_class = nil)
27
38
  @dir = dir
28
39
  @url = url
29
40
  @filename = filename
30
- @id_keys = id_keys || []
31
41
  @pubid_class = pubid_class
32
42
  @sorted = false
33
43
  end
@@ -84,8 +94,12 @@ module Relaton
84
94
  #
85
95
  # @return [Boolean] true if the index passes the format check, false otherwise
86
96
  #
97
+ # Structural check only. Per-id serialization is validated during
98
+ # deserialization (see #deserialize_id), which reuses the `from_hash` the
99
+ # index load performs anyway, so every row is checked at no extra parse
100
+ # cost.
87
101
  def check_format(index)
88
- check_basic_format(index) && check_id_format(index)
102
+ check_basic_format(index)
89
103
  end
90
104
 
91
105
  def check_basic_format(index)
@@ -95,13 +109,35 @@ module Relaton
95
109
  index.all? { |item| item.respond_to?(:keys) && item.keys.sort == keys }
96
110
  end
97
111
 
98
- def check_id_format(index)
99
- return true if @id_keys.empty?
112
+ # An id is supported when `from_hash` either resolves it to a concrete
113
+ # type (a subclass — the polymorphic `_type` matched) or round-trips
114
+ # losslessly through `to_hash`. The subclass clause covers valid entries
115
+ # pubid cannot fully rebuild on re-serialize (e.g. ISO directives drop a
116
+ # redundant subgroup number); the round-trip clause covers pubid classes
117
+ # without a subclass hierarchy. A wrong-format/garbled id satisfies
118
+ # neither: it falls back to the bare base class and fails to round-trip.
119
+ def id_supported?(obj, raw)
120
+ # A concrete subtype means pubid recognized the `_type`; accept without
121
+ # round-tripping. This both skips the false positive for valid-but-lossy
122
+ # types (e.g. ISO directives) and avoids the costly hash compare for the
123
+ # ~all rows that resolve to a subtype (it would otherwise add ~33%).
124
+ return true unless obj.instance_of?(@pubid_class)
100
125
 
101
- keys = index.each_with_object(Set.new) do |item, acc|
102
- acc.merge item[:id].keys if item[:id].is_a?(Hash)
126
+ normalize(obj.to_hash) == normalize(raw)
127
+ rescue StandardError
128
+ false
129
+ end
130
+
131
+ # Stringify hash keys and scalar values so the comparison ignores YAML
132
+ # scalar typing (e.g. 1 vs "1") and string/symbol key differences, while
133
+ # still detecting dropped/added keys or genuinely changed values.
134
+ def normalize(value)
135
+ case value
136
+ when Hash then value.to_h { |k, v| [k.to_s, normalize(v)] }
137
+ when Array then value.map { |v| normalize(v) }
138
+ when nil then nil
139
+ else value.to_s
103
140
  end
104
- keys.none? { |k| !@id_keys.include? k }
105
141
  end
106
142
 
107
143
  #
@@ -116,17 +152,51 @@ module Relaton
116
152
  load_index(yaml) || []
117
153
  end
118
154
 
155
+ # Deserialize and sort by the same narrowing key Type#search bsearches
156
+ # on, so binary search always has a consistent total order. The published
157
+ # index is only approximately sorted (generated under pubid 1.x base
158
+ # semantics); merely detecting sortedness left bsearch disabled and every
159
+ # search a full O(n) scan. Sorting here is one-time per load.
119
160
  def deserialize_pubid(index)
120
161
  return index unless @pubid_class
121
162
 
163
+ deserialized = index.map do |r|
164
+ { id: deserialize_id(r[:id]), file: r[:file] }
165
+ end
166
+ warn_unless_sorted(deserialized)
167
+ deserialized.sort_by! { |r| get_id_number(r[:id]) }
122
168
  @sorted = true
123
- prev_number = nil
124
- index.map do |r|
125
- id = @pubid_class.create(**(r[:id] || {}))
126
- num = get_id_number id
127
- @sorted = false if prev_number && prev_number > num
128
- prev_number = num
129
- { id: id, file: r[:file] }
169
+ deserialized
170
+ end
171
+
172
+ # Deserialize one id and verify pubid understands it. Reuses the
173
+ # `from_hash` deserialization the load performs anyway, so validating every
174
+ # row costs only the `to_hash`/compare for ids that need the round-trip
175
+ # clause. Raises InvalidIndexError when an id cannot be parsed or is
176
+ # unsupported, so `#load_index` rejects (and re-downloads) the whole index.
177
+ def deserialize_id(raw)
178
+ obj = @pubid_class.from_hash(raw)
179
+ rescue StandardError => e
180
+ raise InvalidIndexError, "cannot parse id #{raw.inspect}: #{e.message}"
181
+ else
182
+ return obj if id_supported?(obj, raw)
183
+
184
+ raise InvalidIndexError, "unsupported id #{raw.inspect}"
185
+ end
186
+
187
+ # Log when the loaded index is not already in get_id_number order, so the
188
+ # in-memory sort above (and the underlying not-sorted index file) is
189
+ # visible. Stops at the first out-of-order pair.
190
+ def warn_unless_sorted(index)
191
+ prev = nil
192
+ index.each do |r|
193
+ num = get_id_number(r[:id])
194
+ if prev && prev > num
195
+ Util.warn "Index file `#{file}` is not sorted by id number; " \
196
+ "sorting #{index.size} entries in memory.", progname
197
+ return
198
+ end
199
+ prev = num
130
200
  end
131
201
  end
132
202
 
@@ -147,18 +217,20 @@ module Relaton
147
217
  def load_index(yaml, save = false)
148
218
  index = YAML.safe_load(yaml, permitted_classes: [Symbol])
149
219
  save index if save
150
- return deserialize_pubid(index) if check_format index
220
+ return deserialize_pubid(index) if check_format(index)
151
221
 
152
- if save
153
- warn_remote_index_error "Wrong structure of"
154
- else
155
- warn_local_index_error "Wrong structure of"
156
- end
222
+ report_invalid_index(save, "Wrong structure of")
157
223
  rescue Psych::SyntaxError
224
+ report_invalid_index(save, "YAML parsing error when reading")
225
+ rescue InvalidIndexError
226
+ report_invalid_index(save, "Wrong structure of")
227
+ end
228
+
229
+ def report_invalid_index(save, reason)
158
230
  if save
159
- warn_remote_index_error "YAML parsing error when reading"
231
+ warn_remote_index_error reason
160
232
  else
161
- warn_local_index_error "YAML parsing error when reading"
233
+ warn_local_index_error reason
162
234
  end
163
235
  end
164
236
 
@@ -194,23 +266,21 @@ module Relaton
194
266
  #
195
267
  def save(index)
196
268
  yaml = sort_structured_index(index).map do |item|
197
- item.transform_values { |value| value.is_a?(Pubid::Core::Identifier::Base) ? value.to_h : value }
269
+ item.transform_values do |value|
270
+ @pubid_class && value.is_a?(@pubid_class) ? value.to_hash : value
271
+ end
198
272
  end.to_yaml
199
273
  Index.config.storage.write file, yaml
200
274
  end
201
275
 
202
276
  def sort_structured_index(index)
203
- if @pubid_class && index.first&.dig(:id).is_a?(Pubid::Core::Identifier::Base)
277
+ if @pubid_class && index.first&.dig(:id).is_a?(@pubid_class)
204
278
  index.sort_by { |item| get_id_number item[:id] }
205
279
  else
206
280
  index
207
281
  end
208
282
  end
209
283
 
210
- def get_id_number(id)
211
- id.respond_to?(:base) && id.base ? id.base.number.to_s : id.number.to_s
212
- end
213
-
214
284
  #
215
285
  # Remove index file from storage
216
286
  #
@@ -0,0 +1,30 @@
1
+ module Relaton
2
+ module Index
3
+ # Shared narrowing/sort key for structured (pubid) index ids. Type uses it
4
+ # for binary-search narrowing; FileIO uses it to sort the index and detect
5
+ # sortedness. The two MUST agree, so the rule lives in one place.
6
+ module IdNumber
7
+ # One-level narrowing key: a supplement/amendment is filed under its
8
+ # immediate parent's number, everything else under its own number.
9
+ #
10
+ # Pubid 2.x exposes the parent via `.base_identifier` (the LutaML
11
+ # `Pubid::Iso::Identifiers::*` classes that relaton loads at runtime). A
12
+ # standalone `require "pubid-iso"` can instead surface the legacy
13
+ # `Pubid::Iso::Identifier::*` classes, which use `.base`; we accept either
14
+ # so the key is stable in both load orders. The wrong accessor silently
15
+ # falls through to the row's own number and breaks bsearch narrowing.
16
+ def get_id_number(id)
17
+ base = id_base(id)
18
+ ((base && base.number) || id.number).to_s
19
+ end
20
+
21
+ def id_base(id)
22
+ if id.respond_to?(:base_identifier) && id.base_identifier
23
+ id.base_identifier
24
+ elsif id.respond_to?(:base) && id.base
25
+ id.base
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -4,6 +4,8 @@ module Relaton
4
4
  # Relaton::Index::Type is a class for indexing Relaton files.
5
5
  #
6
6
  class Type
7
+ include IdNumber
8
+
7
9
  #
8
10
  # Initialize a new Relaton::Index::Type object
9
11
  #
@@ -12,7 +14,7 @@ module Relaton
12
14
  # @param [String, nil] file output file name
13
15
  # @param [Array<Symbol>] id_keys keys of identifier to be used for sorting index
14
16
  # format of index file is checked if id_keys all is provided at least in one of the IDs
15
- # @param [Pubid::Core::Identifier::Base, nil] pubid class for deserialization
17
+ # @param [Pubid::Identifier, nil] pubid class for deserialization
16
18
  #
17
19
  def initialize(type, url = nil, file = nil, id_keys = nil, pubid_class = nil) # rubocop:disable Metrics/ParameterLists
18
20
  @file = file
@@ -41,7 +43,7 @@ module Relaton
41
43
  #
42
44
  # Add or update index item
43
45
  #
44
- # @param [Pubid::Core::Identifier::Base] id document ID
46
+ # @param [Pubid::Identifier] id document ID
45
47
  # @param [String] file file name of the document
46
48
  #
47
49
  # @return [void]
@@ -62,7 +64,7 @@ module Relaton
62
64
  #
63
65
  # Search index for a given ID
64
66
  #
65
- # @param [String, Pubid::Core::Identifier::Base] id ID to search for
67
+ # @param [String, Pubid::Identifier] id ID to search for
66
68
  #
67
69
  # @return [Array<Hash>] search results
68
70
  #
@@ -131,10 +133,6 @@ module Relaton
131
133
  index[left...right]
132
134
  end
133
135
 
134
- def get_id_number(id)
135
- id.respond_to?(:base) && id.base ? id.base.number.to_s : id.number.to_s
136
- end
137
-
138
136
  def bsearch_left(target)
139
137
  index.bsearch_index do |item|
140
138
  get_id_number(item[:id]) >= target
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Relaton
4
4
  module Index
5
- VERSION = "0.2.21"
5
+ VERSION = "2.2.0.pre.alpha.1"
6
6
  end
7
7
  end
data/lib/relaton/index.rb CHANGED
@@ -3,12 +3,12 @@
3
3
  require "yaml"
4
4
  require "zip"
5
5
  require "relaton/logger"
6
- require "pubid-core"
7
6
 
8
7
  require_relative "index/version"
9
8
  require_relative "index/file_storage"
10
9
  require_relative "index/config"
11
10
  require_relative "index/util"
11
+ require_relative "index/id_number"
12
12
  require_relative "index/pool"
13
13
  require_relative "index/type"
14
14
  require_relative "index/file_io"
metadata CHANGED
@@ -1,13 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-index
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.21
4
+ version: 2.2.0.pre.alpha.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
+ autorequire:
8
9
  bindir: exe
9
10
  cert_chain: []
10
- date: 1980-01-02 00:00:00.000000000 Z
11
+ date: 2026-06-26 00:00:00.000000000 Z
11
12
  dependencies:
12
13
  - !ruby/object:Gem::Dependency
13
14
  name: openssl
@@ -23,34 +24,20 @@ dependencies:
23
24
  - - "~>"
24
25
  - !ruby/object:Gem::Version
25
26
  version: 3.3.2
26
- - !ruby/object:Gem::Dependency
27
- name: pubid-core
28
- requirement: !ruby/object:Gem::Requirement
29
- requirements:
30
- - - "~>"
31
- - !ruby/object:Gem::Version
32
- version: 1.15.6
33
- type: :runtime
34
- prerelease: false
35
- version_requirements: !ruby/object:Gem::Requirement
36
- requirements:
37
- - - "~>"
38
- - !ruby/object:Gem::Version
39
- version: 1.15.6
40
27
  - !ruby/object:Gem::Dependency
41
28
  name: relaton-logger
42
29
  requirement: !ruby/object:Gem::Requirement
43
30
  requirements:
44
31
  - - "~>"
45
32
  - !ruby/object:Gem::Version
46
- version: 0.2.0
33
+ version: 2.2.0.pre.alpha.1
47
34
  type: :runtime
48
35
  prerelease: false
49
36
  version_requirements: !ruby/object:Gem::Requirement
50
37
  requirements:
51
38
  - - "~>"
52
39
  - !ruby/object:Gem::Version
53
- version: 0.2.0
40
+ version: 2.2.0.pre.alpha.1
54
41
  - !ruby/object:Gem::Dependency
55
42
  name: rubyzip
56
43
  requirement: !ruby/object:Gem::Requirement
@@ -65,6 +52,7 @@ dependencies:
65
52
  - - "~>"
66
53
  - !ruby/object:Gem::Version
67
54
  version: 2.3.0
55
+ description:
68
56
  email:
69
57
  - open.source@ribose.com
70
58
  executables: []
@@ -72,7 +60,6 @@ extensions: []
72
60
  extra_rdoc_files: []
73
61
  files:
74
62
  - ".rspec"
75
- - ".rubocop.yml"
76
63
  - CLAUDE.md
77
64
  - Gemfile
78
65
  - LICENSE.txt
@@ -82,11 +69,11 @@ files:
82
69
  - lib/relaton/index/config.rb
83
70
  - lib/relaton/index/file_io.rb
84
71
  - lib/relaton/index/file_storage.rb
72
+ - lib/relaton/index/id_number.rb
85
73
  - lib/relaton/index/pool.rb
86
74
  - lib/relaton/index/type.rb
87
75
  - lib/relaton/index/util.rb
88
76
  - lib/relaton/index/version.rb
89
- - relaton-index.gemspec
90
77
  - sig/relaton/index.rbs
91
78
  homepage: https://github.com/relaton/relaton-index
92
79
  licenses:
@@ -94,6 +81,7 @@ licenses:
94
81
  metadata:
95
82
  homepage_uri: https://github.com/relaton/relaton-index
96
83
  source_code_uri: https://github.com/relaton/relaton-index
84
+ post_install_message:
97
85
  rdoc_options: []
98
86
  require_paths:
99
87
  - lib
@@ -101,14 +89,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
101
89
  requirements:
102
90
  - - ">="
103
91
  - !ruby/object:Gem::Version
104
- version: 3.1.0
92
+ version: 3.3.0
105
93
  required_rubygems_version: !ruby/object:Gem::Requirement
106
94
  requirements:
107
95
  - - ">="
108
96
  - !ruby/object:Gem::Version
109
97
  version: '0'
110
98
  requirements: []
111
- rubygems_version: 3.6.9
99
+ rubygems_version: 3.5.22
100
+ signing_key:
112
101
  specification_version: 4
113
102
  summary: Relaton Index is a library for indexing Relaton files.
114
103
  test_files: []
data/.rubocop.yml DELETED
@@ -1,12 +0,0 @@
1
- # This project follows the Ribose OSS style guide.
2
- # https://github.com/riboseinc/oss-guides
3
- # All project-specific additions and overrides should be specified in this file.
4
-
5
- require: rubocop-rails
6
-
7
- inherit_from:
8
- - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
9
- AllCops:
10
- TargetRubyVersion: 3.1
11
- Rails:
12
- Enabled: false
@@ -1,40 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "lib/relaton/index/version"
4
-
5
- Gem::Specification.new do |spec|
6
- spec.name = "relaton-index"
7
- spec.version = Relaton::Index::VERSION
8
- spec.authors = ["Ribose Inc."]
9
- spec.email = ["open.source@ribose.com"]
10
-
11
- spec.summary = "Relaton Index is a library for indexing Relaton files."
12
- spec.homepage = "https://github.com/relaton/relaton-index"
13
- spec.license = "MIT"
14
- spec.required_ruby_version = ">= 3.1.0"
15
-
16
- # spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"
17
-
18
- spec.metadata["homepage_uri"] = spec.homepage
19
- spec.metadata["source_code_uri"] = spec.homepage
20
- # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
21
-
22
- # Specify which files should be added to the gem when it is released.
23
- # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
24
- spec.files = Dir.chdir(__dir__) do
25
- `git ls-files -z`.split("\x0").reject do |f|
26
- (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
27
- end
28
- end
29
- spec.bindir = "exe"
30
- spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
- spec.require_paths = ["lib"]
32
-
33
- spec.add_dependency "openssl", "~> 3.3.2"
34
- spec.add_dependency "pubid-core", "~> 1.15.6"
35
- spec.add_dependency "relaton-logger", "~> 0.2.0"
36
- spec.add_dependency "rubyzip", "~> 2.3.0"
37
-
38
- # For more information and examples about making a new gem, check out our
39
- # guide at: https://bundler.io/guides/creating_gem.html
40
- end