documentrix 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 11ea07d3133f8de898211353a4bcff7f926b9f75114696af333e928862ee5aeb
4
- data.tar.gz: b60d8606b1974bcb43e6584f690ecb21a7e54af6915b09d9adb904bc9269b417
3
+ metadata.gz: ced4bf69e3ae1b4251844dd7915f8d75e72d1c40410f34c14311f70dd55c91f4
4
+ data.tar.gz: 6fc0799e4559f50a22d211e630ea5f7deaac671f24805f5501c85a11c7a9b853
5
5
  SHA512:
6
- metadata.gz: ae907af900fc6932de6b2d022a4ca97367d6e1901442ed30e838eb7bf128b5e5d7dd572862f29cfe6196f58553d394e390062287b21cac6185e2c6845244ee5a
7
- data.tar.gz: 75b95e852cbd6b412ae2651668db5cbffcb9f1c5845dd3bdfb2ea252380b9ab37d15f4b8381398f941c4c1dabeb1e7a174e9e6ef7dc4e0dc153455c4c0413b1e
6
+ metadata.gz: 8023a4b4d7cad8948e6cbd2a65cefd8b3993ae3de38ae5520a2ebf178989a78efb4c1296b51c614c4066c547372f9ad8067b6c8e196caaa098876a3adf44f523
7
+ data.tar.gz: 0c4544a80ecde5c98c1d3da64ffeb97155324fdc063993caddf6d2d0780c490b0c8ec24dbc2ca392b3ceb6023d964f58bad099ca4c509a6e43c33c4f7406beb0
data/CHANGES.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # Changes
2
2
 
3
+ ## 2026-06-17 v0.6.0
4
+
5
+ ### Changed
6
+
7
+ - Refactored collection discovery to ensure strong consistency across multiple
8
+ client instances by delegating `Documentrix::Documents#collections` directly
9
+ to the cache backend.
10
+ - Removed `@collections_cache` and the `invalidate_collections_cache!` method
11
+ from `lib/documentrix/documents.rb`.
12
+ - Implemented a high-performance `#collections` method in
13
+ `lib/documentrix/documents/cache/redis_cache.rb` utilizing `scan_each`.
14
+ - Implemented a high-performance `#collections` method in
15
+ `lib/documentrix/documents/cache/sqlite_cache.rb` using SQL `DISTINCT`.
16
+ - Added specialized unit tests for collection extraction and regex patterns
17
+ within `spec/documentrix/documents/cache/redis_cache_spec.rb` and
18
+ `spec/documentrix/documents/cache/sqlite_cache_spec.rb`.
19
+ - Updated `spec/documents_spec.rb` and
20
+ `spec/documentrix/documents/cache/interface_spec.rb` to remove dependencies
21
+ on the deleted invalidation method.
22
+
3
23
  ## 2026-06-16 v0.5.0
4
24
 
5
25
  ### Improvements
data/documentrix.gemspec CHANGED
@@ -1,9 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: documentrix 0.5.0 ruby lib
2
+ # stub: documentrix 0.6.0 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "documentrix".freeze
6
- s.version = "0.5.0".freeze
6
+ s.version = "0.6.0".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
@@ -101,8 +101,23 @@ class Documentrix::Documents::RedisCache
101
101
  s
102
102
  end
103
103
 
104
- # The clear_all_with_prefix method removes all key-value pairs associated
105
- # with the given prefix from this cache instance.
104
+ # Returns the unique collection names extracted from keys that start with the given prefix.
105
+ # This is a high-performance override for Redis that only queries keys.
106
+ #
107
+ # @param prefix [String] the key prefix that precedes each collection name
108
+ # @return [Array<Symbol>] an array of matching collection names
109
+ def collections(prefix)
110
+ unique = Set.new
111
+ redis.scan_each(match: "#{prefix}*") do |key|
112
+ if key =~ /\A#{prefix}(.+)-/
113
+ unique << $1.to_sym
114
+ end
115
+ end
116
+ unique.to_a
117
+ end
118
+
119
+ # The clear_all_with_prefix method removes all key-value pairs associated with
120
+ # the given prefix from this cache instance.
106
121
  #
107
122
  # @return [Documentrix::Documents::RedisCache] self
108
123
  def clear_all_with_prefix
@@ -108,6 +108,22 @@ class Documentrix::Documents::Cache::SQLiteCache
108
108
  result
109
109
  end
110
110
 
111
+ # Returns the unique collection names extracted from keys that start with the given prefix.
112
+ # This is a high-performance override for SQLite that only queries keys.
113
+ #
114
+ # @param prefix [String] the key prefix that precedes each collection name
115
+ # @return [Array<Symbol>] an array of matching collection names
116
+ def collections(prefix)
117
+ execute(
118
+ %{ SELECT DISTINCT key FROM records WHERE key LIKE ? },
119
+ [ "#{prefix}%" ]
120
+ ).flatten.each_with_object(Set.new) do |key, set|
121
+ if key =~ /\A#{prefix}(.+)-/
122
+ set << $1.to_sym
123
+ end
124
+ end.to_a
125
+ end
126
+
111
127
  # The tags method returns an array of unique tags from the database.
112
128
  #
113
129
  # @return [Documentrix::Utils::Tags] An instance of Documentrix::Utils::Tags
@@ -163,7 +163,7 @@ class Documentrix::Documents
163
163
  infobar.progress by: batch.size
164
164
  end
165
165
  infobar.newline
166
- invalidate_collections_cache!
166
+ self
167
167
  end
168
168
  alias << add
169
169
 
@@ -202,9 +202,7 @@ class Documentrix::Documents
202
202
  # @return [ FalseClass, TrueClass ] true if the text was removed, false
203
203
  # otherwise.
204
204
  def delete(text)
205
- res = @cache.delete(key(text))
206
- invalidate_collections_cache! if res
207
- res
205
+ @cache.delete(key(text))
208
206
  end
209
207
 
210
208
  # The size method returns the number of texts stored in the cache of this
@@ -223,7 +221,7 @@ class Documentrix::Documents
223
221
  # @return [ Documentrix::Documents ] self
224
222
  def clear(tags: nil)
225
223
  @cache.clear(tags:)
226
- invalidate_collections_cache!
224
+ self
227
225
  end
228
226
 
229
227
  # Normalizes the source identifier to a canonical form.
@@ -321,7 +319,7 @@ class Documentrix::Documents
321
319
  def source_remove(source, digest: nil)
322
320
  source = normalize_source(source)
323
321
  @cache.clear_by_source(source, digest:, operator: '!=')
324
- invalidate_collections_cache!
322
+ self
325
323
  end
326
324
 
327
325
  # The find method searches for strings within the cache by computing their
@@ -382,9 +380,7 @@ class Documentrix::Documents
382
380
  #
383
381
  # @return [Array] An array of unique collection names
384
382
  def collections
385
- @collections_cache ||= (
386
- [ default_collection ] + @cache.collections('%s-' % class_prefix)
387
- ).uniq
383
+ [ default_collection ].concat(@cache.collections('%s-' % class_prefix)).uniq
388
384
  end
389
385
 
390
386
  # Rename the current collection, moving all keys from the old prefix to a new
@@ -400,7 +396,7 @@ class Documentrix::Documents
400
396
  new_prefix = '%s-%s-' % [ class_prefix, new_collection ]
401
397
  @cache.move_prefix(prefix, new_prefix)
402
398
  self.collection = new_collection
403
- invalidate_collections_cache!
399
+ self
404
400
  end
405
401
 
406
402
  # The tags method returns an array of unique tags from the cache.
@@ -428,18 +424,6 @@ class Documentrix::Documents
428
424
 
429
425
  private
430
426
 
431
- # Resets the memoized list of collections.
432
- #
433
- # This is called whenever a mutation occurs that could change the set of
434
- # existing collections, ensuring that the #collections method returns a
435
- # fresh, accurate list on the next call.
436
- #
437
- # @return [ Documentrix::Documents ] self
438
- def invalidate_collections_cache!
439
- @collections_cache = nil
440
- self
441
- end
442
-
443
427
  # The connect_cache method initializes and returns an instance of the
444
428
  # specified cache class.
445
429
  #
@@ -1,6 +1,6 @@
1
1
  module Documentrix
2
2
  # Documentrix version
3
- VERSION = '0.5.0'
3
+ VERSION = '0.6.0'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
@@ -101,7 +101,7 @@ describe 'Documentrix::Documents::Cache Interface' do
101
101
 
102
102
  # Common methods (collections is defined on RedisCache itself; the others come from Cache::Common)
103
103
  expect(cache).to respond_to(:collections)
104
- expect(cache.method(:collections).owner).to eq Documentrix::Documents::Cache::Common
104
+ expect(cache.method(:collections).owner).to eq Documentrix::Documents::RedisCache
105
105
 
106
106
  expect(cache).to respond_to(:pre)
107
107
  expect(cache.method(:pre).owner).to eq Documentrix::Documents::Cache::Common
@@ -167,7 +167,7 @@ describe 'Documentrix::Documents::Cache Interface' do
167
167
 
168
168
  # Common methods (collections is defined on SQLiteCache itself; the others come from Cache::Common)
169
169
  expect(cache).to respond_to(:collections)
170
- expect(cache.method(:collections).owner).to eq Documentrix::Documents::Cache::Common
170
+ expect(cache.method(:collections).owner).to eq Documentrix::Documents::Cache::SQLiteCache
171
171
 
172
172
  expect(cache).to respond_to(:pre)
173
173
  expect(cache.method(:pre).owner).to eq Documentrix::Documents::Cache::Common
@@ -184,5 +184,29 @@ describe Documentrix::Documents::RedisCache do
184
184
  expect(redis).to receive(:get).with("#{prefix}foo").and_return(JSON(source: 's1', digest: 'd1'))
185
185
  expect(cache.source_exist?('s1', digest: 'd2')).to be false
186
186
  end
187
+
188
+ describe '#collections' do
189
+ it 'extracts unique collection names from keys' do
190
+ expect(redis).to receive(:scan_each).with(match: "#{prefix}*").and_yield(
191
+ "#{prefix}col1-foo"
192
+ ).and_yield(
193
+ "#{prefix}col1-bar"
194
+ ).and_yield(
195
+ "#{prefix}col2-baz"
196
+ )
197
+
198
+ expect(cache.collections(prefix)).to match_array([:col1, :col2])
199
+ end
200
+
201
+ it 'ignores keys that do not follow the collection pattern' do
202
+ expect(redis).to receive(:scan_each).with(match: "#{prefix}*").and_yield(
203
+ "#{prefix}valid-foo"
204
+ ).and_yield(
205
+ "#{prefix}invalid" # No trailing dash after the name
206
+ )
207
+
208
+ expect(cache.collections(prefix)).to eq [:valid]
209
+ end
210
+ end
187
211
  end
188
212
  end
@@ -353,4 +353,29 @@ describe Documentrix::Documents::SQLiteCache do
353
353
  expect(cache.find_records(needle)).to eq []
354
354
  end
355
355
  end
356
+
357
+ describe '#collections' do
358
+ it 'extracts unique collection names matching the prefix' do
359
+ # Since cache['key'] = val stores as "#{prefix}#{key}",
360
+ # we can create keys like "col1-foo" to get "test-col1-foo"
361
+ cache['col1-foo'] = test_value
362
+ cache['col1-bar'] = test_value
363
+ cache['col2-baz'] = test_value
364
+ cache['justprefix'] = test_value # Matches prefix, but not the pattern "prefix(name)-"
365
+
366
+ expect(cache.collections('test-')).to match_array([:col1, :col2])
367
+ end
368
+
369
+ it 'returns empty array when no keys match the prefix' do
370
+ cache['foo'] = test_value
371
+ expect(cache.collections('nonexistent-')).to eq []
372
+ end
373
+
374
+ it 'returns empty array when keys start with prefix but lack a following hyphen' do
375
+ # We need a key that starts with "test-" but doesn't have another "-" later.
376
+ # Because cache['foo'] = val results in "test-foo", this is exactly what happens.
377
+ cache['foo'] = test_value
378
+ expect(cache.collections('test-')).to eq []
379
+ end
380
+ end
356
381
  end
@@ -23,8 +23,6 @@ describe Documentrix::Documents do
23
23
  expect(ollama).to receive(:embed).
24
24
  with(model:, input: %w[ foo bar ], options: nil).
25
25
  and_return(double(embeddings: [ [ 0.1 ], [ 0.2 ] ]))
26
- expect(documents).to receive(:invalidate_collections_cache!).
27
- and_call_original
28
26
  expect(documents.add(%w[ foo bar ])).to eq documents
29
27
  expect(documents.exist?('foo')).to eq true
30
28
  expect(documents.exist?('bar')).to eq true
@@ -35,8 +33,6 @@ describe Documentrix::Documents do
35
33
  expect(ollama).to receive(:embed).
36
34
  with(model:, input: %w[ foo ], options: nil).
37
35
  and_return(double(embeddings: [ [ 0.1 ] ]))
38
- expect(documents).to receive(:invalidate_collections_cache!).
39
- and_call_original
40
36
  expect(documents << 'foo').to eq documents
41
37
  expect(documents.exist?('foo')).to eq true
42
38
  expect(documents.exist?('bar')).to eq false
@@ -127,8 +123,6 @@ describe Documentrix::Documents do
127
123
 
128
124
  it 'can delete texts' do
129
125
  expect(documents << 'foo').to eq documents
130
- expect(documents).to receive(:invalidate_collections_cache!).
131
- and_call_original
132
126
  expect {
133
127
  documents.delete('foo')
134
128
  }.to change { documents.exist?('foo') }.from(true).to(false)
@@ -142,8 +136,6 @@ describe Documentrix::Documents do
142
136
 
143
137
  it 'can clear texts' do
144
138
  expect(documents << 'foo').to eq documents
145
- expect(documents).to receive(:invalidate_collections_cache!).
146
- and_call_original
147
139
  expect {
148
140
  documents.clear
149
141
  }.to change { documents.size }.from(1).to(0)
@@ -156,13 +148,9 @@ describe Documentrix::Documents do
156
148
  expect(documents.add('foo', tags: %w[ test ])).to eq documents
157
149
  expect(documents.add('bar', tags: %w[ test2 ])).to eq documents
158
150
  expect(documents.tags.to_a).to eq %w[ test test2 ]
159
- expect(documents).to receive(:invalidate_collections_cache!).
160
- and_call_original
161
151
  expect {
162
152
  documents.clear tags: 'test'
163
153
  }.to change { documents.size }.from(2).to(1)
164
- expect(documents).to receive(:invalidate_collections_cache!).
165
- and_call_original
166
154
  expect {
167
155
  documents.clear tags: :test2
168
156
  }.to change { documents.size }.from(1).to(0)
@@ -178,8 +166,6 @@ describe Documentrix::Documents do
178
166
 
179
167
  expect(documents.size).to eq 3
180
168
 
181
- expect(documents).to receive(:invalidate_collections_cache!).
182
- and_call_original
183
169
  documents.source_remove('source1')
184
170
 
185
171
  expect(documents.size).to eq 1
@@ -196,8 +182,6 @@ describe Documentrix::Documents do
196
182
  documents.collection = :foo
197
183
  documents << 'foo'
198
184
  expect(documents.collections).to eq %i[ default foo ]
199
- expect(documents).to receive(:invalidate_collections_cache!).
200
- and_call_original
201
185
  documents.rename_collection(:bar)
202
186
  expect(documents.collection).to eq :bar
203
187
  expect(documents.collections).to eq %i[ default bar ]
@@ -211,8 +195,6 @@ describe Documentrix::Documents do
211
195
  documents.collection = :bar
212
196
  documents << 'foo'
213
197
  expect(documents.collections).to eq %i[ default foo bar ]
214
- expect(documents).not_to receive(:invalidate_collections_cache!).
215
- and_call_original
216
198
  expect {
217
199
  documents.rename_collection(:foo)
218
200
  }.to raise_error(ArgumentError, 'new collection foo already exists!')
@@ -287,8 +269,6 @@ describe Documentrix::Documents do
287
269
  documents.add('foo', source: 's1')
288
270
 
289
271
  expect(ollama).not_to receive(:embed)
290
- expect(documents).to receive(:invalidate_collections_cache!).
291
- and_call_original
292
272
  documents.source_update(['foo'], source: 's1')
293
273
  expect(documents.exist?('foo')).to be true
294
274
  end
@@ -301,8 +281,6 @@ describe Documentrix::Documents do
301
281
  allow(documents.cache).to receive(:compute_file_digest).with('s1').and_return('d2')
302
282
 
303
283
  expect(ollama).to receive(:embed).once
304
- expect(documents).to receive(:invalidate_collections_cache!).
305
- at_least(1).and_call_original
306
284
  documents.source_update(['bar'], source: 's1')
307
285
 
308
286
  expect(documents.exist?('bar')).to be true
@@ -311,8 +289,6 @@ describe Documentrix::Documents do
311
289
 
312
290
  it 'updates the source if it is an URL' do
313
291
  expect(ollama).to receive(:embed).once
314
- expect(documents).to receive(:invalidate_collections_cache!).
315
- and_call_original
316
292
  documents.source_update('foo', source: 'https://www.example.com/s1')
317
293
  expect(documents.exist?('foo')).to be true
318
294
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank