documentrix 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +20 -0
- data/documentrix.gemspec +2 -2
- data/lib/documentrix/documents/cache/redis_cache.rb +17 -2
- data/lib/documentrix/documents/cache/sqlite_cache.rb +16 -0
- data/lib/documentrix/documents.rb +6 -22
- data/lib/documentrix/version.rb +1 -1
- data/spec/documentrix/documents/cache/interface_spec.rb +2 -2
- data/spec/documentrix/documents/cache/redis_cache_spec.rb +24 -0
- data/spec/documentrix/documents/cache/sqlite_cache_spec.rb +25 -0
- data/spec/documents_spec.rb +0 -24
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ced4bf69e3ae1b4251844dd7915f8d75e72d1c40410f34c14311f70dd55c91f4
|
|
4
|
+
data.tar.gz: 6fc0799e4559f50a22d211e630ea5f7deaac671f24805f5501c85a11c7a9b853
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8023a4b4d7cad8948e6cbd2a65cefd8b3993ae3de38ae5520a2ebf178989a78efb4c1296b51c614c4066c547372f9ad8067b6c8e196caaa098876a3adf44f523
|
|
7
|
+
data.tar.gz: 0c4544a80ecde5c98c1d3da64ffeb97155324fdc063993caddf6d2d0780c490b0c8ec24dbc2ca392b3ceb6023d964f58bad099ca4c509a6e43c33c4f7406beb0
|
data/CHANGES.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
## 2026-06-17 v0.6.0
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
|
|
7
|
+
- Refactored collection discovery to ensure strong consistency across multiple
|
|
8
|
+
client instances by delegating `Documentrix::Documents#collections` directly
|
|
9
|
+
to the cache backend.
|
|
10
|
+
- Removed `@collections_cache` and the `invalidate_collections_cache!` method
|
|
11
|
+
from `lib/documentrix/documents.rb`.
|
|
12
|
+
- Implemented a high-performance `#collections` method in
|
|
13
|
+
`lib/documentrix/documents/cache/redis_cache.rb` utilizing `scan_each`.
|
|
14
|
+
- Implemented a high-performance `#collections` method in
|
|
15
|
+
`lib/documentrix/documents/cache/sqlite_cache.rb` using SQL `DISTINCT`.
|
|
16
|
+
- Added specialized unit tests for collection extraction and regex patterns
|
|
17
|
+
within `spec/documentrix/documents/cache/redis_cache_spec.rb` and
|
|
18
|
+
`spec/documentrix/documents/cache/sqlite_cache_spec.rb`.
|
|
19
|
+
- Updated `spec/documents_spec.rb` and
|
|
20
|
+
`spec/documentrix/documents/cache/interface_spec.rb` to remove dependencies
|
|
21
|
+
on the deleted invalidation method.
|
|
22
|
+
|
|
3
23
|
## 2026-06-16 v0.5.0
|
|
4
24
|
|
|
5
25
|
### Improvements
|
data/documentrix.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: documentrix 0.5.0 ruby lib
|
|
2
|
+
# stub: documentrix 0.6.0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "documentrix".freeze
|
|
6
|
-
s.version = "0.5.0".freeze
|
|
6
|
+
s.version = "0.6.0".freeze
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
|
@@ -101,8 +101,23 @@ class Documentrix::Documents::RedisCache
|
|
|
101
101
|
s
|
|
102
102
|
end
|
|
103
103
|
|
|
104
|
-
# The clear_all_with_prefix method removes all key-value pairs associated with
|
|
105
|
-
# the given prefix from this cache instance.
|
|
104
|
+
# Returns an array of collection names that match the given prefix.
|
|
105
|
+
# This is a high-performance override for Redis that only queries keys.
|
|
106
|
+
#
|
|
107
|
+
# @param prefix [String] the prefix to search for in collection names
|
|
108
|
+
# @return [Array<Symbol>] an array of matching collection names
|
|
109
|
+
def collections(prefix)
|
|
110
|
+
unique = Set.new
|
|
111
|
+
redis.scan_each(match: "#{prefix}*") do |key|
|
|
112
|
+
if key =~ /\A#{prefix}(.+)-/
|
|
113
|
+
unique << $1.to_sym
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
unique.to_a
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# The clear_all_with_prefix method removes all key-value pairs associated with
|
|
120
|
+
# the given prefix from this cache instance.
|
|
106
121
|
#
|
|
107
122
|
# @return [Documentrix::Documents::RedisCache] self
|
|
108
123
|
def clear_all_with_prefix
|
|
@@ -108,6 +108,22 @@ class Documentrix::Documents::Cache::SQLiteCache
|
|
|
108
108
|
result
|
|
109
109
|
end
|
|
110
110
|
|
|
111
|
+
# Returns an array of collection names that match the given prefix.
|
|
112
|
+
# This is a high-performance override for SQLite that only queries keys.
|
|
113
|
+
#
|
|
114
|
+
# @param prefix [String] the prefix to search for in collection names
|
|
115
|
+
# @return [Array<Symbol>] an array of matching collection names
|
|
116
|
+
def collections(prefix)
|
|
117
|
+
execute(
|
|
118
|
+
%{ SELECT DISTINCT key FROM records WHERE key LIKE ? },
|
|
119
|
+
[ "#{prefix}%" ]
|
|
120
|
+
).flatten.each_with_object(Set.new) do |key, set|
|
|
121
|
+
if key =~ /\A#{prefix}(.+)-/
|
|
122
|
+
set << $1.to_sym
|
|
123
|
+
end
|
|
124
|
+
end.to_a
|
|
125
|
+
end
|
|
126
|
+
|
|
111
127
|
# The tags method returns an array of unique tags from the database.
|
|
112
128
|
#
|
|
113
129
|
# @return [Documentrix::Utils::Tags] An instance of Documentrix::Utils::Tags
|
|
@@ -163,7 +163,7 @@ class Documentrix::Documents
|
|
|
163
163
|
infobar.progress by: batch.size
|
|
164
164
|
end
|
|
165
165
|
infobar.newline
|
|
166
|
-
|
|
166
|
+
self
|
|
167
167
|
end
|
|
168
168
|
alias << add
|
|
169
169
|
|
|
@@ -202,9 +202,7 @@ class Documentrix::Documents
|
|
|
202
202
|
# @return [ FalseClass, TrueClass ] true if the text was removed, false
|
|
203
203
|
# otherwise.
|
|
204
204
|
def delete(text)
|
|
205
|
-
|
|
206
|
-
invalidate_collections_cache! if res
|
|
207
|
-
res
|
|
205
|
+
@cache.delete(key(text))
|
|
208
206
|
end
|
|
209
207
|
|
|
210
208
|
# The size method returns the number of texts stored in the cache of this
|
|
@@ -223,7 +221,7 @@ class Documentrix::Documents
|
|
|
223
221
|
# @return [ Documentrix::Documents ] self
|
|
224
222
|
def clear(tags: nil)
|
|
225
223
|
@cache.clear(tags:)
|
|
226
|
-
|
|
224
|
+
self
|
|
227
225
|
end
|
|
228
226
|
|
|
229
227
|
# Normalizes the source identifier to a canonical form.
|
|
@@ -321,7 +319,7 @@ class Documentrix::Documents
|
|
|
321
319
|
def source_remove(source, digest: nil)
|
|
322
320
|
source = normalize_source(source)
|
|
323
321
|
@cache.clear_by_source(source, digest:, operator: '!=')
|
|
324
|
-
|
|
322
|
+
self
|
|
325
323
|
end
|
|
326
324
|
|
|
327
325
|
# The find method searches for strings within the cache by computing their
|
|
@@ -382,9 +380,7 @@ class Documentrix::Documents
|
|
|
382
380
|
#
|
|
383
381
|
# @return [Array] An array of unique collection names
|
|
384
382
|
def collections
|
|
385
|
-
@collections_cache ||= (
|
|
386
|
-
[ default_collection ] + @cache.collections('%s-' % class_prefix)
|
|
387
|
-
).uniq
|
|
383
|
+
[ default_collection ].concat(@cache.collections('%s-' % class_prefix)).uniq
|
|
388
384
|
end
|
|
389
385
|
|
|
390
386
|
# Rename the current collection, moving all keys from the old prefix to a new
|
|
@@ -400,7 +396,7 @@ class Documentrix::Documents
|
|
|
400
396
|
new_prefix = '%s-%s-' % [ class_prefix, new_collection ]
|
|
401
397
|
@cache.move_prefix(prefix, new_prefix)
|
|
402
398
|
self.collection = new_collection
|
|
403
|
-
|
|
399
|
+
self
|
|
404
400
|
end
|
|
405
401
|
|
|
406
402
|
# The tags method returns an array of unique tags from the cache.
|
|
@@ -428,18 +424,6 @@ class Documentrix::Documents
|
|
|
428
424
|
|
|
429
425
|
private
|
|
430
426
|
|
|
431
|
-
# Resets the memoized list of collections.
|
|
432
|
-
#
|
|
433
|
-
# This is called whenever a mutation occurs that could change the set of
|
|
434
|
-
# existing collections, ensuring that the #collections method returns a
|
|
435
|
-
# fresh, accurate list on the next call.
|
|
436
|
-
#
|
|
437
|
-
# @return [ Documentrix::Documents ] self
|
|
438
|
-
def invalidate_collections_cache!
|
|
439
|
-
@collections_cache = nil
|
|
440
|
-
self
|
|
441
|
-
end
|
|
442
|
-
|
|
443
427
|
# The connect_cache method initializes and returns an instance of the
|
|
444
428
|
# specified cache class.
|
|
445
429
|
#
|
data/lib/documentrix/version.rb
CHANGED
|
@@ -101,7 +101,7 @@ describe 'Documentrix::Documents::Cache Interface' do
|
|
|
101
101
|
|
|
102
102
|
# Common methods from Cache::Common
|
|
103
103
|
expect(cache).to respond_to(:collections)
|
|
104
|
-
expect(cache.method(:collections).owner).to eq Documentrix::Documents::Cache::Common
|
|
104
|
+
expect(cache.method(:collections).owner).to eq Documentrix::Documents::RedisCache
|
|
105
105
|
|
|
106
106
|
expect(cache).to respond_to(:pre)
|
|
107
107
|
expect(cache.method(:pre).owner).to eq Documentrix::Documents::Cache::Common
|
|
@@ -167,7 +167,7 @@ describe 'Documentrix::Documents::Cache Interface' do
|
|
|
167
167
|
|
|
168
168
|
# Common methods from Cache::Common
|
|
169
169
|
expect(cache).to respond_to(:collections)
|
|
170
|
-
expect(cache.method(:collections).owner).to eq Documentrix::Documents::Cache::Common
|
|
170
|
+
expect(cache.method(:collections).owner).to eq Documentrix::Documents::Cache::SQLiteCache
|
|
171
171
|
|
|
172
172
|
expect(cache).to respond_to(:pre)
|
|
173
173
|
expect(cache.method(:pre).owner).to eq Documentrix::Documents::Cache::Common
|
|
@@ -184,5 +184,29 @@ describe Documentrix::Documents::RedisCache do
|
|
|
184
184
|
expect(redis).to receive(:get).with("#{prefix}foo").and_return(JSON(source: 's1', digest: 'd1'))
|
|
185
185
|
expect(cache.source_exist?('s1', digest: 'd2')).to be false
|
|
186
186
|
end
|
|
187
|
+
|
|
188
|
+
describe '#collections' do
|
|
189
|
+
it 'extracts unique collection names from keys' do
|
|
190
|
+
expect(redis).to receive(:scan_each).with(match: "#{prefix}*").and_yield(
|
|
191
|
+
"#{prefix}col1-foo"
|
|
192
|
+
).and_yield(
|
|
193
|
+
"#{prefix}col1-bar"
|
|
194
|
+
).and_yield(
|
|
195
|
+
"#{prefix}col2-baz"
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
expect(cache.collections(prefix)).to match_array([:col1, :col2])
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
it 'ignores keys that do not follow the collection pattern' do
|
|
202
|
+
expect(redis).to receive(:scan_each).with(match: "#{prefix}*").and_yield(
|
|
203
|
+
"#{prefix}valid-foo"
|
|
204
|
+
).and_yield(
|
|
205
|
+
"#{prefix}invalid" # No trailing dash after the name
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
expect(cache.collections(prefix)).to eq [:valid]
|
|
209
|
+
end
|
|
210
|
+
end
|
|
187
211
|
end
|
|
188
212
|
end
|
|
@@ -353,4 +353,29 @@ describe Documentrix::Documents::SQLiteCache do
|
|
|
353
353
|
expect(cache.find_records(needle)).to eq []
|
|
354
354
|
end
|
|
355
355
|
end
|
|
356
|
+
|
|
357
|
+
describe '#collections' do
|
|
358
|
+
it 'extracts unique collection names matching the prefix' do
|
|
359
|
+
# Since cache['key'] = val stores as "#{prefix}#{key}",
|
|
360
|
+
# we can create keys like "col1-foo" to get "test-col1-foo"
|
|
361
|
+
cache['col1-foo'] = test_value
|
|
362
|
+
cache['col1-bar'] = test_value
|
|
363
|
+
cache['col2-baz'] = test_value
|
|
364
|
+
cache['justprefix'] = test_value # Matches prefix, but not the pattern "prefix(name)-"
|
|
365
|
+
|
|
366
|
+
expect(cache.collections('test-')).to match_array([:col1, :col2])
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
it 'returns empty array when no keys match the prefix' do
|
|
370
|
+
cache['foo'] = test_value
|
|
371
|
+
expect(cache.collections('nonexistent-')).to eq []
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
it 'returns empty array when keys start with prefix but lack a following hyphen' do
|
|
375
|
+
# We need a key that starts with "test-" but doesn't have another "-" later.
|
|
376
|
+
# Because cache['foo'] = val results in "test-foo", this is exactly what happens.
|
|
377
|
+
cache['foo'] = test_value
|
|
378
|
+
expect(cache.collections('test-')).to eq []
|
|
379
|
+
end
|
|
380
|
+
end
|
|
356
381
|
end
|
data/spec/documents_spec.rb
CHANGED
|
@@ -23,8 +23,6 @@ describe Documentrix::Documents do
|
|
|
23
23
|
expect(ollama).to receive(:embed).
|
|
24
24
|
with(model:, input: %w[ foo bar ], options: nil).
|
|
25
25
|
and_return(double(embeddings: [ [ 0.1 ], [ 0.2 ] ]))
|
|
26
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
27
|
-
and_call_original
|
|
28
26
|
expect(documents.add(%w[ foo bar ])).to eq documents
|
|
29
27
|
expect(documents.exist?('foo')).to eq true
|
|
30
28
|
expect(documents.exist?('bar')).to eq true
|
|
@@ -35,8 +33,6 @@ describe Documentrix::Documents do
|
|
|
35
33
|
expect(ollama).to receive(:embed).
|
|
36
34
|
with(model:, input: %w[ foo ], options: nil).
|
|
37
35
|
and_return(double(embeddings: [ [ 0.1 ] ]))
|
|
38
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
39
|
-
and_call_original
|
|
40
36
|
expect(documents << 'foo').to eq documents
|
|
41
37
|
expect(documents.exist?('foo')).to eq true
|
|
42
38
|
expect(documents.exist?('bar')).to eq false
|
|
@@ -127,8 +123,6 @@ describe Documentrix::Documents do
|
|
|
127
123
|
|
|
128
124
|
it 'can delete texts' do
|
|
129
125
|
expect(documents << 'foo').to eq documents
|
|
130
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
131
|
-
and_call_original
|
|
132
126
|
expect {
|
|
133
127
|
documents.delete('foo')
|
|
134
128
|
}.to change { documents.exist?('foo') }.from(true).to(false)
|
|
@@ -142,8 +136,6 @@ describe Documentrix::Documents do
|
|
|
142
136
|
|
|
143
137
|
it 'can clear texts' do
|
|
144
138
|
expect(documents << 'foo').to eq documents
|
|
145
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
146
|
-
and_call_original
|
|
147
139
|
expect {
|
|
148
140
|
documents.clear
|
|
149
141
|
}.to change { documents.size }.from(1).to(0)
|
|
@@ -156,13 +148,9 @@ describe Documentrix::Documents do
|
|
|
156
148
|
expect(documents.add('foo', tags: %w[ test ])).to eq documents
|
|
157
149
|
expect(documents.add('bar', tags: %w[ test2 ])).to eq documents
|
|
158
150
|
expect(documents.tags.to_a).to eq %w[ test test2 ]
|
|
159
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
160
|
-
and_call_original
|
|
161
151
|
expect {
|
|
162
152
|
documents.clear tags: 'test'
|
|
163
153
|
}.to change { documents.size }.from(2).to(1)
|
|
164
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
165
|
-
and_call_original
|
|
166
154
|
expect {
|
|
167
155
|
documents.clear tags: :test2
|
|
168
156
|
}.to change { documents.size }.from(1).to(0)
|
|
@@ -178,8 +166,6 @@ describe Documentrix::Documents do
|
|
|
178
166
|
|
|
179
167
|
expect(documents.size).to eq 3
|
|
180
168
|
|
|
181
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
182
|
-
and_call_original
|
|
183
169
|
documents.source_remove('source1')
|
|
184
170
|
|
|
185
171
|
expect(documents.size).to eq 1
|
|
@@ -196,8 +182,6 @@ describe Documentrix::Documents do
|
|
|
196
182
|
documents.collection = :foo
|
|
197
183
|
documents << 'foo'
|
|
198
184
|
expect(documents.collections).to eq %i[ default foo ]
|
|
199
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
200
|
-
and_call_original
|
|
201
185
|
documents.rename_collection(:bar)
|
|
202
186
|
expect(documents.collection).to eq :bar
|
|
203
187
|
expect(documents.collections).to eq %i[ default bar ]
|
|
@@ -211,8 +195,6 @@ describe Documentrix::Documents do
|
|
|
211
195
|
documents.collection = :bar
|
|
212
196
|
documents << 'foo'
|
|
213
197
|
expect(documents.collections).to eq %i[ default foo bar ]
|
|
214
|
-
expect(documents).not_to receive(:invalidate_collections_cache!).
|
|
215
|
-
and_call_original
|
|
216
198
|
expect {
|
|
217
199
|
documents.rename_collection(:foo)
|
|
218
200
|
}.to raise_error(ArgumentError, 'new collection foo already exists!')
|
|
@@ -287,8 +269,6 @@ describe Documentrix::Documents do
|
|
|
287
269
|
documents.add('foo', source: 's1')
|
|
288
270
|
|
|
289
271
|
expect(ollama).not_to receive(:embed)
|
|
290
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
291
|
-
and_call_original
|
|
292
272
|
documents.source_update(['foo'], source: 's1')
|
|
293
273
|
expect(documents.exist?('foo')).to be true
|
|
294
274
|
end
|
|
@@ -301,8 +281,6 @@ describe Documentrix::Documents do
|
|
|
301
281
|
allow(documents.cache).to receive(:compute_file_digest).with('s1').and_return('d2')
|
|
302
282
|
|
|
303
283
|
expect(ollama).to receive(:embed).once
|
|
304
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
305
|
-
at_least(1).and_call_original
|
|
306
284
|
documents.source_update(['bar'], source: 's1')
|
|
307
285
|
|
|
308
286
|
expect(documents.exist?('bar')).to be true
|
|
@@ -311,8 +289,6 @@ describe Documentrix::Documents do
|
|
|
311
289
|
|
|
312
290
|
it 'updates the source if it is an URL' do
|
|
313
291
|
expect(ollama).to receive(:embed).once
|
|
314
|
-
expect(documents).to receive(:invalidate_collections_cache!).
|
|
315
|
-
and_call_original
|
|
316
292
|
documents.source_update('foo', source: 'https://www.example.com/s1')
|
|
317
293
|
expect(documents.exist?('foo')).to be true
|
|
318
294
|
end
|