documentrix 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +23 -0
- data/documentrix.gemspec +2 -2
- data/lib/documentrix/documents/cache/sqlite_cache.rb +8 -8
- data/lib/documentrix/documents.rb +24 -7
- data/lib/documentrix/version.rb +1 -1
- data/spec/documents_spec.rb +24 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 332c67275b90bcc797cdd8e8df75a751bcfa7a52c759c9bcbe4a55970e905401
|
|
4
|
+
data.tar.gz: c546f89c61613b11d18c2cd724cadae8c303a988f925ad1624c5121f511e88fd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c80a4c253b4fe367fae1dbb7e6aea50e274a495a0026a7b676e1e962b80c33806fe89976338c12931f11a5059e384fdfddcfe4665f68bde406b118edd8c04d15
|
|
7
|
+
data.tar.gz: '09292fd5d6aed6c939c0244467c743d515e68e5e253d7f443ef96a9c295a61689f1be5b9aec53e402287b6b6c7d305757eb26f79f5ade0161f7e82d534a7b3bf'
|
data/CHANGES.md
CHANGED
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
## 2026-05-20 v0.3.2
|
|
4
|
+
|
|
5
|
+
### Performance Improvements
|
|
6
|
+
|
|
7
|
+
- Optimized collections lookup by memoizing
|
|
8
|
+
`Documentrix::Documents#collections` using `@collections_cache`.
|
|
9
|
+
- Added `Documentrix::Documents#invalidate_collections_cache!` to reset the
|
|
10
|
+
memoized list.
|
|
11
|
+
- Integrated `invalidate_collections_cache!` into `add`, `delete`, `clear`,
|
|
12
|
+
`source_remove`, and `rename_collection` to maintain cache consistency.
|
|
13
|
+
|
|
14
|
+
### Database & Cache
|
|
15
|
+
|
|
16
|
+
- Standardized SQL keyword casing to uppercase for data types (e.g., `FLOAT`,
|
|
17
|
+
`TEXT`, `INTEGER`, `JSON`) within `SQLiteCache`.
|
|
18
|
+
|
|
19
|
+
### Documentation & Testing
|
|
20
|
+
|
|
21
|
+
- Refined documentation and return type descriptions for
|
|
22
|
+
`Documentrix::Documents#find` and `Documentrix::Documents#prefix`.
|
|
23
|
+
- Updated `spec/documents_spec.rb` to ensure `invalidate_collections_cache!` is
|
|
24
|
+
correctly triggered during mutations.
|
|
25
|
+
|
|
3
26
|
## 2026-05-18 v0.3.1
|
|
4
27
|
|
|
5
28
|
- Fixed scoping bugs in `clear_by_source` and `source_exist?` by implementing a
|
data/documentrix.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: documentrix 0.3.
|
|
2
|
+
# stub: documentrix 0.3.2 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "documentrix".freeze
|
|
6
|
-
s.version = "0.3.
|
|
6
|
+
s.version = "0.3.2".freeze
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
|
@@ -443,18 +443,18 @@ class Documentrix::Documents::Cache::SQLiteCache
|
|
|
443
443
|
@database.enable_load_extension(false)
|
|
444
444
|
execute %{
|
|
445
445
|
CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vec0(
|
|
446
|
-
embedding
|
|
446
|
+
embedding FLOAT[#@embedding_length]
|
|
447
447
|
)
|
|
448
448
|
}
|
|
449
449
|
execute %{
|
|
450
450
|
CREATE TABLE IF NOT EXISTS records (
|
|
451
|
-
key
|
|
452
|
-
text
|
|
453
|
-
embedding_id
|
|
454
|
-
norm
|
|
455
|
-
source
|
|
456
|
-
digest
|
|
457
|
-
tags
|
|
451
|
+
key TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE,
|
|
452
|
+
text TEXT NOT NULL DEFAULT '',
|
|
453
|
+
embedding_id INTEGER,
|
|
454
|
+
norm FLOAT NOT NULL DEFAULT 0.0,
|
|
455
|
+
source TEXT,
|
|
456
|
+
digest TEXT,
|
|
457
|
+
tags JSON NOT NULL DEFAULT [],
|
|
458
458
|
FOREIGN KEY(embedding_id) REFERENCES embeddings(id) ON DELETE CASCADE
|
|
459
459
|
)
|
|
460
460
|
}
|
|
@@ -162,7 +162,7 @@ class Documentrix::Documents
|
|
|
162
162
|
infobar.progress by: batch.size
|
|
163
163
|
end
|
|
164
164
|
infobar.newline
|
|
165
|
-
|
|
165
|
+
invalidate_collections_cache!
|
|
166
166
|
end
|
|
167
167
|
alias << add
|
|
168
168
|
|
|
@@ -201,7 +201,9 @@ class Documentrix::Documents
|
|
|
201
201
|
# @return [ FalseClass, TrueClass ] true if the text was removed, false
|
|
202
202
|
# otherwise.
|
|
203
203
|
def delete(text)
|
|
204
|
-
@cache.delete(key(text))
|
|
204
|
+
res = @cache.delete(key(text))
|
|
205
|
+
invalidate_collections_cache! if res
|
|
206
|
+
res
|
|
205
207
|
end
|
|
206
208
|
|
|
207
209
|
# The size method returns the number of texts stored in the cache of this
|
|
@@ -220,7 +222,7 @@ class Documentrix::Documents
|
|
|
220
222
|
# @return [ Documentrix::Documents ] self
|
|
221
223
|
def clear(tags: nil)
|
|
222
224
|
@cache.clear(tags:)
|
|
223
|
-
|
|
225
|
+
invalidate_collections_cache!
|
|
224
226
|
end
|
|
225
227
|
|
|
226
228
|
# Normalizes the source identifier to a canonical form.
|
|
@@ -318,7 +320,7 @@ class Documentrix::Documents
|
|
|
318
320
|
def source_remove(source, digest: nil)
|
|
319
321
|
source = normalize_source(source)
|
|
320
322
|
@cache.clear_by_source(source, digest:, operator: '!=')
|
|
321
|
-
|
|
323
|
+
invalidate_collections_cache!
|
|
322
324
|
end
|
|
323
325
|
|
|
324
326
|
# The find method searches for strings within the cache by computing their
|
|
@@ -359,7 +361,7 @@ class Documentrix::Documents
|
|
|
359
361
|
# @param text_count [Integer] the maximum number of records to return
|
|
360
362
|
# @param opts [Hash] additional options passed to #find, such as:
|
|
361
363
|
# * :tags [Array<String>] filter results by tags
|
|
362
|
-
# * :prompt [String]
|
|
364
|
+
# * :prompt [String] use for the search
|
|
363
365
|
# * :min_similarity [Numeric] minimum similarity score
|
|
364
366
|
#
|
|
365
367
|
# @example
|
|
@@ -379,7 +381,9 @@ class Documentrix::Documents
|
|
|
379
381
|
#
|
|
380
382
|
# @return [Array] An array of unique collection names
|
|
381
383
|
def collections
|
|
382
|
-
|
|
384
|
+
@collections_cache ||= (
|
|
385
|
+
[ default_collection ] + @cache.collections('%s-' % class_prefix)
|
|
386
|
+
).uniq
|
|
383
387
|
end
|
|
384
388
|
|
|
385
389
|
# Rename the current collection, moving all keys from the old prefix to a new
|
|
@@ -395,6 +399,7 @@ class Documentrix::Documents
|
|
|
395
399
|
new_prefix = '%s-%s-' % [ class_prefix, new_collection ]
|
|
396
400
|
@cache.move_prefix(prefix, new_prefix)
|
|
397
401
|
self.collection = new_collection
|
|
402
|
+
invalidate_collections_cache!
|
|
398
403
|
end
|
|
399
404
|
|
|
400
405
|
# The tags method returns an array of unique tags from the cache.
|
|
@@ -406,6 +411,18 @@ class Documentrix::Documents
|
|
|
406
411
|
|
|
407
412
|
private
|
|
408
413
|
|
|
414
|
+
# Resets the memoized list of collections.
|
|
415
|
+
#
|
|
416
|
+
# This is called whenever a mutation occurs that could change the set of
|
|
417
|
+
# existing collections, ensuring that the #collections method returns a
|
|
418
|
+
# fresh, accurate list on the next call.
|
|
419
|
+
#
|
|
420
|
+
# @return [ Documentrix::Documents ] self
|
|
421
|
+
def invalidate_collections_cache!
|
|
422
|
+
@collections_cache = nil
|
|
423
|
+
self
|
|
424
|
+
end
|
|
425
|
+
|
|
409
426
|
# The connect_cache method initializes and returns an instance of the
|
|
410
427
|
# specified cache class.
|
|
411
428
|
#
|
|
@@ -482,7 +499,7 @@ class Documentrix::Documents
|
|
|
482
499
|
# The prefix method returns a string that is used as the prefix for keys in
|
|
483
500
|
# the cache of the currently configured collection.
|
|
484
501
|
#
|
|
485
|
-
# @return [ String ]
|
|
502
|
+
# @return [ String ] the prefix string
|
|
486
503
|
def prefix
|
|
487
504
|
'%s-%s-' % [ class_prefix, @collection ]
|
|
488
505
|
end
|
data/lib/documentrix/version.rb
CHANGED
data/spec/documents_spec.rb
CHANGED
|
@@ -23,6 +23,8 @@ describe Documentrix::Documents do
|
|
|
23
23
|
expect(ollama).to receive(:embed).
|
|
24
24
|
with(model:, input: %w[ foo bar ], options: nil).
|
|
25
25
|
and_return(double(embeddings: [ [ 0.1 ], [ 0.2 ] ]))
|
|
26
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
27
|
+
and_call_original
|
|
26
28
|
expect(documents.add(%w[ foo bar ])).to eq documents
|
|
27
29
|
expect(documents.exist?('foo')).to eq true
|
|
28
30
|
expect(documents.exist?('bar')).to eq true
|
|
@@ -33,6 +35,8 @@ describe Documentrix::Documents do
|
|
|
33
35
|
expect(ollama).to receive(:embed).
|
|
34
36
|
with(model:, input: %w[ foo ], options: nil).
|
|
35
37
|
and_return(double(embeddings: [ [ 0.1 ] ]))
|
|
38
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
39
|
+
and_call_original
|
|
36
40
|
expect(documents << 'foo').to eq documents
|
|
37
41
|
expect(documents.exist?('foo')).to eq true
|
|
38
42
|
expect(documents.exist?('bar')).to eq false
|
|
@@ -123,6 +127,8 @@ describe Documentrix::Documents do
|
|
|
123
127
|
|
|
124
128
|
it 'can delete texts' do
|
|
125
129
|
expect(documents << 'foo').to eq documents
|
|
130
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
131
|
+
and_call_original
|
|
126
132
|
expect {
|
|
127
133
|
documents.delete('foo')
|
|
128
134
|
}.to change { documents.exist?('foo') }.from(true).to(false)
|
|
@@ -136,6 +142,8 @@ describe Documentrix::Documents do
|
|
|
136
142
|
|
|
137
143
|
it 'can clear texts' do
|
|
138
144
|
expect(documents << 'foo').to eq documents
|
|
145
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
146
|
+
and_call_original
|
|
139
147
|
expect {
|
|
140
148
|
documents.clear
|
|
141
149
|
}.to change { documents.size }.from(1).to(0)
|
|
@@ -148,9 +156,13 @@ describe Documentrix::Documents do
|
|
|
148
156
|
expect(documents.add('foo', tags: %w[ test ])).to eq documents
|
|
149
157
|
expect(documents.add('bar', tags: %w[ test2 ])).to eq documents
|
|
150
158
|
expect(documents.tags.to_a).to eq %w[ test test2 ]
|
|
159
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
160
|
+
and_call_original
|
|
151
161
|
expect {
|
|
152
162
|
documents.clear tags: 'test'
|
|
153
163
|
}.to change { documents.size }.from(2).to(1)
|
|
164
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
165
|
+
and_call_original
|
|
154
166
|
expect {
|
|
155
167
|
documents.clear tags: :test2
|
|
156
168
|
}.to change { documents.size }.from(1).to(0)
|
|
@@ -166,6 +178,8 @@ describe Documentrix::Documents do
|
|
|
166
178
|
|
|
167
179
|
expect(documents.size).to eq 3
|
|
168
180
|
|
|
181
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
182
|
+
and_call_original
|
|
169
183
|
documents.source_remove('source1')
|
|
170
184
|
|
|
171
185
|
expect(documents.size).to eq 1
|
|
@@ -182,6 +196,8 @@ describe Documentrix::Documents do
|
|
|
182
196
|
documents.collection = :foo
|
|
183
197
|
documents << 'foo'
|
|
184
198
|
expect(documents.collections).to eq %i[ default foo ]
|
|
199
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
200
|
+
and_call_original
|
|
185
201
|
documents.rename_collection(:bar)
|
|
186
202
|
expect(documents.collection).to eq :bar
|
|
187
203
|
expect(documents.collections).to eq %i[ default bar ]
|
|
@@ -195,6 +211,8 @@ describe Documentrix::Documents do
|
|
|
195
211
|
documents.collection = :bar
|
|
196
212
|
documents << 'foo'
|
|
197
213
|
expect(documents.collections).to eq %i[ default foo bar ]
|
|
214
|
+
expect(documents).not_to receive(:invalidate_collections_cache!).
|
|
215
|
+
and_call_original
|
|
198
216
|
expect {
|
|
199
217
|
documents.rename_collection(:foo)
|
|
200
218
|
}.to raise_error(ArgumentError, 'new collection foo already exists!')
|
|
@@ -243,6 +261,8 @@ describe Documentrix::Documents do
|
|
|
243
261
|
documents.add('foo', source: 's1')
|
|
244
262
|
|
|
245
263
|
expect(ollama).not_to receive(:embed)
|
|
264
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
265
|
+
and_call_original
|
|
246
266
|
documents.source_update(['foo'], source: 's1')
|
|
247
267
|
expect(documents.exist?('foo')).to be true
|
|
248
268
|
end
|
|
@@ -255,6 +275,8 @@ describe Documentrix::Documents do
|
|
|
255
275
|
allow(documents.cache).to receive(:compute_file_digest).with('s1').and_return('d2')
|
|
256
276
|
|
|
257
277
|
expect(ollama).to receive(:embed).once
|
|
278
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
279
|
+
at_least(1).and_call_original
|
|
258
280
|
documents.source_update(['bar'], source: 's1')
|
|
259
281
|
|
|
260
282
|
expect(documents.exist?('bar')).to be true
|
|
@@ -263,6 +285,8 @@ describe Documentrix::Documents do
|
|
|
263
285
|
|
|
264
286
|
it 'updates the source if it is an URL' do
|
|
265
287
|
expect(ollama).to receive(:embed).once
|
|
288
|
+
expect(documents).to receive(:invalidate_collections_cache!).
|
|
289
|
+
and_call_original
|
|
266
290
|
documents.source_update('foo', source: 'https://www.example.com/s1')
|
|
267
291
|
expect(documents.exist?('foo')).to be true
|
|
268
292
|
end
|