documentrix 0.3.1 → 0.3.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3ab97426ab9fbd4ec832a422a18d574f4c03b11f22391cbc6eded16b9ba0609
4
- data.tar.gz: 991215fb26b4165a4b3562075d9742e3756218e2a24f3f32a550e232f59fb92f
3
+ metadata.gz: 332c67275b90bcc797cdd8e8df75a751bcfa7a52c759c9bcbe4a55970e905401
4
+ data.tar.gz: c546f89c61613b11d18c2cd724cadae8c303a988f925ad1624c5121f511e88fd
5
5
  SHA512:
6
- metadata.gz: d67e453e27428bcd8364349e988556af844caab5faa8a8edb3dcfba9650554b8dbfc66ad322a6f9f59d5ac64350d7006bf899a16b6c83f92909793301b6954d5
7
- data.tar.gz: 49706d4984b27ee7b3d0f8ef8a9efec4a8e74d8bef78d78f15ed5c76f5bacc0faf20c36cef739aa78c1f68e5733696d16999bbd14228ae7be8e6cf9c9baf0a75
6
+ metadata.gz: c80a4c253b4fe367fae1dbb7e6aea50e274a495a0026a7b676e1e962b80c33806fe89976338c12931f11a5059e384fdfddcfe4665f68bde406b118edd8c04d15
7
+ data.tar.gz: '09292fd5d6aed6c939c0244467c743d515e68e5e253d7f443ef96a9c295a61689f1be5b9aec53e402287b6b6c7d305757eb26f79f5ade0161f7e82d534a7b3bf'
data/CHANGES.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # Changes
2
2
 
3
+ ## 2026-05-20 v0.3.2
4
+
5
+ ### Performance Improvements
6
+
7
+ - Optimized collections lookup by memoizing
8
+ `Documentrix::Documents#collections` using `@collections_cache`.
9
+ - Added `Documentrix::Documents#invalidate_collections_cache!` to reset the
10
+ memoized list.
11
+ - Integrated `invalidate_collections_cache!` into `add`, `delete`, `clear`,
12
+ `source_remove`, and `rename_collection` to maintain cache consistency.
13
+
14
+ ### Database & Cache
15
+
16
+ - Standardized SQL keyword casing to uppercase for data types (e.g., `FLOAT`,
17
+ `TEXT`, `INTEGER`, `JSON`) within `SQLiteCache`.
18
+
19
+ ### Documentation & Testing
20
+
21
+ - Refined documentation and return type descriptions for
22
+ `Documentrix::Documents#find` and `Documentrix::Documents#prefix`.
23
+ - Updated `spec/documents_spec.rb` to ensure `invalidate_collections_cache!` is
24
+ correctly triggered during mutations.
25
+
3
26
  ## 2026-05-18 v0.3.1
4
27
 
5
28
  - Fixed scoping bugs in `clear_by_source` and `source_exist?` by implementing a
data/documentrix.gemspec CHANGED
@@ -1,9 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: documentrix 0.3.1 ruby lib
2
+ # stub: documentrix 0.3.2 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "documentrix".freeze
6
- s.version = "0.3.1".freeze
6
+ s.version = "0.3.2".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
@@ -443,18 +443,18 @@ class Documentrix::Documents::Cache::SQLiteCache
443
443
  @database.enable_load_extension(false)
444
444
  execute %{
445
445
  CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vec0(
446
- embedding float[#@embedding_length]
446
+ embedding FLOAT[#@embedding_length]
447
447
  )
448
448
  }
449
449
  execute %{
450
450
  CREATE TABLE IF NOT EXISTS records (
451
- key text NOT NULL PRIMARY KEY ON CONFLICT REPLACE,
452
- text text NOT NULL DEFAULT '',
453
- embedding_id integer,
454
- norm float NOT NULL DEFAULT 0.0,
455
- source text,
456
- digest text,
457
- tags json NOT NULL DEFAULT [],
451
+ key TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE,
452
+ text TEXT NOT NULL DEFAULT '',
453
+ embedding_id INTEGER,
454
+ norm FLOAT NOT NULL DEFAULT 0.0,
455
+ source TEXT,
456
+ digest TEXT,
457
+ tags JSON NOT NULL DEFAULT [],
458
458
  FOREIGN KEY(embedding_id) REFERENCES embeddings(id) ON DELETE CASCADE
459
459
  )
460
460
  }
@@ -162,7 +162,7 @@ class Documentrix::Documents
162
162
  infobar.progress by: batch.size
163
163
  end
164
164
  infobar.newline
165
- self
165
+ invalidate_collections_cache!
166
166
  end
167
167
  alias << add
168
168
 
@@ -201,7 +201,9 @@ class Documentrix::Documents
201
201
  # @return [ FalseClass, TrueClass ] true if the text was removed, false
202
202
  # otherwise.
203
203
  def delete(text)
204
- @cache.delete(key(text))
204
+ res = @cache.delete(key(text))
205
+ invalidate_collections_cache! if res
206
+ res
205
207
  end
206
208
 
207
209
  # The size method returns the number of texts stored in the cache of this
@@ -220,7 +222,7 @@ class Documentrix::Documents
220
222
  # @return [ Documentrix::Documents ] self
221
223
  def clear(tags: nil)
222
224
  @cache.clear(tags:)
223
- self
225
+ invalidate_collections_cache!
224
226
  end
225
227
 
226
228
  # Normalizes the source identifier to a canonical form.
@@ -318,7 +320,7 @@ class Documentrix::Documents
318
320
  def source_remove(source, digest: nil)
319
321
  source = normalize_source(source)
320
322
  @cache.clear_by_source(source, digest:, operator: '!=')
321
- self
323
+ invalidate_collections_cache!
322
324
  end
323
325
 
324
326
  # The find method searches for strings within the cache by computing their
@@ -359,7 +361,7 @@ class Documentrix::Documents
359
361
  # @param text_count [Integer] the maximum number of records to return
360
362
  # @param opts [Hash] additional options passed to #find, such as:
361
363
  # * :tags [Array<String>] filter results by tags
362
- # * :prompt [String] a prompt to use for the search
364
+ # * :prompt [String] the prompt to use for the search
363
365
  # * :min_similarity [Numeric] minimum similarity score
364
366
  #
365
367
  # @example
@@ -379,7 +381,9 @@ class Documentrix::Documents
379
381
  #
380
382
  # @return [Array] An array of unique collection names
381
383
  def collections
382
- ([ default_collection ] + @cache.collections('%s-' % class_prefix)).uniq
384
+ @collections_cache ||= (
385
+ [ default_collection ] + @cache.collections('%s-' % class_prefix)
386
+ ).uniq
383
387
  end
384
388
 
385
389
  # Rename the current collection, moving all keys from the old prefix to a new
@@ -395,6 +399,7 @@ class Documentrix::Documents
395
399
  new_prefix = '%s-%s-' % [ class_prefix, new_collection ]
396
400
  @cache.move_prefix(prefix, new_prefix)
397
401
  self.collection = new_collection
402
+ invalidate_collections_cache!
398
403
  end
399
404
 
400
405
  # The tags method returns an array of unique tags from the cache.
@@ -406,6 +411,18 @@ class Documentrix::Documents
406
411
 
407
412
  private
408
413
 
414
+ # Resets the memoized list of collections.
415
+ #
416
+ # This is called whenever a mutation occurs that could change the set of
417
+ # existing collections, ensuring that the #collections method returns a
418
+ # fresh, accurate list on the next call.
419
+ #
420
+ # @return [ Documentrix::Documents ] self
421
+ def invalidate_collections_cache!
422
+ @collections_cache = nil
423
+ self
424
+ end
425
+
409
426
  # The connect_cache method initializes and returns an instance of the
410
427
  # specified cache class.
411
428
  #
@@ -482,7 +499,7 @@ class Documentrix::Documents
482
499
  # The prefix method returns a string that is used as the prefix for keys in
483
500
  # the cache of the currently configured collection.
484
501
  #
485
- # @return [ String ] The prefix string
502
+ # @return [ String ] the prefix string
486
503
  def prefix
487
504
  '%s-%s-' % [ class_prefix, @collection ]
488
505
  end
@@ -1,6 +1,6 @@
1
1
  module Documentrix
2
2
  # Documentrix version
3
- VERSION = '0.3.1'
3
+ VERSION = '0.3.2'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
@@ -23,6 +23,8 @@ describe Documentrix::Documents do
23
23
  expect(ollama).to receive(:embed).
24
24
  with(model:, input: %w[ foo bar ], options: nil).
25
25
  and_return(double(embeddings: [ [ 0.1 ], [ 0.2 ] ]))
26
+ expect(documents).to receive(:invalidate_collections_cache!).
27
+ and_call_original
26
28
  expect(documents.add(%w[ foo bar ])).to eq documents
27
29
  expect(documents.exist?('foo')).to eq true
28
30
  expect(documents.exist?('bar')).to eq true
@@ -33,6 +35,8 @@ describe Documentrix::Documents do
33
35
  expect(ollama).to receive(:embed).
34
36
  with(model:, input: %w[ foo ], options: nil).
35
37
  and_return(double(embeddings: [ [ 0.1 ] ]))
38
+ expect(documents).to receive(:invalidate_collections_cache!).
39
+ and_call_original
36
40
  expect(documents << 'foo').to eq documents
37
41
  expect(documents.exist?('foo')).to eq true
38
42
  expect(documents.exist?('bar')).to eq false
@@ -123,6 +127,8 @@ describe Documentrix::Documents do
123
127
 
124
128
  it 'can delete texts' do
125
129
  expect(documents << 'foo').to eq documents
130
+ expect(documents).to receive(:invalidate_collections_cache!).
131
+ and_call_original
126
132
  expect {
127
133
  documents.delete('foo')
128
134
  }.to change { documents.exist?('foo') }.from(true).to(false)
@@ -136,6 +142,8 @@ describe Documentrix::Documents do
136
142
 
137
143
  it 'can clear texts' do
138
144
  expect(documents << 'foo').to eq documents
145
+ expect(documents).to receive(:invalidate_collections_cache!).
146
+ and_call_original
139
147
  expect {
140
148
  documents.clear
141
149
  }.to change { documents.size }.from(1).to(0)
@@ -148,9 +156,13 @@ describe Documentrix::Documents do
148
156
  expect(documents.add('foo', tags: %w[ test ])).to eq documents
149
157
  expect(documents.add('bar', tags: %w[ test2 ])).to eq documents
150
158
  expect(documents.tags.to_a).to eq %w[ test test2 ]
159
+ expect(documents).to receive(:invalidate_collections_cache!).
160
+ and_call_original
151
161
  expect {
152
162
  documents.clear tags: 'test'
153
163
  }.to change { documents.size }.from(2).to(1)
164
+ expect(documents).to receive(:invalidate_collections_cache!).
165
+ and_call_original
154
166
  expect {
155
167
  documents.clear tags: :test2
156
168
  }.to change { documents.size }.from(1).to(0)
@@ -166,6 +178,8 @@ describe Documentrix::Documents do
166
178
 
167
179
  expect(documents.size).to eq 3
168
180
 
181
+ expect(documents).to receive(:invalidate_collections_cache!).
182
+ and_call_original
169
183
  documents.source_remove('source1')
170
184
 
171
185
  expect(documents.size).to eq 1
@@ -182,6 +196,8 @@ describe Documentrix::Documents do
182
196
  documents.collection = :foo
183
197
  documents << 'foo'
184
198
  expect(documents.collections).to eq %i[ default foo ]
199
+ expect(documents).to receive(:invalidate_collections_cache!).
200
+ and_call_original
185
201
  documents.rename_collection(:bar)
186
202
  expect(documents.collection).to eq :bar
187
203
  expect(documents.collections).to eq %i[ default bar ]
@@ -195,6 +211,8 @@ describe Documentrix::Documents do
195
211
  documents.collection = :bar
196
212
  documents << 'foo'
197
213
  expect(documents.collections).to eq %i[ default foo bar ]
214
+ expect(documents).not_to receive(:invalidate_collections_cache!).
215
+ and_call_original
198
216
  expect {
199
217
  documents.rename_collection(:foo)
200
218
  }.to raise_error(ArgumentError, 'new collection foo already exists!')
@@ -243,6 +261,8 @@ describe Documentrix::Documents do
243
261
  documents.add('foo', source: 's1')
244
262
 
245
263
  expect(ollama).not_to receive(:embed)
264
+ expect(documents).to receive(:invalidate_collections_cache!).
265
+ and_call_original
246
266
  documents.source_update(['foo'], source: 's1')
247
267
  expect(documents.exist?('foo')).to be true
248
268
  end
@@ -255,6 +275,8 @@ describe Documentrix::Documents do
255
275
  allow(documents.cache).to receive(:compute_file_digest).with('s1').and_return('d2')
256
276
 
257
277
  expect(ollama).to receive(:embed).once
278
+ expect(documents).to receive(:invalidate_collections_cache!).
279
+ at_least(1).and_call_original
258
280
  documents.source_update(['bar'], source: 's1')
259
281
 
260
282
  expect(documents.exist?('bar')).to be true
@@ -263,6 +285,8 @@ describe Documentrix::Documents do
263
285
 
264
286
  it 'updates the source if it is an URL' do
265
287
  expect(ollama).to receive(:embed).once
288
+ expect(documents).to receive(:invalidate_collections_cache!).
289
+ and_call_original
266
290
  documents.source_update('foo', source: 'https://www.example.com/s1')
267
291
  expect(documents.exist?('foo')).to be true
268
292
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank