documentrix 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ef2b0d8150c99cc22cc17d7d29d0331729b06d116e593fd32289852eb7739228
4
- data.tar.gz: 386b85971a711fd6ba73aad746e98df33c14109c5524b48dd8fa20f3059fc875
3
+ metadata.gz: c45e75b570207ac77d9a04e95939050c33d8f5e0645c7ebac96c862de4e252f3
4
+ data.tar.gz: 18b0f526ec16115483de74c027a7d8eaa8c8ba2461ac41d4695f53f1ceff32c7
5
5
  SHA512:
6
- metadata.gz: 7b1abe342b523199e58724d1b9ec66ce56654389dc010fc59c2847647825876b8c03e1511c5b007dd97ee8f19f4e79caf132760dbb549906d5febdc9ac403bc1
7
- data.tar.gz: 5b638e7af884173350e48dcdb83a81d2adeb6c6a4f90dc20243b690d888f083d95edcd34e00fd3bffc9435aa8a29db151b0c1c6311f696d39379e397379ef9df
6
+ metadata.gz: dddf96ef71ab25c35c6872905cea070c8a27036caf841d97626fadc1db172e496a3b8f13748000cbb9a096b66ae59f459bae145fc2a4a06abffad7de267362a9
7
+ data.tar.gz: cab52c41f1749fe0ff56538cd01e4525787921e700dca91e44f10d939ec942afc59478c4f8256033fdbf5ef4a38ac649d80eadfbc711042bccb85e38b056a569
data/CHANGES.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # Changes
2
2
 
3
+ ## 2026-05-12 v0.2.0
4
+
5
+ ### Added
6
+
7
+ - Implemented source-based document removal by adding the `remove` method to
8
+ `Documentrix::Documents`.
9
+ - Added `clear_by_source` to `Documentrix::Documents::Cache::Common` as the
10
+ default cache implementation.
11
+ - Added an optimized `clear_by_source` override in
12
+ `Documentrix::Documents::Cache::SQLiteCache` utilizing a direct SQL `DELETE`
13
+ query.
14
+
15
+ ### Changed
16
+
17
+ - Updated `documentrix.gemspec` to use `rubygems_version` **4.0.10**.
18
+ - Updated `gem_hadar` dependency to **2.17.1**.
19
+
20
+ ### Testing
21
+
22
+ - Expanded test coverage in `spec/documents_spec.rb`,
23
+ `spec/documentrix/documents/cache/interface_spec.rb`, and all specific cache
24
+ specs.
25
+
3
26
  ## 2026-03-31 v0.1.1
4
27
 
5
28
  - Improved compatibility and reliability by ensuring the gem uses a stable,
data/Rakefile CHANGED
@@ -33,7 +33,7 @@ GemHadar do
33
33
  dependency 'infobar', '~> 0.9'
34
34
  dependency 'json', '~> 2.0'
35
35
  dependency 'tins', '~> 1.34'
36
- dependency 'sqlite-vec', '>= 0.1.8'
36
+ dependency 'sqlite-vec', '>= 0.1.9'
37
37
  dependency 'sqlite3', '~> 2.0', '>= 2.0.1'
38
38
  dependency 'kramdown-ansi', '~> 0.0', '>= 0.0.1'
39
39
  dependency 'numo-narray-alt', '~> 0.9'
data/documentrix.gemspec CHANGED
@@ -1,9 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: documentrix 0.1.1 ruby lib
2
+ # stub: documentrix 0.2.0 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "documentrix".freeze
6
- s.version = "0.1.1".freeze
6
+ s.version = "0.2.0".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
@@ -17,13 +17,13 @@ Gem::Specification.new do |s|
17
17
  s.licenses = ["MIT".freeze]
18
18
  s.rdoc_options = ["--title".freeze, "Documentrix - Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
19
19
  s.required_ruby_version = Gem::Requirement.new(">= 3.1".freeze)
20
- s.rubygems_version = "4.0.8".freeze
20
+ s.rubygems_version = "4.0.10".freeze
21
21
  s.summary = "Ruby library for embedding vector database".freeze
22
22
  s.test_files = ["spec/documentrix/documents/cache/interface_spec.rb".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
23
23
 
24
24
  s.specification_version = 4
25
25
 
26
- s.add_development_dependency(%q<gem_hadar>.freeze, [">= 2.17.0".freeze])
26
+ s.add_development_dependency(%q<gem_hadar>.freeze, [">= 2.17.1".freeze])
27
27
  s.add_development_dependency(%q<all_images>.freeze, ["~> 0.12".freeze])
28
28
  s.add_development_dependency(%q<rspec>.freeze, ["~> 3.2".freeze])
29
29
  s.add_development_dependency(%q<kramdown>.freeze, ["~> 2.0".freeze])
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.9".freeze])
33
33
  s.add_runtime_dependency(%q<json>.freeze, ["~> 2.0".freeze])
34
34
  s.add_runtime_dependency(%q<tins>.freeze, ["~> 1.34".freeze])
35
- s.add_runtime_dependency(%q<sqlite-vec>.freeze, [">= 0.1.8".freeze])
35
+ s.add_runtime_dependency(%q<sqlite-vec>.freeze, [">= 0.1.9".freeze])
36
36
  s.add_runtime_dependency(%q<sqlite3>.freeze, ["~> 2.0".freeze, ">= 2.0.1".freeze])
37
37
  s.add_runtime_dependency(%q<kramdown-ansi>.freeze, ["~> 0.0".freeze, ">= 0.0.1".freeze])
38
38
  s.add_runtime_dependency(%q<numo-narray-alt>.freeze, ["~> 0.9".freeze])
@@ -116,6 +116,19 @@ module Documentrix::Documents::Cache::Common
116
116
  self
117
117
  end
118
118
 
119
+ # The clear_by_source method removes all records from the cache that
120
+ # have a source matching the given source.
121
+ #
122
+ # @param source [String] the source to filter records by
123
+ #
124
+ # @return [self] self
125
+ def clear_by_source(source)
126
+ each do |key, record|
127
+ delete(unpre(key)) if record.source == source
128
+ end
129
+ self
130
+ end
131
+
119
132
  # The clear method removes cached records based on the provided tags or
120
133
  # clears all records with the current prefix.
121
134
  #
@@ -157,6 +157,17 @@ class Documentrix::Documents::Cache::SQLiteCache
157
157
  self
158
158
  end
159
159
 
160
+ # The clear_by_source method removes all records from the cache that
161
+ # have a source matching the given source.
162
+ #
163
+ # @param source [String] the source to filter records by
164
+ #
165
+ # @return [Documentrix::Documents::Cache::SQLiteCache] self
166
+ def clear_by_source(source)
167
+ execute(%{DELETE FROM records WHERE source = ?}, [ source ])
168
+ self
169
+ end
170
+
160
171
  # Move a key prefix in the cache.
161
172
  #
162
173
  # This operation updates every record whose key starts with +old_prefix+,
@@ -219,6 +219,16 @@ class Documentrix::Documents
219
219
  self
220
220
  end
221
221
 
222
+ # The remove method removes all documents associated with the given source.
223
+ #
224
+ # @param source [String] the source of the documents to remove
225
+ #
226
+ # @return [Documentrix::Documents] self
227
+ def remove(source)
228
+ @cache.clear_by_source(source)
229
+ self
230
+ end
231
+
222
232
  # The find method searches for strings within the cache by computing their
223
233
  # similarity scores.
224
234
  #
@@ -1,6 +1,6 @@
1
1
  module Documentrix
2
2
  # Documentrix version
3
- VERSION = '0.1.1'
3
+ VERSION = '0.2.0'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
@@ -52,6 +52,9 @@ describe 'Documentrix::Documents::Cache Interface' do
52
52
  expect(cache).to respond_to(:clear_for_tags)
53
53
  expect(cache.method(:clear_for_tags).owner).to eq Documentrix::Documents::Cache::Common
54
54
 
55
+ expect(cache).to respond_to(:clear_by_source)
56
+ expect(cache.method(:clear_by_source).owner).to eq Documentrix::Documents::Cache::Common
57
+
55
58
  expect(cache).to respond_to(:clear)
56
59
  expect(cache.method(:clear).owner).to eq Documentrix::Documents::Cache::Common
57
60
  end
@@ -108,6 +111,9 @@ describe 'Documentrix::Documents::Cache Interface' do
108
111
  expect(cache).to respond_to(:clear_for_tags)
109
112
  expect(cache.method(:clear_for_tags).owner).to eq Documentrix::Documents::Cache::Common
110
113
 
114
+ expect(cache).to respond_to(:clear_by_source)
115
+ expect(cache.method(:clear_by_source).owner).to eq Documentrix::Documents::Cache::Common
116
+
111
117
  expect(cache).to respond_to(:clear)
112
118
  expect(cache.method(:clear).owner).to eq Documentrix::Documents::Cache::Common
113
119
 
@@ -168,6 +174,9 @@ describe 'Documentrix::Documents::Cache Interface' do
168
174
  expect(cache).to respond_to(:clear_for_tags)
169
175
  expect(cache.method(:clear_for_tags).owner).to eq Documentrix::Documents::Cache::SQLiteCache
170
176
 
177
+ expect(cache).to respond_to(:clear_by_source)
178
+ expect(cache.method(:clear_by_source).owner).to eq Documentrix::Documents::Cache::SQLiteCache
179
+
171
180
  expect(cache).to respond_to(:clear)
172
181
  expect(cache.method(:clear).owner).to eq Documentrix::Documents::Cache::Common
173
182
 
@@ -120,6 +120,17 @@ describe Documentrix::Documents::MemoryCache do
120
120
  }.from(1).to(0)
121
121
  end
122
122
 
123
+ it 'can clear by source' do
124
+ cache['foo'] = Documentrix::Documents::Record[text: 'foo', source: 's1', embedding: [0.1]]
125
+ cache['bar'] = Documentrix::Documents::Record[text: 'bar', source: 's1', embedding: [0.1]]
126
+ cache['baz'] = Documentrix::Documents::Record[text: 'baz', source: 's2', embedding: [0.1]]
127
+ expect {
128
+ cache.clear_by_source('s1')
129
+ }.to change { cache.size }.from(3).to(1)
130
+ expect(cache.key?('baz')).to be true
131
+ expect(cache.key?('foo')).to be false
132
+ end
133
+
123
134
  it 'can iterate over keys under a prefix' do
124
135
  cache['foo'] = 'bar'
125
136
  expect(cache.to_a).to eq [ %W[ #{prefix}foo bar ] ]
@@ -109,6 +109,20 @@ describe Documentrix::Documents::RedisCache do
109
109
  expect(cache.clear).to eq cache
110
110
  end
111
111
 
112
+ it 'can clear by source' do
113
+ object_class = Class.new(JSON::GenericObject)
114
+ cache = described_class.new(prefix:, url: 'something', object_class:)
115
+ expect(redis).to receive(:scan_each).with(match: 'test-*').and_yield(
116
+ 'test-foo'
117
+ ).and_yield(
118
+ 'test-bar'
119
+ )
120
+ expect(redis).to receive(:get).with('test-foo').and_return(JSON(source: 's1'))
121
+ expect(redis).to receive(:get).with('test-bar').and_return(JSON(source: 's2'))
122
+ expect(redis).to receive(:del).with('test-foo')
123
+ expect(cache.clear_by_source('s1')).to eq cache
124
+ end
125
+
112
126
  it 'can iterate over keys under a prefix' do
113
127
  expect(redis).to receive(:scan_each).with(match: 'test-*')
114
128
  cache.to_a
@@ -145,6 +145,20 @@ describe Documentrix::Documents::SQLiteCache do
145
145
  expect(cache).to be_key 'bar'
146
146
  end
147
147
 
148
+ it 'can clear by source' do
149
+ val1 = test_value.merge(source: 's1')
150
+ val2 = test_value.merge(source: 's1')
151
+ val3 = test_value.merge(source: 's2')
152
+ cache['foo'] = val1
153
+ cache['bar'] = val2
154
+ cache['baz'] = val3
155
+ expect {
156
+ cache.clear_by_source('s1')
157
+ }.to change { cache.size }.from(3).to(1)
158
+ expect(cache.key?('baz')).to be true
159
+ expect(cache.key?('foo')).to be false
160
+ end
161
+
148
162
  it 'can return tags' do
149
163
  key, value = 'foo', { tags: %w[ foo ], embedding: [ 0.5 ] * 1_024 }
150
164
  cache[key] = value
@@ -158,6 +158,24 @@ describe Documentrix::Documents do
158
158
  }.to change { documents.size }.from(1).to(0)
159
159
  end
160
160
 
161
+ it 'can remove sources' do
162
+ allow(ollama).to receive(:embed).at_least(:once).
163
+ and_return(double(embeddings: [ [ 0.1 ] ]))
164
+
165
+ documents.add('foo', source: 'source1')
166
+ documents.add('bar', source: 'source1')
167
+ documents.add('baz', source: 'source2')
168
+
169
+ expect(documents.size).to eq 3
170
+
171
+ documents.remove('source1')
172
+
173
+ expect(documents.size).to eq 1
174
+ expect(documents.exist?('baz')).to be true
175
+ expect(documents.exist?('foo')).to be false
176
+ expect(documents.exist?('bar')).to be false
177
+ end
178
+
161
179
  it 'returns collections' do
162
180
  expect(documents.collections).to eq [ :default ]
163
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
@@ -15,14 +15,14 @@ dependencies:
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: 2.17.0
18
+ version: 2.17.1
19
19
  type: :development
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: 2.17.0
25
+ version: 2.17.1
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: all_images
28
28
  requirement: !ruby/object:Gem::Requirement
@@ -141,14 +141,14 @@ dependencies:
141
141
  requirements:
142
142
  - - ">="
143
143
  - !ruby/object:Gem::Version
144
- version: 0.1.8
144
+ version: 0.1.9
145
145
  type: :runtime
146
146
  prerelease: false
147
147
  version_requirements: !ruby/object:Gem::Requirement
148
148
  requirements:
149
149
  - - ">="
150
150
  - !ruby/object:Gem::Version
151
- version: 0.1.8
151
+ version: 0.1.9
152
152
  - !ruby/object:Gem::Dependency
153
153
  name: sqlite3
154
154
  requirement: !ruby/object:Gem::Requirement
@@ -314,7 +314,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
314
314
  - !ruby/object:Gem::Version
315
315
  version: '0'
316
316
  requirements: []
317
- rubygems_version: 4.0.8
317
+ rubygems_version: 4.0.10
318
318
  specification_version: 4
319
319
  summary: Ruby library for embedding vector database
320
320
  test_files: