documentrix 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +23 -0
- data/Rakefile +1 -1
- data/documentrix.gemspec +5 -5
- data/lib/documentrix/documents/cache/common.rb +13 -0
- data/lib/documentrix/documents/cache/sqlite_cache.rb +11 -0
- data/lib/documentrix/documents.rb +10 -0
- data/lib/documentrix/version.rb +1 -1
- data/spec/documentrix/documents/cache/interface_spec.rb +9 -0
- data/spec/documentrix/documents/cache/memory_cache_spec.rb +11 -0
- data/spec/documentrix/documents/cache/redis_cache_spec.rb +14 -0
- data/spec/documentrix/documents/cache/sqlite_cache_spec.rb +14 -0
- data/spec/documents_spec.rb +18 -0
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c45e75b570207ac77d9a04e95939050c33d8f5e0645c7ebac96c862de4e252f3
|
|
4
|
+
data.tar.gz: 18b0f526ec16115483de74c027a7d8eaa8c8ba2461ac41d4695f53f1ceff32c7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dddf96ef71ab25c35c6872905cea070c8a27036caf841d97626fadc1db172e496a3b8f13748000cbb9a096b66ae59f459bae145fc2a4a06abffad7de267362a9
|
|
7
|
+
data.tar.gz: cab52c41f1749fe0ff56538cd01e4525787921e700dca91e44f10d939ec942afc59478c4f8256033fdbf5ef4a38ac649d80eadfbc711042bccb85e38b056a569
|
data/CHANGES.md
CHANGED
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
## 2026-05-12 v0.2.0
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- Implemented source-based document removal by adding the `remove` method to
|
|
8
|
+
`Documentrix::Documents`.
|
|
9
|
+
- Added `clear_by_source` to `Documentrix::Documents::Cache::Common` as the
|
|
10
|
+
default cache implementation.
|
|
11
|
+
- Added an optimized `clear_by_source` override in
|
|
12
|
+
`Documentrix::Documents::Cache::SQLiteCache` utilizing a direct SQL `DELETE`
|
|
13
|
+
query.
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
|
|
17
|
+
- Updated `documentrix.gemspec` to use `rubygems_version` **4.0.10**.
|
|
18
|
+
- Updated `gem_hadar` dependency to **2.17.1**.
|
|
19
|
+
|
|
20
|
+
### Testing
|
|
21
|
+
|
|
22
|
+
- Expanded test coverage in `spec/documents_spec.rb`,
|
|
23
|
+
`spec/documentrix/documents/cache/interface_spec.rb`, and all specific cache
|
|
24
|
+
specs.
|
|
25
|
+
|
|
3
26
|
## 2026-03-31 v0.1.1
|
|
4
27
|
|
|
5
28
|
- Improved compatibility and reliability by ensuring the gem uses a stable,
|
data/Rakefile
CHANGED
|
@@ -33,7 +33,7 @@ GemHadar do
|
|
|
33
33
|
dependency 'infobar', '~> 0.9'
|
|
34
34
|
dependency 'json', '~> 2.0'
|
|
35
35
|
dependency 'tins', '~> 1.34'
|
|
36
|
-
dependency 'sqlite-vec', '>= 0.1.
|
|
36
|
+
dependency 'sqlite-vec', '>= 0.1.9'
|
|
37
37
|
dependency 'sqlite3', '~> 2.0', '>= 2.0.1'
|
|
38
38
|
dependency 'kramdown-ansi', '~> 0.0', '>= 0.0.1'
|
|
39
39
|
dependency 'numo-narray-alt', '~> 0.9'
|
data/documentrix.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: documentrix 0.
|
|
2
|
+
# stub: documentrix 0.2.0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "documentrix".freeze
|
|
6
|
-
s.version = "0.
|
|
6
|
+
s.version = "0.2.0".freeze
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
|
@@ -17,13 +17,13 @@ Gem::Specification.new do |s|
|
|
|
17
17
|
s.licenses = ["MIT".freeze]
|
|
18
18
|
s.rdoc_options = ["--title".freeze, "Documentrix - Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
|
|
19
19
|
s.required_ruby_version = Gem::Requirement.new(">= 3.1".freeze)
|
|
20
|
-
s.rubygems_version = "4.0.
|
|
20
|
+
s.rubygems_version = "4.0.10".freeze
|
|
21
21
|
s.summary = "Ruby library for embedding vector database".freeze
|
|
22
22
|
s.test_files = ["spec/documentrix/documents/cache/interface_spec.rb".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
|
|
23
23
|
|
|
24
24
|
s.specification_version = 4
|
|
25
25
|
|
|
26
|
-
s.add_development_dependency(%q<gem_hadar>.freeze, [">= 2.17.
|
|
26
|
+
s.add_development_dependency(%q<gem_hadar>.freeze, [">= 2.17.1".freeze])
|
|
27
27
|
s.add_development_dependency(%q<all_images>.freeze, ["~> 0.12".freeze])
|
|
28
28
|
s.add_development_dependency(%q<rspec>.freeze, ["~> 3.2".freeze])
|
|
29
29
|
s.add_development_dependency(%q<kramdown>.freeze, ["~> 2.0".freeze])
|
|
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
|
|
|
32
32
|
s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.9".freeze])
|
|
33
33
|
s.add_runtime_dependency(%q<json>.freeze, ["~> 2.0".freeze])
|
|
34
34
|
s.add_runtime_dependency(%q<tins>.freeze, ["~> 1.34".freeze])
|
|
35
|
-
s.add_runtime_dependency(%q<sqlite-vec>.freeze, [">= 0.1.
|
|
35
|
+
s.add_runtime_dependency(%q<sqlite-vec>.freeze, [">= 0.1.9".freeze])
|
|
36
36
|
s.add_runtime_dependency(%q<sqlite3>.freeze, ["~> 2.0".freeze, ">= 2.0.1".freeze])
|
|
37
37
|
s.add_runtime_dependency(%q<kramdown-ansi>.freeze, ["~> 0.0".freeze, ">= 0.0.1".freeze])
|
|
38
38
|
s.add_runtime_dependency(%q<numo-narray-alt>.freeze, ["~> 0.9".freeze])
|
|
data/lib/documentrix/documents/cache/common.rb
CHANGED
|
@@ -116,6 +116,19 @@ module Documentrix::Documents::Cache::Common
|
|
|
116
116
|
self
|
|
117
117
|
end
|
|
118
118
|
|
|
119
|
+
# The clear_by_source method removes all records from the cache that
|
|
120
|
+
# have a source matching the given source.
|
|
121
|
+
#
|
|
122
|
+
# @param source [String] the source to filter records by
|
|
123
|
+
#
|
|
124
|
+
# @return [self] self
|
|
125
|
+
def clear_by_source(source)
|
|
126
|
+
each do |key, record|
|
|
127
|
+
delete(unpre(key)) if record.source == source
|
|
128
|
+
end
|
|
129
|
+
self
|
|
130
|
+
end
|
|
131
|
+
|
|
119
132
|
# The clear method removes cached records based on the provided tags or
|
|
120
133
|
# clears all records with the current prefix.
|
|
121
134
|
#
|
|
data/lib/documentrix/documents/cache/sqlite_cache.rb
CHANGED
|
@@ -157,6 +157,17 @@ class Documentrix::Documents::Cache::SQLiteCache
|
|
|
157
157
|
self
|
|
158
158
|
end
|
|
159
159
|
|
|
160
|
+
# The clear_by_source method removes all records from the cache that
|
|
161
|
+
# have a source matching the given source.
|
|
162
|
+
#
|
|
163
|
+
# @param source [String] the source to filter records by
|
|
164
|
+
#
|
|
165
|
+
# @return [Documentrix::Documents::Cache::SQLiteCache] self
|
|
166
|
+
def clear_by_source(source)
|
|
167
|
+
execute(%{DELETE FROM records WHERE source = ?}, [ source ])
|
|
168
|
+
self
|
|
169
|
+
end
|
|
170
|
+
|
|
160
171
|
# Move a key prefix in the cache.
|
|
161
172
|
#
|
|
162
173
|
# This operation updates every record whose key starts with +old_prefix+,
|
|
data/lib/documentrix/documents.rb
CHANGED
|
@@ -219,6 +219,16 @@ class Documentrix::Documents
|
|
|
219
219
|
self
|
|
220
220
|
end
|
|
221
221
|
|
|
222
|
+
# The remove method removes all documents associated with the given source.
|
|
223
|
+
#
|
|
224
|
+
# @param source [String] the source of the documents to remove
|
|
225
|
+
#
|
|
226
|
+
# @return [Documentrix::Documents] self
|
|
227
|
+
def remove(source)
|
|
228
|
+
@cache.clear_by_source(source)
|
|
229
|
+
self
|
|
230
|
+
end
|
|
231
|
+
|
|
222
232
|
# The find method searches for strings within the cache by computing their
|
|
223
233
|
# similarity scores.
|
|
224
234
|
#
|
data/lib/documentrix/version.rb
CHANGED
|
data/spec/documentrix/documents/cache/interface_spec.rb
CHANGED
|
@@ -52,6 +52,9 @@ describe 'Documentrix::Documents::Cache Interface' do
|
|
|
52
52
|
expect(cache).to respond_to(:clear_for_tags)
|
|
53
53
|
expect(cache.method(:clear_for_tags).owner).to eq Documentrix::Documents::Cache::Common
|
|
54
54
|
|
|
55
|
+
expect(cache).to respond_to(:clear_by_source)
|
|
56
|
+
expect(cache.method(:clear_by_source).owner).to eq Documentrix::Documents::Cache::Common
|
|
57
|
+
|
|
55
58
|
expect(cache).to respond_to(:clear)
|
|
56
59
|
expect(cache.method(:clear).owner).to eq Documentrix::Documents::Cache::Common
|
|
57
60
|
end
|
|
@@ -108,6 +111,9 @@ describe 'Documentrix::Documents::Cache Interface' do
|
|
|
108
111
|
expect(cache).to respond_to(:clear_for_tags)
|
|
109
112
|
expect(cache.method(:clear_for_tags).owner).to eq Documentrix::Documents::Cache::Common
|
|
110
113
|
|
|
114
|
+
expect(cache).to respond_to(:clear_by_source)
|
|
115
|
+
expect(cache.method(:clear_by_source).owner).to eq Documentrix::Documents::Cache::Common
|
|
116
|
+
|
|
111
117
|
expect(cache).to respond_to(:clear)
|
|
112
118
|
expect(cache.method(:clear).owner).to eq Documentrix::Documents::Cache::Common
|
|
113
119
|
|
|
@@ -168,6 +174,9 @@ describe 'Documentrix::Documents::Cache Interface' do
|
|
|
168
174
|
expect(cache).to respond_to(:clear_for_tags)
|
|
169
175
|
expect(cache.method(:clear_for_tags).owner).to eq Documentrix::Documents::Cache::SQLiteCache
|
|
170
176
|
|
|
177
|
+
expect(cache).to respond_to(:clear_by_source)
|
|
178
|
+
expect(cache.method(:clear_by_source).owner).to eq Documentrix::Documents::Cache::SQLiteCache
|
|
179
|
+
|
|
171
180
|
expect(cache).to respond_to(:clear)
|
|
172
181
|
expect(cache.method(:clear).owner).to eq Documentrix::Documents::Cache::Common
|
|
173
182
|
|
|
data/spec/documentrix/documents/cache/memory_cache_spec.rb
CHANGED
|
@@ -120,6 +120,17 @@ describe Documentrix::Documents::MemoryCache do
|
|
|
120
120
|
}.from(1).to(0)
|
|
121
121
|
end
|
|
122
122
|
|
|
123
|
+
it 'can clear by source' do
|
|
124
|
+
cache['foo'] = Documentrix::Documents::Record[text: 'foo', source: 's1', embedding: [0.1]]
|
|
125
|
+
cache['bar'] = Documentrix::Documents::Record[text: 'bar', source: 's1', embedding: [0.1]]
|
|
126
|
+
cache['baz'] = Documentrix::Documents::Record[text: 'baz', source: 's2', embedding: [0.1]]
|
|
127
|
+
expect {
|
|
128
|
+
cache.clear_by_source('s1')
|
|
129
|
+
}.to change { cache.size }.from(3).to(1)
|
|
130
|
+
expect(cache.key?('baz')).to be true
|
|
131
|
+
expect(cache.key?('foo')).to be false
|
|
132
|
+
end
|
|
133
|
+
|
|
123
134
|
it 'can iterate over keys under a prefix' do
|
|
124
135
|
cache['foo'] = 'bar'
|
|
125
136
|
expect(cache.to_a).to eq [ %W[ #{prefix}foo bar ] ]
|
|
data/spec/documentrix/documents/cache/redis_cache_spec.rb
CHANGED
|
@@ -109,6 +109,20 @@ describe Documentrix::Documents::RedisCache do
|
|
|
109
109
|
expect(cache.clear).to eq cache
|
|
110
110
|
end
|
|
111
111
|
|
|
112
|
+
it 'can clear by source' do
|
|
113
|
+
object_class = Class.new(JSON::GenericObject)
|
|
114
|
+
cache = described_class.new(prefix:, url: 'something', object_class:)
|
|
115
|
+
expect(redis).to receive(:scan_each).with(match: 'test-*').and_yield(
|
|
116
|
+
'test-foo'
|
|
117
|
+
).and_yield(
|
|
118
|
+
'test-bar'
|
|
119
|
+
)
|
|
120
|
+
expect(redis).to receive(:get).with('test-foo').and_return(JSON(source: 's1'))
|
|
121
|
+
expect(redis).to receive(:get).with('test-bar').and_return(JSON(source: 's2'))
|
|
122
|
+
expect(redis).to receive(:del).with('test-foo')
|
|
123
|
+
expect(cache.clear_by_source('s1')).to eq cache
|
|
124
|
+
end
|
|
125
|
+
|
|
112
126
|
it 'can iterate over keys under a prefix' do
|
|
113
127
|
expect(redis).to receive(:scan_each).with(match: 'test-*')
|
|
114
128
|
cache.to_a
|
|
data/spec/documentrix/documents/cache/sqlite_cache_spec.rb
CHANGED
|
@@ -145,6 +145,20 @@ describe Documentrix::Documents::SQLiteCache do
|
|
|
145
145
|
expect(cache).to be_key 'bar'
|
|
146
146
|
end
|
|
147
147
|
|
|
148
|
+
it 'can clear by source' do
|
|
149
|
+
val1 = test_value.merge(source: 's1')
|
|
150
|
+
val2 = test_value.merge(source: 's1')
|
|
151
|
+
val3 = test_value.merge(source: 's2')
|
|
152
|
+
cache['foo'] = val1
|
|
153
|
+
cache['bar'] = val2
|
|
154
|
+
cache['baz'] = val3
|
|
155
|
+
expect {
|
|
156
|
+
cache.clear_by_source('s1')
|
|
157
|
+
}.to change { cache.size }.from(3).to(1)
|
|
158
|
+
expect(cache.key?('baz')).to be true
|
|
159
|
+
expect(cache.key?('foo')).to be false
|
|
160
|
+
end
|
|
161
|
+
|
|
148
162
|
it 'can return tags' do
|
|
149
163
|
key, value = 'foo', { tags: %w[ foo ], embedding: [ 0.5 ] * 1_024 }
|
|
150
164
|
cache[key] = value
|
data/spec/documents_spec.rb
CHANGED
|
@@ -158,6 +158,24 @@ describe Documentrix::Documents do
|
|
|
158
158
|
}.to change { documents.size }.from(1).to(0)
|
|
159
159
|
end
|
|
160
160
|
|
|
161
|
+
it 'can remove sources' do
|
|
162
|
+
allow(ollama).to receive(:embed).at_least(:once).
|
|
163
|
+
and_return(double(embeddings: [ [ 0.1 ] ]))
|
|
164
|
+
|
|
165
|
+
documents.add('foo', source: 'source1')
|
|
166
|
+
documents.add('bar', source: 'source1')
|
|
167
|
+
documents.add('baz', source: 'source2')
|
|
168
|
+
|
|
169
|
+
expect(documents.size).to eq 3
|
|
170
|
+
|
|
171
|
+
documents.remove('source1')
|
|
172
|
+
|
|
173
|
+
expect(documents.size).to eq 1
|
|
174
|
+
expect(documents.exist?('baz')).to be true
|
|
175
|
+
expect(documents.exist?('foo')).to be false
|
|
176
|
+
expect(documents.exist?('bar')).to be false
|
|
177
|
+
end
|
|
178
|
+
|
|
161
179
|
it 'returns collections' do
|
|
162
180
|
expect(documents.collections).to eq [ :default ]
|
|
163
181
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: documentrix
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Florian Frank
|
|
@@ -15,14 +15,14 @@ dependencies:
|
|
|
15
15
|
requirements:
|
|
16
16
|
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version: 2.17.
|
|
18
|
+
version: 2.17.1
|
|
19
19
|
type: :development
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version: 2.17.
|
|
25
|
+
version: 2.17.1
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: all_images
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -141,14 +141,14 @@ dependencies:
|
|
|
141
141
|
requirements:
|
|
142
142
|
- - ">="
|
|
143
143
|
- !ruby/object:Gem::Version
|
|
144
|
-
version: 0.1.
|
|
144
|
+
version: 0.1.9
|
|
145
145
|
type: :runtime
|
|
146
146
|
prerelease: false
|
|
147
147
|
version_requirements: !ruby/object:Gem::Requirement
|
|
148
148
|
requirements:
|
|
149
149
|
- - ">="
|
|
150
150
|
- !ruby/object:Gem::Version
|
|
151
|
-
version: 0.1.
|
|
151
|
+
version: 0.1.9
|
|
152
152
|
- !ruby/object:Gem::Dependency
|
|
153
153
|
name: sqlite3
|
|
154
154
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -314,7 +314,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
314
314
|
- !ruby/object:Gem::Version
|
|
315
315
|
version: '0'
|
|
316
316
|
requirements: []
|
|
317
|
-
rubygems_version: 4.0.
|
|
317
|
+
rubygems_version: 4.0.10
|
|
318
318
|
specification_version: 4
|
|
319
319
|
summary: Ruby library for embedding vector database
|
|
320
320
|
test_files:
|