documentrix 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +18 -0
- data/documentrix.gemspec +2 -2
- data/lib/documentrix/documents/cache/sqlite_cache.rb +7 -2
- data/lib/documentrix/documents.rb +7 -4
- data/lib/documentrix/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 11ea07d3133f8de898211353a4bcff7f926b9f75114696af333e928862ee5aeb
|
|
4
|
+
data.tar.gz: b60d8606b1974bcb43e6584f690ecb21a7e54af6915b09d9adb904bc9269b417
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ae907af900fc6932de6b2d022a4ca97367d6e1901442ed30e838eb7bf128b5e5d7dd572862f29cfe6196f58553d394e390062287b21cac6185e2c6845244ee5a
|
|
7
|
+
data.tar.gz: 75b95e852cbd6b412ae2651668db5cbffcb9f1c5845dd3bdfb2ea252380b9ab37d15f4b8381398f941c4c1dabeb1e7a174e9e6ef7dc4e0dc153455c4c0413b1e
|
data/CHANGES.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
## 2026-06-16 v0.5.0
|
|
4
|
+
|
|
5
|
+
### Improvements
|
|
6
|
+
|
|
7
|
+
- Enhanced SQLite concurrency and prevented database locks:
|
|
8
|
+
- Added `database_busy_timeout` parameter to
|
|
9
|
+
`Documentrix::Documents#initialize`, defaulting to **5000**ms.
|
|
10
|
+
- Updated `Documentrix::Documents#connect_cache` to pass the timeout value
|
|
11
|
+
to the cache backend.
|
|
12
|
+
- Implemented `busy_timeout` support in
|
|
13
|
+
`Documentrix::Documents::Cache::SQLiteCache#initialize`.
|
|
14
|
+
- Configured `@database.busy_handler_timeout` in
|
|
15
|
+
`Documentrix::Documents::Cache::SQLiteCache#setup_database` to ensure
|
|
16
|
+
GVL-friendly waiting during lock contention.
|
|
17
|
+
- Prevented immediate `SQLITE_BUSY` errors on writes by updating
|
|
18
|
+
`Documentrix::Documents::Cache::SQLiteCache#[]=` to use `BEGIN IMMEDIATE`
|
|
19
|
+
instead of `BEGIN`, avoiding transaction upgrade failures.
|
|
20
|
+
|
|
3
21
|
## 2026-05-22 v0.4.0
|
|
4
22
|
|
|
5
23
|
### Added
|
data/documentrix.gemspec
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
|
-
# stub: documentrix 0.4.0 ruby lib
|
|
2
|
+
# stub: documentrix 0.5.0 ruby lib
|
|
3
3
|
|
|
4
4
|
Gem::Specification.new do |s|
|
|
5
5
|
s.name = "documentrix".freeze
|
|
6
|
-
s.version = "0.4.0".freeze
|
|
6
|
+
s.version = "0.5.0".freeze
|
|
7
7
|
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
9
9
|
s.require_paths = ["lib".freeze]
|
data/lib/documentrix/documents/cache/sqlite_cache.rb
CHANGED
|
@@ -21,13 +21,15 @@ class Documentrix::Documents::Cache::SQLiteCache
|
|
|
21
21
|
# @param embedding_length [ Integer ] the length of the embeddings vector
|
|
22
22
|
# @param filename [ String ] the name of the SQLite database file or ':memory:' for in-memory.
|
|
23
23
|
# @param debug [ FalseClass, TrueClass ] whether to enable debugging
|
|
24
|
+
# @param busy_timeout [ Integer ] the SQLite busy timeout in milliseconds (defaults to 5000)
|
|
24
25
|
#
|
|
25
26
|
# @return [ void ]
|
|
26
|
-
def initialize(prefix:, embedding_length: 1_024, filename: ':memory:', debug: false)
|
|
27
|
+
def initialize(prefix:, embedding_length: 1_024, filename: ':memory:', debug: false, busy_timeout: 5000)
|
|
27
28
|
super(prefix:)
|
|
28
29
|
@embedding_length = embedding_length
|
|
29
30
|
@filename = filename
|
|
30
31
|
@debug = debug
|
|
32
|
+
@busy_timeout = busy_timeout
|
|
31
33
|
setup_database(filename)
|
|
32
34
|
end
|
|
33
35
|
|
|
@@ -69,7 +71,7 @@ class Documentrix::Documents::Cache::SQLiteCache
|
|
|
69
71
|
value = convert_value_to_record(value)
|
|
70
72
|
digest = compute_file_digest(value.source)
|
|
71
73
|
embedding = value.embedding.pack("f*")
|
|
72
|
-
execute(%{BEGIN})
|
|
74
|
+
execute(%{BEGIN IMMEDIATE})
|
|
73
75
|
execute(%{INSERT INTO embeddings(embedding) VALUES(?)}, [ embedding ])
|
|
74
76
|
embedding_id, = execute(%{ SELECT last_insert_rowid() }).flatten
|
|
75
77
|
execute(%{
|
|
@@ -170,6 +172,8 @@ class Documentrix::Documents::Cache::SQLiteCache
|
|
|
170
172
|
# @param source [String] the source identifier used to filter records
|
|
171
173
|
# @param digest [String, nil] the SHA256 hexadecimal digest of the source.
|
|
172
174
|
# Records matching this digest will be preserved.
|
|
175
|
+
# @param operator [String] the operator to use for comparison ('=' or '!=').
|
|
176
|
+
# Defaults to '='.
|
|
173
177
|
#
|
|
174
178
|
# @return [self] the cache instance for method chaining
|
|
175
179
|
def clear_by_source(source, digest: nil, operator: ?=)
|
|
@@ -438,6 +442,7 @@ class Documentrix::Documents::Cache::SQLiteCache
|
|
|
438
442
|
# @return [ nil ]
|
|
439
443
|
def setup_database(filename)
|
|
440
444
|
@database = SQLite3::Database.new(filename)
|
|
445
|
+
@database.busy_handler_timeout = @busy_timeout
|
|
441
446
|
@database.enable_load_extension(true)
|
|
442
447
|
SqliteVec.load(@database)
|
|
443
448
|
@database.enable_load_extension(false)
|
data/lib/documentrix/documents.rb
CHANGED
|
@@ -76,12 +76,13 @@ class Documentrix::Documents
|
|
|
76
76
|
# @param database_filename [ String ] the filename of the SQLite database to use (defaults to ':memory:')
|
|
77
77
|
# @param redis_url [ String ] the URL of the Redis server to use (defaults to nil)
|
|
78
78
|
# @param debug [ FalseClass, TrueClass ] whether to enable debugging mode (defaults to false)
|
|
79
|
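-
def initialize(ollama:, model:, model_options: nil, collection: nil, embedding_length: 1_024, cache: MemoryCache, database_filename: nil, redis_url: nil, debug: false)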
|
|
79
|
+
# @param database_busy_timeout [ Integer ] the SQLite busy timeout in milliseconds (defaults to 5000)
|
|
80
|
+
def initialize(ollama:, model:, model_options: nil, collection: nil, embedding_length: 1_024, cache: MemoryCache, database_filename: nil, redis_url: nil, debug: false, database_busy_timeout: 5000)
|
|
80
81
|
collection ||= default_collection
|
|
81
82
|
@ollama, @model, @model_options, @collection, @debug =
|
|
82
83
|
ollama, model, model_options, collection.to_sym, debug
|
|
83
84
|
database_filename ||= ':memory:'
|
|
84
|
-
@cache = connect_cache(cache, redis_url, embedding_length, database_filename)
|
|
85
|
+
@cache = connect_cache(cache, redis_url, embedding_length, database_filename, database_busy_timeout)
|
|
85
86
|
end
|
|
86
87
|
|
|
87
88
|
# The default_collection method returns the default collection name.
|
|
@@ -104,7 +105,7 @@ class Documentrix::Documents
|
|
|
104
105
|
# The prepare_texts method filters out existing texts from the input array
|
|
105
106
|
# and returns the filtered array.
|
|
106
107
|
#
|
|
107
|
-
# @param texts [ Array ] an array of text strings
|
|
108
|
+
# @param texts [ Array ] an array of text strings
|
|
108
109
|
#
|
|
109
110
|
# @return [ Array ] the filtered array of text strings
|
|
110
111
|
private def prepare_texts(texts)
|
|
@@ -446,9 +447,10 @@ class Documentrix::Documents
|
|
|
446
447
|
# @param redis_url [String] the URL of the Redis server
|
|
447
448
|
# @param embedding_length [Integer] the length of the embeddings used in the cache
|
|
448
449
|
# @param database_filename [String] the filename of the SQLite database file
|
|
450
|
+
# @param database_busy_timeout [Integer] the SQLite busy timeout in milliseconds
|
|
449
451
|
#
|
|
450
452
|
# @return [CacheInstance] an instance of the specified cache class
|
|
451
|
-
def connect_cache(cache_class, redis_url, embedding_length, database_filename)
|
|
453
|
+
def connect_cache(cache_class, redis_url, embedding_length, database_filename, database_busy_timeout)
|
|
452
454
|
cache = nil
|
|
453
455
|
if (cache_class.instance_method(:redis) rescue nil)
|
|
454
456
|
begin
|
|
@@ -465,6 +467,7 @@ class Documentrix::Documents
|
|
|
465
467
|
prefix:,
|
|
466
468
|
embedding_length:,
|
|
467
469
|
filename: database_filename,
|
|
470
|
+
busy_timeout: database_busy_timeout,
|
|
468
471
|
debug: @debug
|
|
469
472
|
)
|
|
470
473
|
end
|
data/lib/documentrix/version.rb
CHANGED