documentrix 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e32a72c0a1f93a96f7c3cecd185f13a0f17a2629b2dc509ec3a29fd2d7b51a41
4
- data.tar.gz: 3f2c21125adf7061dcba94f1843456064245222839de0df6ddbcd8743a1aea13
3
+ metadata.gz: 11ea07d3133f8de898211353a4bcff7f926b9f75114696af333e928862ee5aeb
4
+ data.tar.gz: b60d8606b1974bcb43e6584f690ecb21a7e54af6915b09d9adb904bc9269b417
5
5
  SHA512:
6
- metadata.gz: 91dbf3ddfdeb124661ff78a4cbc1635dbb8bdf6606078aeeef77b4f9d14688d073261ad08b59f0333895c7391d83eff8a6c76467f19af4f04ca55172c1d934a5
7
- data.tar.gz: d40e5a53ceeda71c7a2be37d3bfb718ffc2ddb0fddc0c6eb2346c295d06713c895493854fab5f768af075bec2595e2f23c40e893d7f7266c6f61cd95c534d733
6
+ metadata.gz: ae907af900fc6932de6b2d022a4ca97367d6e1901442ed30e838eb7bf128b5e5d7dd572862f29cfe6196f58553d394e390062287b21cac6185e2c6845244ee5a
7
+ data.tar.gz: 75b95e852cbd6b412ae2651668db5cbffcb9f1c5845dd3bdfb2ea252380b9ab37d15f4b8381398f941c4c1dabeb1e7a174e9e6ef7dc4e0dc153455c4c0413b1e
data/CHANGES.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Changes
2
2
 
3
+ ## 2026-06-16 v0.5.0
4
+
5
+ ### Improvements
6
+
7
+ - Enhanced SQLite concurrency and prevented database locks:
8
+ - Added `database_busy_timeout` parameter to
9
+ `Documentrix::Documents#initialize`, defaulting to **5000**ms.
10
+ - Updated `Documentrix::Documents#connect_cache` to pass the timeout value
11
+ to the cache backend.
12
+ - Implemented `busy_timeout` support in
13
+ `Documentrix::Documents::Cache::SQLiteCache#initialize`.
14
+ - Configured `@database.busy_handler_timeout` in
15
+ `Documentrix::Documents::Cache::SQLiteCache#setup_database` to ensure
16
+ GVL-friendly waiting during lock contention.
17
+ - Prevented immediate `SQLITE_BUSY` errors on writes by updating
18
+ `Documentrix::Documents::Cache::SQLiteCache#[]=` to use `BEGIN IMMEDIATE`
19
+ instead of `BEGIN`, avoiding transaction upgrade failures.
20
+
3
21
  ## 2026-05-22 v0.4.0
4
22
 
5
23
  ### Added
data/documentrix.gemspec CHANGED
@@ -1,9 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: documentrix 0.4.0 ruby lib
2
+ # stub: documentrix 0.5.0 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "documentrix".freeze
6
- s.version = "0.4.0".freeze
6
+ s.version = "0.5.0".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
@@ -21,13 +21,15 @@ class Documentrix::Documents::Cache::SQLiteCache
21
21
  # @param embedding_length [ Integer ] the length of the embeddings vector
22
22
  # @param filename [ String ] the name of the SQLite database file or ':memory:' for in-memory.
23
23
  # @param debug [ FalseClass, TrueClass ] whether to enable debugging
24
+ # @param busy_timeout [ Integer ] the SQLite busy timeout in milliseconds (defaults to 5000)
24
25
  #
25
26
  # @return [ void ]
26
- def initialize(prefix:, embedding_length: 1_024, filename: ':memory:', debug: false)
27
+ def initialize(prefix:, embedding_length: 1_024, filename: ':memory:', debug: false, busy_timeout: 5000)
27
28
  super(prefix:)
28
29
  @embedding_length = embedding_length
29
30
  @filename = filename
30
31
  @debug = debug
32
+ @busy_timeout = busy_timeout
31
33
  setup_database(filename)
32
34
  end
33
35
 
@@ -69,7 +71,7 @@ class Documentrix::Documents::Cache::SQLiteCache
69
71
  value = convert_value_to_record(value)
70
72
  digest = compute_file_digest(value.source)
71
73
  embedding = value.embedding.pack("f*")
72
- execute(%{BEGIN})
74
+ execute(%{BEGIN IMMEDIATE})
73
75
  execute(%{INSERT INTO embeddings(embedding) VALUES(?)}, [ embedding ])
74
76
  embedding_id, = execute(%{ SELECT last_insert_rowid() }).flatten
75
77
  execute(%{
@@ -170,6 +172,8 @@ class Documentrix::Documents::Cache::SQLiteCache
170
172
  # @param source [String] the source identifier used to filter records
171
173
  # @param digest [String, nil] the SHA256 hexadecimal digest of the source.
172
174
  # Records matching this digest will be preserved.
175
+ # @param operator [String] the operator to use for comparison ('=' or '!=').
176
+ # Defaults to '='.
173
177
  #
174
178
  # @return [self] the cache instance for method chaining
175
179
  def clear_by_source(source, digest: nil, operator: ?=)
@@ -438,6 +442,7 @@ class Documentrix::Documents::Cache::SQLiteCache
438
442
  # @return [ nil ]
439
443
  def setup_database(filename)
440
444
  @database = SQLite3::Database.new(filename)
445
+ @database.busy_handler_timeout = @busy_timeout
441
446
  @database.enable_load_extension(true)
442
447
  SqliteVec.load(@database)
443
448
  @database.enable_load_extension(false)
@@ -76,12 +76,13 @@ class Documentrix::Documents
76
76
  # @param database_filename [ String ] the filename of the SQLite database to use (defaults to ':memory:')
77
77
  # @param redis_url [ String ] the URL of the Redis server to use (defaults to nil)
78
78
  # @param debug [ FalseClass, TrueClass ] whether to enable debugging mode (defaults to false)
79
- def initialize(ollama:, model:, model_options: nil, collection: nil, embedding_length: 1_024, cache: MemoryCache, database_filename: nil, redis_url: nil, debug: false)
79
+ # @param database_busy_timeout [ Integer ] the SQLite busy timeout in milliseconds (defaults to 5000)
80
+ def initialize(ollama:, model:, model_options: nil, collection: nil, embedding_length: 1_024, cache: MemoryCache, database_filename: nil, redis_url: nil, debug: false, database_busy_timeout: 5000)
80
81
  collection ||= default_collection
81
82
  @ollama, @model, @model_options, @collection, @debug =
82
83
  ollama, model, model_options, collection.to_sym, debug
83
84
  database_filename ||= ':memory:'
84
- @cache = connect_cache(cache, redis_url, embedding_length, database_filename)
85
+ @cache = connect_cache(cache, redis_url, embedding_length, database_filename, database_busy_timeout)
85
86
  end
86
87
 
87
88
  # The default_collection method returns the default collection name.
@@ -104,7 +105,7 @@ class Documentrix::Documents
104
105
  # The prepare_texts method filters out existing texts from the input array
105
106
  # and returns the filtered array.
106
107
  #
107
- # @param texts [ Array ] an array of text strings or #read objects.
108
+ # @param texts [ Array ] an array of text strings
108
109
  #
109
110
  # @return [ Array ] the filtered array of text strings
110
111
  private def prepare_texts(texts)
@@ -446,9 +447,10 @@ class Documentrix::Documents
446
447
  # @param redis_url [String] the URL of the Redis server
447
448
  # @param embedding_length [Integer] the length of the embeddings used in the cache
448
449
  # @param database_filename [String] the filename of the SQLite database file
450
+ # @param database_busy_timeout [Integer] the SQLite busy timeout in milliseconds
449
451
  #
450
452
  # @return [CacheInstance] an instance of the specified cache class
451
- def connect_cache(cache_class, redis_url, embedding_length, database_filename)
453
+ def connect_cache(cache_class, redis_url, embedding_length, database_filename, database_busy_timeout)
452
454
  cache = nil
453
455
  if (cache_class.instance_method(:redis) rescue nil)
454
456
  begin
@@ -465,6 +467,7 @@ class Documentrix::Documents
465
467
  prefix:,
466
468
  embedding_length:,
467
469
  filename: database_filename,
470
+ busy_timeout: database_busy_timeout,
468
471
  debug: @debug
469
472
  )
470
473
  end
@@ -1,6 +1,6 @@
1
1
  module Documentrix
2
2
  # Documentrix version
3
- VERSION = '0.4.0'
3
+ VERSION = '0.5.0'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank