documentrix 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.utilsrc +107 -0
- data/CHANGES.md +28 -0
- data/Rakefile +5 -1
- data/docker-compose.yml +1 -1
- data/documentrix.gemspec +8 -8
- data/lib/documentrix/documents/cache/common.rb +102 -6
- data/lib/documentrix/documents/cache/memory_cache.rb +36 -8
- data/lib/documentrix/documents/cache/records.rb +14 -90
- data/lib/documentrix/documents/cache/redis_cache.rb +57 -25
- data/lib/documentrix/documents/cache/sqlite_cache.rb +41 -5
- data/lib/documentrix/documents.rb +62 -6
- data/lib/documentrix/utils/colorize_texts.rb +11 -0
- data/lib/documentrix/utils/math.rb +5 -0
- data/lib/documentrix/utils/tags.rb +34 -0
- data/lib/documentrix/utils.rb +10 -0
- data/lib/documentrix/version.rb +1 -1
- data/lib/documentrix.rb +6 -0
- data/spec/documentrix/documents/cache/interface_spec.rb +188 -0
- data/spec/documentrix/documents/cache/memory_cache_spec.rb +29 -0
- data/spec/documentrix/documents/cache/redis_cache_spec.rb +13 -11
- data/spec/documentrix/documents/cache/sqlite_cache_spec.rb +34 -0
- data/spec/documents_spec.rb +22 -0
- data/spec/spec_helper.rb +2 -6
- metadata +11 -12
- data/lib/documentrix/documents/cache/redis_backed_memory_cache.rb +0 -64
- data/spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb +0 -121
data/spec/spec_helper.rb
CHANGED
|
@@ -1,9 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
SimpleCov.start do
|
|
4
|
-
add_filter "#{File.basename(File.dirname(__FILE__))}/"
|
|
5
|
-
end
|
|
6
|
-
end
|
|
1
|
+
require 'gem_hadar/simplecov'
|
|
2
|
+
GemHadar::SimpleCov.start
|
|
7
3
|
require 'rspec'
|
|
8
4
|
require 'tins/xt/expose'
|
|
9
5
|
begin
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: documentrix
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.4
|
|
4
|
+
version: 0.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Florian Frank
|
|
@@ -13,30 +13,30 @@ dependencies:
|
|
|
13
13
|
name: gem_hadar
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
15
15
|
requirements:
|
|
16
|
-
- - "
|
|
16
|
+
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version:
|
|
18
|
+
version: 2.17.0
|
|
19
19
|
type: :development
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
|
-
- - "
|
|
23
|
+
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version:
|
|
25
|
+
version: 2.17.0
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: all_images
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
29
29
|
requirements:
|
|
30
30
|
- - "~>"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '0.
|
|
32
|
+
version: '0.12'
|
|
33
33
|
type: :development
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - "~>"
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '0.
|
|
39
|
+
version: '0.12'
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
41
|
name: rspec
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -246,7 +246,6 @@ extra_rdoc_files:
|
|
|
246
246
|
- lib/documentrix/documents/cache/common.rb
|
|
247
247
|
- lib/documentrix/documents/cache/memory_cache.rb
|
|
248
248
|
- lib/documentrix/documents/cache/records.rb
|
|
249
|
-
- lib/documentrix/documents/cache/redis_backed_memory_cache.rb
|
|
250
249
|
- lib/documentrix/documents/cache/redis_cache.rb
|
|
251
250
|
- lib/documentrix/documents/cache/sqlite_cache.rb
|
|
252
251
|
- lib/documentrix/documents/splitters/character.rb
|
|
@@ -258,6 +257,7 @@ extra_rdoc_files:
|
|
|
258
257
|
- lib/documentrix/version.rb
|
|
259
258
|
files:
|
|
260
259
|
- ".envrc"
|
|
260
|
+
- ".utilsrc"
|
|
261
261
|
- ".yardopts"
|
|
262
262
|
- CHANGES.md
|
|
263
263
|
- Gemfile
|
|
@@ -271,7 +271,6 @@ files:
|
|
|
271
271
|
- lib/documentrix/documents/cache/common.rb
|
|
272
272
|
- lib/documentrix/documents/cache/memory_cache.rb
|
|
273
273
|
- lib/documentrix/documents/cache/records.rb
|
|
274
|
-
- lib/documentrix/documents/cache/redis_backed_memory_cache.rb
|
|
275
274
|
- lib/documentrix/documents/cache/redis_cache.rb
|
|
276
275
|
- lib/documentrix/documents/cache/sqlite_cache.rb
|
|
277
276
|
- lib/documentrix/documents/splitters/character.rb
|
|
@@ -283,8 +282,8 @@ files:
|
|
|
283
282
|
- lib/documentrix/version.rb
|
|
284
283
|
- redis/redis.conf
|
|
285
284
|
- spec/assets/embeddings.json
|
|
285
|
+
- spec/documentrix/documents/cache/interface_spec.rb
|
|
286
286
|
- spec/documentrix/documents/cache/memory_cache_spec.rb
|
|
287
|
-
- spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb
|
|
288
287
|
- spec/documentrix/documents/cache/redis_cache_spec.rb
|
|
289
288
|
- spec/documentrix/documents/cache/sqlite_cache_spec.rb
|
|
290
289
|
- spec/documentrix/documents/splitters/character_spec.rb
|
|
@@ -315,12 +314,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
315
314
|
- !ruby/object:Gem::Version
|
|
316
315
|
version: '0'
|
|
317
316
|
requirements: []
|
|
318
|
-
rubygems_version: 4.0.
|
|
317
|
+
rubygems_version: 4.0.8
|
|
319
318
|
specification_version: 4
|
|
320
319
|
summary: Ruby library for embedding vector database
|
|
321
320
|
test_files:
|
|
321
|
+
- spec/documentrix/documents/cache/interface_spec.rb
|
|
322
322
|
- spec/documentrix/documents/cache/memory_cache_spec.rb
|
|
323
|
-
- spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb
|
|
324
323
|
- spec/documentrix/documents/cache/redis_cache_spec.rb
|
|
325
324
|
- spec/documentrix/documents/cache/sqlite_cache_spec.rb
|
|
326
325
|
- spec/documentrix/documents/splitters/character_spec.rb
|
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
require 'redis'
|
|
2
|
-
|
|
3
|
-
class Documentrix::Documents
|
|
4
|
-
class RedisBackedMemoryCache < MemoryCache
|
|
5
|
-
|
|
6
|
-
# The initialize method sets up the RedisBackedMemoryCache cache by
|
|
7
|
-
# creating a new instance and populating it with data from the internally
|
|
8
|
-
# created RedisCache.
|
|
9
|
-
#
|
|
10
|
-
# @param prefix [String] The prefix for keys in the Redis cache
|
|
11
|
-
# @param url [String] The URL of the Redis server (default: ENV['REDIS_URL'])
|
|
12
|
-
# @param object_class [Class] The class to use for deserializing values from Redis (default: nil)
|
|
13
|
-
#
|
|
14
|
-
# @raise [ArgumentError] If the redis_url environment variable is not set
|
|
15
|
-
def initialize(prefix:, url: ENV['REDIS_URL'], object_class: nil)
|
|
16
|
-
super(prefix:)
|
|
17
|
-
url or raise ArgumentError, 'require redis url'
|
|
18
|
-
@url, @object_class = url, object_class
|
|
19
|
-
@redis_cache = Documentrix::Documents::RedisCache.new(prefix:, url:, object_class:)
|
|
20
|
-
@redis_cache.extend(Documentrix::Documents::Cache::Records::RedisFullEach)
|
|
21
|
-
@redis_cache.full_each { |key, value| @data[key] = value }
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
attr_reader :object_class # the class of objects stored in the cache
|
|
25
|
-
|
|
26
|
-
# The redis method returns the Redis client instance used by the cache.
|
|
27
|
-
#
|
|
28
|
-
# @return [Redis] The Redis client instance
|
|
29
|
-
def redis
|
|
30
|
-
@redis_cache.redis
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# The set method sets the value for a given key in memory and in Redis.
|
|
34
|
-
#
|
|
35
|
-
# @param [String] key the key to be set
|
|
36
|
-
# @param [Hash] value the hash containing the data to be stored
|
|
37
|
-
def []=(key, value)
|
|
38
|
-
super
|
|
39
|
-
redis.set(pre(key), JSON(value))
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
# The delete method removes a key from the cache by calling Redis's del
|
|
43
|
-
# method and then calling the superclass's delete method.
|
|
44
|
-
#
|
|
45
|
-
# @param [String] key the key to be deleted
|
|
46
|
-
#
|
|
47
|
-
# @return [FalseClass, TrueClass] true if the key was successfully deleted, false otherwise.
|
|
48
|
-
def delete(key)
|
|
49
|
-
result = redis.del(pre(key))
|
|
50
|
-
super && result == 1
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# The clear method deletes all keys from the cache by scanning redis for
|
|
54
|
-
# keys that match the prefix `prefix` and then deleting them, then it does
|
|
55
|
-
# the same for the MemoryCache by calling its super.
|
|
56
|
-
#
|
|
57
|
-
# @return [self] self
|
|
58
|
-
def clear
|
|
59
|
-
redis.scan_each(match: "#@prefix*") { |key| redis.del(key) }
|
|
60
|
-
super
|
|
61
|
-
self
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
end
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
describe Documentrix::Documents::RedisBackedMemoryCache do
|
|
4
|
-
let :prefix do
|
|
5
|
-
'test-'
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
let :cache do
|
|
9
|
-
described_class.new prefix: 'test-', url: 'something'
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
it 'raises ArgumentError if url is missing' do
|
|
13
|
-
expect {
|
|
14
|
-
described_class.new prefix:, url: nil
|
|
15
|
-
}.to raise_error ArgumentError
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
context 'test redis interactions' do
|
|
19
|
-
let :data do
|
|
20
|
-
cache.instance_eval { @data }
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
let :redis_cache do
|
|
24
|
-
cache.instance_eval { @redis_cache }
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
let :redis do
|
|
28
|
-
double('Redis')
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
before do
|
|
32
|
-
allow_any_instance_of(Documentrix::Documents::RedisCache).to\
|
|
33
|
-
receive(:redis).and_return(redis)
|
|
34
|
-
allow(redis).to receive(:scan_each)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
it 'can be instantiated and initialized' do
|
|
38
|
-
expect(cache).to be_a described_class
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
it 'defaults to nil object_class' do
|
|
42
|
-
expect(cache.object_class).to be_nil
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
it 'can be configured with object_class' do
|
|
46
|
-
object_class = Class.new(JSON::GenericObject)
|
|
47
|
-
cache = described_class.new(prefix: 'test-', url: 'something', object_class:)
|
|
48
|
-
expect(cache.object_class).to eq object_class
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
it 'has Redis client' do
|
|
52
|
-
expect(cache.redis).to eq redis
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
it 'can get a key' do
|
|
56
|
-
key = 'foo'
|
|
57
|
-
expect(data).to receive(:[]).with('test-' + key).and_return 666
|
|
58
|
-
expect(cache[key]).to eq 666
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
it 'can set a value for a key' do
|
|
62
|
-
key, value = 'foo', { test: true }
|
|
63
|
-
expect(data).to receive(:[]=).with('test-' + key, { test: true }).and_call_original
|
|
64
|
-
expect(redis).to receive(:set).with('test-' + key, JSON(value))
|
|
65
|
-
cache[key] = value
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
it 'can determine if key exists' do
|
|
69
|
-
key = 'foo'
|
|
70
|
-
expect(data).to receive(:key?).with('test-' + key).and_return(false, true)
|
|
71
|
-
expect(cache.key?('foo')).to eq false
|
|
72
|
-
expect(cache.key?('foo')).to eq true
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
it 'can delete' do
|
|
76
|
-
key = 'foo'
|
|
77
|
-
expect(data).to receive(:delete).with('test-' + key).and_return 'bar'
|
|
78
|
-
expect(redis).to receive(:del).with('test-' + key).and_return 1
|
|
79
|
-
expect(cache.delete(key)).to eq true
|
|
80
|
-
expect(data).to receive(:delete).with('test-' + key).and_return nil
|
|
81
|
-
expect(redis).to receive(:del).with(prefix + key).and_return 0
|
|
82
|
-
expect(cache.delete(key)).to eq false
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
it 'can iterate over keys, values' do
|
|
86
|
-
key, value = 'foo', { 'test' => true }
|
|
87
|
-
expect(redis).to receive(:set).with('test-' + key, JSON(value))
|
|
88
|
-
cache[key] = value
|
|
89
|
-
cache.each do |k, v|
|
|
90
|
-
expect(k).to eq prefix + key
|
|
91
|
-
expect(v).to eq value
|
|
92
|
-
end
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
it 'returns size' do
|
|
96
|
-
expect(cache).to receive(:count).and_return 3
|
|
97
|
-
expect(cache.size).to eq 3
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
it 'can clear' do
|
|
101
|
-
expect(redis).to receive(:scan_each).with(match: 'test-*').and_yield(
|
|
102
|
-
'test-foo'
|
|
103
|
-
)
|
|
104
|
-
expect(redis).to receive(:del).with('test-foo')
|
|
105
|
-
expect(cache.clear).to eq cache
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
it 'can iterate over keys under a prefix' do
|
|
109
|
-
data['test-foo'] = 'bar'
|
|
110
|
-
expect(cache.to_a).to eq [ %w[ test-foo bar ] ]
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
it 'can compute prefix with pre' do
|
|
114
|
-
expect(cache.pre('foo')).to eq 'test-foo'
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
it 'can remove prefix with unpre' do
|
|
118
|
-
expect(cache.unpre('test-foo')).to eq 'foo'
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
end
|