whoosh 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/whoosh/app.rb +5 -0
- data/lib/whoosh/cli/project_generator.rb +9 -1
- data/lib/whoosh/vector_store/memory_store.rb +79 -0
- data/lib/whoosh/vector_store.rb +42 -0
- data/lib/whoosh/version.rb +1 -1
- data/lib/whoosh.rb +1 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 133af39d011ac77fc6e0773ea19652cb990f5c8d129d40b7f301796062e23e1f
|
|
4
|
+
data.tar.gz: 5145a7c3177f2ddef7667f8faa6a6c2681b38fd279351c95ceda93ce69999c2a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4e99f75634ea05e7e2673164f6bdd8b71ea704e6ee02023370b8b77207666bee6291a02d7423d8d036c4014651ba17673c3f23639e64a2a68b5038799b1dca99
|
|
7
|
+
data.tar.gz: 4427cce1a58e9d35841a372ca782f831d6aa3ef513e2309f14e4f6b284a4d12461b102fe5a963768e1a01af2e98db7fc82f7084e970f1a150ef7092f2033c8bf
|
data/lib/whoosh/app.rb
CHANGED
|
@@ -29,6 +29,7 @@ module Whoosh
|
|
|
29
29
|
auto_register_database
|
|
30
30
|
auto_register_storage
|
|
31
31
|
auto_register_http
|
|
32
|
+
auto_register_vectors
|
|
32
33
|
auto_configure_jobs
|
|
33
34
|
@metrics = Metrics.new
|
|
34
35
|
auto_register_metrics
|
|
@@ -302,6 +303,10 @@ module Whoosh
|
|
|
302
303
|
@di.provide(:http) { HTTP }
|
|
303
304
|
end
|
|
304
305
|
|
|
306
|
+
# Registers the :vectors dependency with the DI container.
# The block handed to @di.provide calls VectorStore.build with the
# application's config data; when that block runs is decided by the
# container, not by this method.
def auto_register_vectors
  @di.provide(:vectors) do
    VectorStore.build(@config.data)
  end
end
|
|
309
|
+
|
|
305
310
|
def auto_configure_jobs
|
|
306
311
|
backend = Jobs.build_backend(@config.data)
|
|
307
312
|
Jobs.configure(backend: backend, di: @di)
|
|
@@ -175,8 +175,10 @@ module Whoosh
|
|
|
175
175
|
max_connections: 10
|
|
176
176
|
log_level: debug
|
|
177
177
|
|
|
178
|
+
# Cache & Jobs auto-detect:
|
|
179
|
+
# No REDIS_URL → in-memory (just works)
|
|
180
|
+
# Set REDIS_URL → auto-switches to Redis
|
|
178
181
|
cache:
|
|
179
|
-
store: memory
|
|
180
182
|
default_ttl: 300
|
|
181
183
|
|
|
182
184
|
jobs:
|
|
@@ -188,6 +190,12 @@ module Whoosh
|
|
|
188
190
|
level: info
|
|
189
191
|
format: json
|
|
190
192
|
|
|
193
|
+
# Vector store auto-detect:
|
|
194
|
+
# zvec gem installed → uses zvec, otherwise → in-memory
|
|
195
|
+
# vector:
|
|
196
|
+
# adapter: auto
|
|
197
|
+
# path: db/vectors
|
|
198
|
+
|
|
191
199
|
docs:
|
|
192
200
|
enabled: true
|
|
193
201
|
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Whoosh
  module VectorStore
    # Vector store that keeps everything in a Hash held by this object.
    #
    # Layout: collection name => { id => { vector:, metadata: } }.
    # Every public operation runs inside a single Mutex.
    class MemoryStore
      def initialize
        @collections = {}
        @mutex = Mutex.new
      end

      # Adds (or overwrites) the entry stored under +id+ in +collection+.
      # The collection is created on first use.
      #
      # Returns the stored entry hash ({ vector:, metadata: }).
      def insert(collection, id:, vector:, metadata: {})
        entry = { vector: vector, metadata: metadata }
        @mutex.synchronize do
          bucket = (@collections[collection] ||= {})
          bucket[id] = entry
        end
      end

      # Ranks every entry of +collection+ by cosine similarity to +vector+
      # and returns at most +limit+ hits, best score first.
      #
      # Each hit is { id:, score:, metadata: }. A missing or empty
      # collection yields [].
      def search(collection, vector:, limit: 10)
        @mutex.synchronize do
          bucket = @collections[collection]
          next [] if bucket.nil? || bucket.empty?

          hits = bucket.map do |key, entry|
            { id: key, score: cosine_similarity(vector, entry[:vector]), metadata: entry[:metadata] }
          end

          # Negated score as the sort key gives descending order.
          hits.sort_by { |hit| -hit[:score] }.first(limit)
        end
      end

      # Removes the entry stored under +id+.
      # Returns the removed entry, or nil when the collection or id is absent.
      def delete(collection, id:)
        @mutex.synchronize do
          bucket = @collections[collection]
          bucket.delete(id) if bucket
        end
      end

      # Number of entries in +collection+ (0 when it does not exist).
      def count(collection)
        @mutex.synchronize do
          bucket = @collections[collection]
          bucket ? bucket.size : 0
        end
      end

      # Removes the whole +collection+.
      # Returns the removed id => entry hash, or nil when it did not exist.
      def drop(collection)
        @mutex.synchronize { @collections.delete(collection) }
      end

      # Nothing to release for this store; always returns nil.
      def close; end

      private

      # Cosine similarity of +a+ and +b+.
      # Returns 0.0 when either is empty, when their lengths differ, or when
      # either has zero magnitude.
      def cosine_similarity(a, b)
        return 0.0 if a.empty? || b.empty?
        return 0.0 unless a.length == b.length

        dot = 0.0
        sq_a = 0.0
        sq_b = 0.0

        (0...a.length).each do |idx|
          x = a[idx]
          y = b[idx]
          dot += x * y
          sq_a += x * x
          sq_b += y * y
        end

        denom = Math.sqrt(sq_a) * Math.sqrt(sq_b)
        return 0.0 if denom.zero?

        dot / denom
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Whoosh
  # Factory for vector store backends.
  module VectorStore
    autoload :MemoryStore, "whoosh/vector_store/memory_store"

    # Builds a vector store from application config.
    #
    # Reads config_data["vector"]["adapter"] (default "auto"):
    #   "auto"   -> ZvecStore when both the zvec gem and the zvec adapter file
    #               can be loaded, otherwise MemoryStore
    #   "zvec"   -> ZvecStore; raises LoadError when the adapter file cannot
    #               be required
    #   anything else (including "memory") -> MemoryStore
    #
    # config_data may be nil or omitted; both are treated as empty config.
    def self.build(config_data = {})
      vector_config = (config_data || {})["vector"] || {}
      adapter = vector_config["adapter"] || "auto"

      case adapter
      when "auto"
        # "auto" must never fail just because an optional backend is
        # incomplete: fall back to memory unless everything loads.
        if zvec_available? && zvec_store_loadable?
          ZvecStore.new(**zvec_options(vector_config))
        else
          MemoryStore.new
        end
      when "zvec"
        # Explicit request: let LoadError propagate so misconfiguration is loud.
        require "whoosh/vector_store/zvec_store"
        ZvecStore.new(**zvec_options(vector_config))
      else
        MemoryStore.new
      end
    end

    # True when `require "zvec"` succeeds, false when it raises LoadError.
    def self.zvec_available?
      require "zvec"
      true
    rescue LoadError
      false
    end

    # Keyword options passed to ZvecStore.new; path defaults to "db/vectors".
    def self.zvec_options(config)
      { path: config["path"] || "db/vectors" }
    end

    # True when the zvec adapter file can be required, false on LoadError.
    def self.zvec_store_loadable?
      require "whoosh/vector_store/zvec_store"
      true
    rescue LoadError
      false
    end
    private_class_method :zvec_store_loadable?
  end
end
|
data/lib/whoosh/version.rb
CHANGED
data/lib/whoosh.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: whoosh
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.0
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Johannes Dwi Cahyo
|
|
@@ -241,6 +241,8 @@ files:
|
|
|
241
241
|
- lib/whoosh/test.rb
|
|
242
242
|
- lib/whoosh/types.rb
|
|
243
243
|
- lib/whoosh/uploaded_file.rb
|
|
244
|
+
- lib/whoosh/vector_store.rb
|
|
245
|
+
- lib/whoosh/vector_store/memory_store.rb
|
|
244
246
|
- lib/whoosh/version.rb
|
|
245
247
|
homepage: https://github.com/johannesdwicahyo/whoosh
|
|
246
248
|
licenses:
|