vectra-client 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -0
- data/CHANGELOG.md +26 -8
- data/README.md +35 -2
- data/docs/_layouts/default.html +1 -0
- data/docs/_layouts/home.html +44 -3
- data/docs/_layouts/page.html +42 -9
- data/docs/assets/style.css +226 -1
- data/docs/examples/index.md +9 -0
- data/docs/examples/real-world.md +576 -0
- data/docs/grafana_final.png +0 -0
- data/docs/guides/getting-started.md +70 -2
- data/docs/guides/monitoring.md +50 -0
- data/docs/providers/index.md +12 -0
- data/docs/providers/memory.md +145 -0
- data/docs/providers/weaviate.md +84 -25
- data/examples/GRAFANA_QUICKSTART.md +158 -0
- data/examples/README.md +332 -0
- data/examples/comprehensive_demo.rb +1116 -0
- data/examples/grafana-dashboard.json +878 -0
- data/examples/grafana-setup.md +340 -0
- data/examples/prometheus-exporter.rb +229 -0
- data/lib/vectra/batch.rb +63 -8
- data/lib/vectra/client.rb +188 -1
- data/lib/vectra/configuration.rb +4 -2
- data/lib/vectra/credential_rotation.rb +2 -3
- data/lib/vectra/providers/base.rb +19 -1
- data/lib/vectra/providers/memory.rb +298 -0
- data/lib/vectra/providers/qdrant.rb +31 -0
- data/lib/vectra/providers/weaviate.rb +454 -10
- data/lib/vectra/vector.rb +56 -0
- data/lib/vectra/version.rb +1 -1
- data/lib/vectra.rb +20 -0
- data/vectra.gemspec +56 -0
- metadata +12 -1
|
@@ -2,47 +2,491 @@
|
|
|
2
2
|
|
|
3
3
|
module Vectra
|
|
4
4
|
module Providers
|
|
5
|
-
# Weaviate vector database provider
|
|
5
|
+
# Weaviate vector database provider
|
|
6
6
|
#
|
|
7
|
-
#
|
|
7
|
+
# Weaviate is an open-source vector search engine with semantic search
|
|
8
|
+
# capabilities, accessed via a REST and GraphQL API.
|
|
8
9
|
#
|
|
10
|
+
# This implementation focuses on the core CRUD + query surface that matches
|
|
11
|
+
# the Vectra client API. Each Vectra "index" maps to a Weaviate class.
|
|
12
|
+
#
|
|
13
|
+
# @example Basic usage
|
|
14
|
+
# Vectra.configure do |config|
|
|
15
|
+
# config.provider = :weaviate
|
|
16
|
+
# config.api_key = ENV["WEAVIATE_API_KEY"]
|
|
17
|
+
# config.host = "http://localhost:8080"
|
|
18
|
+
# end
|
|
19
|
+
#
|
|
20
|
+
# client = Vectra::Client.new
|
|
21
|
+
# client.upsert(index: "Document", vectors: [...])
|
|
22
|
+
#
|
|
23
|
+
# rubocop:disable Metrics/ClassLength
|
|
9
24
|
class Weaviate < Base
|
|
25
|
+
API_BASE_PATH = "/v1"
|
|
26
|
+
|
|
10
27
|
# Symbolic identifier of this provider.
#
# @return [Symbol] always +:weaviate+
def provider_name = :weaviate
|
|
13
30
|
|
|
14
31
|
# Insert or overwrite vectors in a Weaviate class through the batch endpoint.
#
# Each vector's metadata becomes the object's properties; when a namespace
# is given it is stored next to them under the "_namespace" property.
#
# @param index [String] Weaviate class name
# @param vectors [Array] vectors, passed through +normalize_vectors+
# @param namespace [String, nil] optional namespace tag
# @return [Hash] +{ upserted_count: Integer }+
def upsert(index:, vectors:, namespace: nil)
  normalized = normalize_vectors(vectors)

  objects = normalized.map do |vec|
    # dup so the caller's metadata hash is never mutated
    properties = (vec[:metadata] || {}).dup
    properties["_namespace"] = namespace if namespace

    {
      "class" => index,
      "id" => vec[:id],
      "vector" => vec[:values],
      "properties" => properties
    }
  end

  response = with_error_handling do
    connection.post("#{API_BASE_PATH}/batch/objects", { "objects" => objects })
  end

  return handle_error(response) unless response.success?

  # Weaviate documents the batch response as a bare JSON array (one entry per
  # object). Indexing an Array with a String key raises TypeError, so accept
  # the array form as well as a hash wrapping the results under "objects".
  # Anything else (nil, unparsed string) falls back to the request size.
  payload = response.body
  results = payload.is_a?(Hash) ? payload["objects"] : payload
  upserted = results.is_a?(Array) ? results.size : normalized.size

  log_debug("Upserted #{upserted} vectors to #{index}")
  { upserted_count: upserted }
end
|
|
17
60
|
|
|
18
61
|
# Run a nearest-neighbour search against a Weaviate class via GraphQL.
#
# @param index [String] Weaviate class name (interpolated into the query)
# @param vector [Array<Numeric>] query vector
# @param top_k [Integer] maximum number of results
# @param namespace [String, nil] restricts results to this "_namespace"
# @param filter [Hash, nil] generic filter, translated by +build_where+
# @param include_values [Boolean] request the stored vector
# @param include_metadata [Boolean] request metadata
# @return [QueryResult] built from the extracted matches
def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
          include_values: false, include_metadata: true)
  where_filter = build_where(filter, namespace)

  # GraphQL input objects are not JSON: field names must be bare names and
  # the filter operator is an enum value, so neither may be quoted. Scalars
  # (strings, numbers, booleans) share JSON's literal syntax.
  to_graphql = lambda do |value|
    case value
    when Hash
      fields = value.map do |name, inner|
        rendered = name.to_s == "operator" ? inner.to_s : to_graphql.call(inner)
        "#{name}: #{rendered}"
      end
      "{ #{fields.join(', ')} }"
    when Array
      "[#{value.map { |item| to_graphql.call(item) }.join(', ')}]"
    else
      JSON.generate(value)
    end
  end

  # NOTE(review): "vector" and "metadata" are requested as top-level fields;
  # Weaviate normally exposes the vector under `_additional` and metadata as
  # individual class properties — confirm against the target schema.
  selection_fields = []
  selection_fields << "_additional { id distance }"
  selection_fields << "vector" if include_values
  selection_fields << "metadata" if include_metadata

  selection_block = selection_fields.join(" ")

  graphql = <<~GRAPHQL
    {
      Get {
        #{index}(
          limit: #{top_k}
          nearVector: { vector: [#{vector.map { |v| format('%.10f', v.to_f) }.join(', ')}] }
          #{"where: #{to_graphql.call(where_filter)}" if where_filter}
        ) {
          #{selection_block}
        }
      }
    }
  GRAPHQL

  body = { "query" => graphql }

  response = with_error_handling do
    connection.post("#{API_BASE_PATH}/graphql", body)
  end

  if response.success?
    matches = extract_query_matches(response.body, index, include_values, include_metadata)
    log_debug("Query returned #{matches.size} results")

    QueryResult.from_response(
      matches: matches,
      namespace: namespace
    )
  else
    handle_error(response)
  end
end
|
|
22
104
|
|
|
105
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
|
23
106
|
# Fetch vectors by id and return them keyed by id.
#
# The namespace lives in the "_namespace" property, so filtering by
# namespace happens client-side after the response arrives. Properties whose
# name starts with "_" are stripped from the returned metadata.
#
# NOTE(review): confirm that "/objects/_mget" is an endpoint the targeted
# Weaviate version actually exposes.
#
# @param index [String] Weaviate class name
# @param ids [Array<String>] object ids to fetch
# @param namespace [String, nil] keep only objects tagged with this namespace
# @return [Hash{String => Vector}]
def fetch(index:, ids:, namespace: nil)
  request = { "class" => index, "ids" => ids, "include" => ["vector", "properties"] }

  response = with_error_handling do
    connection.post("#{API_BASE_PATH}/objects/_mget", request)
  end
  return handle_error(response) unless response.success?

  (response.body["objects"] || []).each_with_object({}) do |entry, found|
    next unless entry["status"] == "SUCCESS"

    properties = entry.dig("result", "properties") || {}
    next if namespace && properties["_namespace"] != namespace

    vector_id = entry.dig("result", "id")
    found[vector_id] = Vector.new(
      id: vector_id,
      values: entry.dig("result", "vector") || [],
      metadata: properties.reject { |name, _| name.to_s.start_with?("_") }
    )
  end
end
|
|
143
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
|
26
144
|
|
|
27
145
|
# Patch the metadata (properties) of a single object.
#
# When metadata is given, a copy of it is sent as the object's properties,
# with the namespace added under "_namespace" if one is supplied. Without
# metadata only the class and id are sent.
#
# @param index [String] Weaviate class name
# @param id [String] object id
# @param metadata [Hash, nil] properties to write
# @param namespace [String, nil] optional namespace tag
# @return [Hash] +{ updated: true }+ on success
def update(index:, id:, metadata:, namespace: nil)
  payload = { "class" => index, "id" => id }

  if metadata
    # work on a copy so the caller's hash is left untouched
    payload["properties"] = metadata.dup.tap do |properties|
      properties["_namespace"] = namespace if namespace
    end
  end

  response = with_error_handling do
    connection.patch("#{API_BASE_PATH}/objects/#{id}", payload)
  end
  return handle_error(response) unless response.success?

  log_debug("Updated metadata for vector #{id}")
  { updated: true }
end
|
|
30
168
|
|
|
169
|
+
# rubocop:disable Metrics/MethodLength, Metrics/PerceivedComplexity
|
|
31
170
|
# Delete objects either one by one (ids) or in bulk (filter / namespace /
# delete_all).
#
# A bulk delete with no criteria is refused unless +delete_all: true+ is
# passed: without that guard a bare +delete(index: ...)+ would send a
# request with no "where" restriction at all.
#
# NOTE(review): confirm that "/objects/delete" is an endpoint the targeted
# Weaviate version actually exposes.
#
# @param index [String] Weaviate class name
# @param ids [Array<String>, nil] ids to delete individually
# @param namespace [String, nil] restrict the bulk delete to this namespace
# @param filter [Hash, nil] generic filter, translated by +build_where+
# @param delete_all [Boolean] explicitly allow an unrestricted bulk delete
# @return [Hash] +{ deleted: true }+ on success
# @raise [ArgumentError] when no ids, filter or namespace is given and
#   +delete_all+ is false
def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
  if ids
    # Delete individual objects by ID
    ids.each do |id|
      with_error_handling do
        response = connection.delete("#{API_BASE_PATH}/objects/#{id}") do |req|
          req.params["class"] = index
        end
        handle_error(response) unless response.success?
      end
    end

    log_debug("Deleted #{ids.size} vectors from #{index}")
    return { deleted: true }
  end

  # Delete by filter / namespace / delete_all
  where_filter = build_where(filter, namespace)
  if where_filter.nil? && !delete_all
    raise ArgumentError, "delete requires ids, filter, namespace or delete_all: true"
  end

  body = { "class" => index }
  body["where"] = where_filter if where_filter

  response = with_error_handling do
    connection.post("#{API_BASE_PATH}/objects/delete", body)
  end
  return handle_error(response) unless response.success?

  log_debug("Deleted vectors from #{index} with filter")
  { deleted: true }
end
|
|
209
|
+
# rubocop:enable Metrics/MethodLength, Metrics/PerceivedComplexity
|
|
34
210
|
|
|
35
211
|
# List every class in the Weaviate schema as a Vectra index description.
#
# The status is always reported as "ready"; dimension and metric are read
# from each class's "vectorIndexConfig" (missing config yields nil / the
# result of +distance_to_metric(nil)+).
#
# @return [Array<Hash>] hashes with :name, :dimension, :metric, :status
def list_indexes
  response = with_error_handling { connection.get("#{API_BASE_PATH}/schema") }
  return handle_error(response) unless response.success?

  (response.body["classes"] || []).map do |schema_class|
    index_config = schema_class["vectorIndexConfig"] || {}

    {
      name: schema_class["class"],
      dimension: index_config["dimension"],
      metric: distance_to_metric(index_config["distance"]),
      status: "ready"
    }
  end
end
|
|
38
231
|
|
|
39
232
|
# Describe a single Weaviate class as a Vectra index.
#
# Falls back to the requested index name when the response carries no
# "class" field; the status is always reported as "ready".
#
# @param index [String] Weaviate class name
# @return [Hash] hash with :name, :dimension, :metric, :status
def describe_index(index:)
  response = with_error_handling { connection.get("#{API_BASE_PATH}/schema/#{index}") }
  return handle_error(response) unless response.success?

  schema = response.body
  index_config = schema["vectorIndexConfig"] || {}

  {
    name: schema["class"] || index,
    dimension: index_config["dimension"],
    metric: distance_to_metric(index_config["distance"]),
    status: "ready"
  }
end
|
|
42
251
|
|
|
252
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
43
253
|
# Count the objects in a Weaviate class, optionally within one namespace.
#
# The dimension is not available from the Aggregate query and is always
# reported as nil.
#
# @param index [String] Weaviate class name (interpolated into the query)
# @param namespace [String, nil] count only objects tagged with this namespace
# @return [Hash] hash with :total_vector_count, :dimension, :namespaces
def stats(index:, namespace: nil)
  # GraphQL rejects an empty argument list ("Index()"), so the parentheses
  # are emitted only when there is a where clause. The clause uses GraphQL
  # input syntax: bare field names and an unquoted operator enum; only the
  # scalar value is rendered as a JSON literal.
  arguments = if namespace
                operand = %({ path: ["_namespace"], operator: Equal, valueString: #{JSON.generate(namespace)} })
                "(where: { operator: And, operands: [#{operand}] })"
              else
                ""
              end

  graphql = <<~GRAPHQL
    {
      Aggregate {
        #{index}#{arguments} {
          meta {
            count
          }
        }
      }
    }
  GRAPHQL

  response = with_error_handling do
    connection.post("#{API_BASE_PATH}/graphql", { "query" => graphql })
  end
  return handle_error(response) unless response.success?

  data = response.body["data"] || {}
  aggregate = data["Aggregate"] || {}
  class_stats = aggregate[index]&.first || {}
  count = (class_stats["meta"] || {})["count"] || 0

  {
    total_vector_count: count,
    dimension: nil,
    namespaces: namespace ? { namespace => { vector_count: count } } : {}
  }
end
|
|
293
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
294
|
+
|
|
295
|
+
private
|
|
296
|
+
|
|
297
|
+
# Run the inherited validation, then require a non-empty host.
#
# @raise [ConfigurationError] when config.host is nil or empty
def validate_config!
  super

  host = config.host
  return unless host.nil? || host.empty?

  raise ConfigurationError, "Host must be configured for Weaviate"
end
|
|
301
|
+
|
|
302
|
+
# Memoized HTTP connection to the configured host.
#
# A host without an explicit scheme is prefixed with "http://".
def connection
  return @connection if @connection

  url = config.host
  url = "http://#{url}" unless url.start_with?("http://", "https://")

  @connection = build_connection(url, auth_headers)
end
|
|
313
|
+
|
|
314
|
+
# Authorization header for the configured API key.
#
# @return [Hash] a Bearer "Authorization" header, or an empty hash when no
#   (or an empty) API key is configured
def auth_headers
  key = config.api_key
  return {} if !key || key.empty?

  { "Authorization" => "Bearer #{key}" }
end
|
|
319
|
+
|
|
320
|
+
# Wrap HTTP calls to handle Faraday::RetriableResponse
|
|
321
|
+
def with_error_handling
  # Returns the block's value; only Faraday::RetriableResponse is intercepted,
  # any other exception propagates to the caller.
  yield
rescue Faraday::RetriableResponse => retriable_error
  handle_retriable_response(retriable_error)
end
|
|
326
|
+
|
|
327
|
+
# Build Weaviate "where" filter for GraphQL API from generic filter + namespace
|
|
328
|
+
#
|
|
329
|
+
# Weaviate expects a structure like:
|
|
330
|
+
# {
|
|
331
|
+
# operator: "And",
|
|
332
|
+
# operands: [
|
|
333
|
+
# { path: ["category"], operator: "Equal", valueString: "tech" },
|
|
334
|
+
# ...
|
|
335
|
+
# ]
|
|
336
|
+
# }
|
|
337
|
+
def build_where(filter, namespace)
  clauses = []

  # The namespace restriction always comes first.
  if namespace
    clauses << { "path" => ["_namespace"], "operator" => "Equal", "valueString" => namespace }
  end

  # Anything that is not a Hash (including nil) contributes no clauses.
  if filter.is_a?(Hash)
    clauses.concat(filter.map { |field, expected| build_where_operand(field.to_s, expected) })
  end

  # No restriction at all is signalled with nil rather than an empty "And".
  clauses.empty? ? nil : { "operator" => "And", "operands" => clauses }
end
|
|
361
|
+
|
|
362
|
+
# Translate one filter entry into a Weaviate operand.
#
# - Hash values are operator expressions, delegated to
#   +build_operator_operand+
# - Array values become a "ContainsAny" match over the stringified elements
# - everything else is an "Equal" match, typed by +infer_value_key+
#
# @param key [String] property name
# @param value [Object] filter value
# @return [Hash] operand
def build_where_operand(key, value)
  return build_operator_operand(key, value) if value.is_a?(Hash)

  if value.is_a?(Array)
    { "path" => [key], "operator" => "ContainsAny", "valueStringArray" => value.map(&:to_s) }
  else
    { "path" => [key], "operator" => "Equal", infer_value_key(value) => value }
  end
end
|
|
380
|
+
|
|
381
|
+
# rubocop:disable Metrics/MethodLength
|
|
382
|
+
# Translate an operator expression such as { "$gt" => 5 } into a Weaviate
# operand.
#
# A hash carrying several operators (e.g. a range, { "$gte" => 1,
# "$lt" => 9 }) yields one comparison per operator, combined with "And";
# previously every operator after the first was silently dropped. A single
# operator still yields the bare comparison.
#
# @param key [String] property name
# @param operator_hash [Hash] operator => value pairs
# @return [Hash] operand
def build_operator_operand(key, operator_hash)
  if operator_hash.size > 1
    comparisons = operator_hash.map { |op, val| comparison_operand(key, op, val) }
    return { "operator" => "And", "operands" => comparisons }
  end

  # Zero or one entry: an empty hash keeps its old result (Equal against nil).
  op, val = operator_hash.first
  comparison_operand(key, op, val)
end

# Build a single comparison operand; unknown operators fall back to "Equal".
def comparison_operand(key, op, val)
  operator = case op.to_s
             when "$gt" then "GreaterThan"
             when "$gte" then "GreaterThanEqual"
             when "$lt" then "LessThan"
             when "$lte" then "LessThanEqual"
             when "$ne" then "NotEqual"
             else "Equal"
             end

  { "path" => [key], "operator" => operator, infer_value_key(val) => val }
end
|
|
424
|
+
# rubocop:enable Metrics/MethodLength
|
|
425
|
+
|
|
426
|
+
# Choose the appropriate GraphQL value key based on Ruby type
|
|
427
|
+
def infer_value_key(value)
  # Integer is tested before Float; anything that is not a number or a
  # boolean (strings, symbols, nil, ...) is sent as a string value.
  return "valueInt" if value.is_a?(Integer)
  return "valueNumber" if value.is_a?(Float)
  return "valueBoolean" if value.is_a?(TrueClass) || value.is_a?(FalseClass)

  "valueString"
end
|
|
439
|
+
|
|
440
|
+
# Extract matches from GraphQL response
|
|
441
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
442
|
+
def extract_query_matches(body, index, include_values, include_metadata)
  # Walk data -> Get -> <index>, treating every missing level as empty.
  rows = ((body["data"] || {})["Get"] || {})[index] || []

  rows.map do |row|
    extra = row["_additional"] || {}

    # Certainty is used as-is when present; otherwise the score is derived
    # from the distance (1 - distance). With neither, the score is nil.
    score = if (certainty = extra["certainty"])
              certainty.to_f
            elsif (distance = extra["distance"])
              1.0 - distance.to_f
            end

    {
      id: extra["id"] || row["id"],
      score: score,
      values: (row["vector"] if include_values),
      metadata: include_metadata ? (row["metadata"] || {}) : {}
    }
  end
end
|
|
474
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
475
|
+
|
|
476
|
+
# Convert Weaviate distance name to Vectra metric
|
|
477
|
+
def distance_to_metric(distance)
  # Matching is case-insensitive; names without a Vectra equivalent are
  # passed through lower-cased (nil becomes an empty string).
  name = distance.to_s.downcase

  {
    "cosine" => "cosine",
    "l2-squared" => "euclidean",
    "l2" => "euclidean",
    "dot" => "dot_product"
  }.fetch(name, name)
end
|
|
46
489
|
end
|
|
490
|
+
# rubocop:enable Metrics/ClassLength
|
|
47
491
|
end
|
|
48
492
|
end
|
data/lib/vectra/vector.rb
CHANGED
|
@@ -106,6 +106,62 @@ module Vectra
|
|
|
106
106
|
Math.sqrt(values.zip(other_values).sum { |a, b| (a - b)**2 })
|
|
107
107
|
end
|
|
108
108
|
|
|
109
|
+
# Normalize the vector in-place (mutates the vector)
|
|
110
|
+
#
|
|
111
|
+
# @param type [Symbol] normalization type: :l2 (default) or :l1
|
|
112
|
+
# @return [Vector] self (for method chaining)
|
|
113
|
+
#
|
|
114
|
+
# @example L2 normalization (unit vector)
|
|
115
|
+
# vector = Vectra::Vector.new(id: 'v1', values: [3.0, 4.0])
|
|
116
|
+
# vector.normalize!
|
|
117
|
+
# vector.values # => [0.6, 0.8] (magnitude = 1.0)
|
|
118
|
+
#
|
|
119
|
+
# @example L1 normalization (sum = 1)
|
|
120
|
+
# vector.normalize!(type: :l1)
|
|
121
|
+
# vector.values.sum(&:abs) # => 1.0
|
|
122
|
+
def normalize!(type: :l2)
  norm = case type
         when :l2 then Math.sqrt(values.sum { |v| v**2 })
         when :l1 then values.sum(&:abs)
         else raise ArgumentError, "Unknown normalization type: #{type}. Use :l2 or :l1"
         end

  # Zero vector - cannot normalize, return as-is
  return self if norm.zero?

  # fdiv always divides in floating point. With plain `/`, an L1 norm of
  # Integer components is itself an Integer and the division truncates
  # (e.g. [1, 2, 1] would become [0, 0, 0]).
  @values = values.map { |v| v.fdiv(norm) }
  self
end
|
|
145
|
+
|
|
146
|
+
# Normalize a vector array without creating a Vector object
|
|
147
|
+
#
|
|
148
|
+
# @param vector [Array<Float>] vector values to normalize
|
|
149
|
+
# @param type [Symbol] normalization type: :l2 (default) or :l1
|
|
150
|
+
# @return [Array<Float>] normalized vector values
|
|
151
|
+
#
|
|
152
|
+
# @example Normalize OpenAI embedding
|
|
153
|
+
# embedding = openai_response['data'][0]['embedding']
|
|
154
|
+
# normalized = Vectra::Vector.normalize(embedding)
|
|
155
|
+
# client.upsert(vectors: [{ id: '1', values: normalized }])
|
|
156
|
+
#
|
|
157
|
+
# @example L1 normalization
|
|
158
|
+
# normalized = Vectra::Vector.normalize([1.0, 2.0, 3.0], type: :l1)
|
|
159
|
+
def self.normalize(vector, type: :l2)
  # Normalizes a throwaway Vector built on a copy, so the caller's array is
  # never modified.
  new(id: "temp", values: vector.dup)
    .tap { |scratch| scratch.normalize!(type: type) }
    .values
end
|
|
164
|
+
|
|
109
165
|
# Check equality with another vector
|
|
110
166
|
#
|
|
111
167
|
# @param other [Vector] the other vector
|
data/lib/vectra/version.rb
CHANGED
data/lib/vectra.rb
CHANGED
|
@@ -23,6 +23,7 @@ require_relative "vectra/providers/pinecone"
|
|
|
23
23
|
require_relative "vectra/providers/qdrant"
|
|
24
24
|
require_relative "vectra/providers/weaviate"
|
|
25
25
|
require_relative "vectra/providers/pgvector"
|
|
26
|
+
require_relative "vectra/providers/memory"
|
|
26
27
|
require_relative "vectra/client"
|
|
27
28
|
|
|
28
29
|
# Vectra - Unified Ruby client for vector databases
|
|
@@ -157,5 +158,24 @@ module Vectra
|
|
|
157
158
|
**options
|
|
158
159
|
)
|
|
159
160
|
end
|
|
161
|
+
|
|
162
|
+
# Shortcut to create a Memory client (for testing)
|
|
163
|
+
#
|
|
164
|
+
# @param options [Hash] additional options
|
|
165
|
+
# @return [Client]
|
|
166
|
+
#
|
|
167
|
+
# @example In test environment
|
|
168
|
+
# Vectra.configure do |config|
|
|
169
|
+
# config.provider = :memory if Rails.env.test?
|
|
170
|
+
# end
|
|
171
|
+
#
|
|
172
|
+
# client = Vectra::Client.new
|
|
173
|
+
#
|
|
174
|
+
def memory(**options)
  # :memory is only the default provider: a :provider key in options still
  # takes precedence, as with any other option.
  settings = { provider: :memory }.merge(options)
  Client.new(**settings)
end
|
|
160
180
|
end
|
|
161
181
|
end
|
data/vectra.gemspec
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/vectra/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for the "vectra-client" gem. The version comes from
# Vectra::VERSION (lib/vectra/version.rb).
Gem::Specification.new do |spec|
  spec.name = "vectra-client"
  spec.version = Vectra::VERSION
  spec.authors = ["Mijo Kristo"]
  spec.email = ["mijo@mijokristo.com"]

  spec.summary = "Unified Ruby client for vector databases"
  spec.description = "Vectra provides a unified interface to work with multiple vector database providers including Pinecone, Qdrant, Weaviate, and PostgreSQL with pgvector. Write once, switch providers easily."
  spec.homepage = "https://github.com/stokry/vectra"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.2.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/stokry/vectra"
  spec.metadata["changelog_uri"] = "https://github.com/stokry/vectra/blob/main/CHANGELOG.md"
  spec.metadata["rubygems_mfa_required"] = "true"

  # Package every git-tracked file except this gemspec itself and anything
  # under the listed development-only prefixes.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile])
    end
  end

  # Executables are the packaged files under exe/, referenced by basename.
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "faraday", "~> 2.0"
  spec.add_dependency "faraday-retry", "~> 2.0"
  spec.add_dependency "concurrent-ruby", "~> 1.2"

  # Optional runtime dependencies (required for specific features)
  # For ActiveRecord integration: gem 'activerecord', '>= 6.0'
  # For pgvector provider: gem 'pg', '~> 1.5'
  # For connection pooling: gem 'connection_pool', '~> 2.4'
  # For Datadog instrumentation: gem 'dogstatsd-ruby'

  # Development dependencies
  spec.add_development_dependency "activerecord", ">= 6.0"
  spec.add_development_dependency "sqlite3", ">= 2.1" # For AR tests (ActiveRecord 8+ requires >= 2.1)
  spec.add_development_dependency "pg", "~> 1.5"
  spec.add_development_dependency "rake", "~> 13.0"
  spec.add_development_dependency "rspec", "~> 3.12"
  spec.add_development_dependency "webmock", "~> 3.19"
  spec.add_development_dependency "vcr", "~> 6.2"
  spec.add_development_dependency "rubocop", "~> 1.57"
  spec.add_development_dependency "rubocop-rspec", "~> 2.25"
  spec.add_development_dependency "simplecov", "~> 0.22"
  spec.add_development_dependency "yard", "~> 0.9"
end
|