vectra-client 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,47 +2,491 @@
2
2
 
3
3
  module Vectra
4
4
  module Providers
5
- # Weaviate vector database provider (planned for v0.3.0)
5
+ # Weaviate vector database provider
6
6
  #
7
- # @note This provider is not yet implemented
7
+ # Weaviate is an open-source vector search engine with semantic search
8
+ # capabilities, accessed via a REST and GraphQL API.
8
9
  #
10
+ # This implementation focuses on the core CRUD + query surface that matches
11
+ # the Vectra client API. Each Vectra "index" maps to a Weaviate class.
12
+ #
13
+ # @example Basic usage
14
+ # Vectra.configure do |config|
15
+ # config.provider = :weaviate
16
+ # config.api_key = ENV["WEAVIATE_API_KEY"]
17
+ # config.host = "http://localhost:8080"
18
+ # end
19
+ #
20
+ # client = Vectra::Client.new
21
+ # client.upsert(index: "Document", vectors: [...])
22
+ #
23
+ # rubocop:disable Metrics/ClassLength
9
24
  class Weaviate < Base
25
+ API_BASE_PATH = "/v1"
26
+
10
27
  def provider_name
11
28
  :weaviate
12
29
  end
13
30
 
14
31
  def upsert(index:, vectors:, namespace: nil)
15
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
32
+ normalized = normalize_vectors(vectors)
33
+
34
+ objects = normalized.map do |vec|
35
+ properties = (vec[:metadata] || {}).dup
36
+ properties["_namespace"] = namespace if namespace
37
+
38
+ {
39
+ "class" => index,
40
+ "id" => vec[:id],
41
+ "vector" => vec[:values],
42
+ "properties" => properties
43
+ }
44
+ end
45
+
46
+ body = { "objects" => objects }
47
+
48
+ response = with_error_handling do
49
+ connection.post("#{API_BASE_PATH}/batch/objects", body)
50
+ end
51
+
52
+ if response.success?
53
+ upserted = response.body["objects"]&.size || normalized.size
54
+ log_debug("Upserted #{upserted} vectors to #{index}")
55
+ { upserted_count: upserted }
56
+ else
57
+ handle_error(response)
58
+ end
16
59
  end
17
60
 
18
61
  def query(index:, vector:, top_k: 10, namespace: nil, filter: nil,
19
62
  include_values: false, include_metadata: true)
20
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
63
+ where_filter = build_where(filter, namespace)
64
+
65
+ selection_fields = []
66
+ selection_fields << "_additional { id distance }"
67
+ selection_fields << "vector" if include_values
68
+ selection_fields << "metadata" if include_metadata
69
+
70
+ selection_block = selection_fields.join(" ")
71
+
72
+ graphql = <<~GRAPHQL
73
+ {
74
+ Get {
75
+ #{index}(
76
+ limit: #{top_k}
77
+ nearVector: { vector: [#{vector.map { |v| format('%.10f', v.to_f) }.join(', ')}] }
78
+ #{"where: #{JSON.generate(where_filter)}" if where_filter}
79
+ ) {
80
+ #{selection_block}
81
+ }
82
+ }
83
+ }
84
+ GRAPHQL
85
+
86
+ body = { "query" => graphql }
87
+
88
+ response = with_error_handling do
89
+ connection.post("#{API_BASE_PATH}/graphql", body)
90
+ end
91
+
92
+ if response.success?
93
+ matches = extract_query_matches(response.body, index, include_values, include_metadata)
94
+ log_debug("Query returned #{matches.size} results")
95
+
96
+ QueryResult.from_response(
97
+ matches: matches,
98
+ namespace: namespace
99
+ )
100
+ else
101
+ handle_error(response)
102
+ end
21
103
  end
22
104
 
105
+ # rubocop:disable Metrics/PerceivedComplexity
23
106
  def fetch(index:, ids:, namespace: nil)
24
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
107
+ body = {
108
+ "class" => index,
109
+ "ids" => ids,
110
+ "include" => ["vector", "properties"]
111
+ }
112
+
113
+ # Namespace is stored as a property, so we filter client-side
114
+ response = with_error_handling do
115
+ connection.post("#{API_BASE_PATH}/objects/_mget", body)
116
+ end
117
+
118
+ if response.success?
119
+ objects = response.body["objects"] || []
120
+ vectors = {}
121
+
122
+ objects.each do |obj|
123
+ next unless obj["status"] == "SUCCESS"
124
+
125
+ props = obj.dig("result", "properties") || {}
126
+ obj_namespace = props["_namespace"]
127
+ next if namespace && obj_namespace != namespace
128
+
129
+ clean_metadata = props.reject { |k, _| k.to_s.start_with?("_") }
130
+
131
+ vectors[obj.dig("result", "id")] = Vector.new(
132
+ id: obj.dig("result", "id"),
133
+ values: obj.dig("result", "vector") || [],
134
+ metadata: clean_metadata
135
+ )
136
+ end
137
+
138
+ vectors
139
+ else
140
+ handle_error(response)
141
+ end
25
142
  end
143
+ # rubocop:enable Metrics/PerceivedComplexity
26
144
 
27
145
  def update(index:, id:, metadata:, namespace: nil)
28
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
146
+ body = {
147
+ "class" => index,
148
+ "id" => id
149
+ }
150
+
151
+ if metadata
152
+ props = metadata.dup
153
+ props["_namespace"] = namespace if namespace
154
+ body["properties"] = props
155
+ end
156
+
157
+ response = with_error_handling do
158
+ connection.patch("#{API_BASE_PATH}/objects/#{id}", body)
159
+ end
160
+
161
+ if response.success?
162
+ log_debug("Updated metadata for vector #{id}")
163
+ { updated: true }
164
+ else
165
+ handle_error(response)
166
+ end
29
167
  end
30
168
 
169
+ # rubocop:disable Metrics/MethodLength, Metrics/PerceivedComplexity
31
170
  def delete(index:, ids: nil, namespace: nil, filter: nil, delete_all: false)
32
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
171
+ if ids
172
+ # Delete individual objects by ID
173
+ ids.each do |id|
174
+ with_error_handling do
175
+ response = connection.delete("#{API_BASE_PATH}/objects/#{id}") do |req|
176
+ req.params["class"] = index
177
+ end
178
+ handle_error(response) unless response.success?
179
+ end
180
+ end
181
+
182
+ log_debug("Deleted #{ids.size} vectors from #{index}")
183
+ { deleted: true }
184
+ else
185
+ # Delete by filter / namespace / delete_all
186
+ where_filter = if delete_all && namespace.nil? && filter.nil?
187
+ nil
188
+ else
189
+ build_where(filter, namespace)
190
+ end
191
+
192
+ body = {
193
+ "class" => index
194
+ }
195
+ body["where"] = where_filter if where_filter
196
+
197
+ response = with_error_handling do
198
+ connection.post("#{API_BASE_PATH}/objects/delete", body)
199
+ end
200
+
201
+ if response.success?
202
+ log_debug("Deleted vectors from #{index} with filter")
203
+ { deleted: true }
204
+ else
205
+ handle_error(response)
206
+ end
207
+ end
33
208
  end
209
+ # rubocop:enable Metrics/MethodLength, Metrics/PerceivedComplexity
34
210
 
35
211
  def list_indexes
36
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
212
+ response = with_error_handling do
213
+ connection.get("#{API_BASE_PATH}/schema")
214
+ end
215
+
216
+ if response.success?
217
+ classes = response.body["classes"] || []
218
+ classes.map do |cls|
219
+ vector_cfg = cls["vectorIndexConfig"] || {}
220
+ {
221
+ name: cls["class"],
222
+ dimension: vector_cfg["dimension"],
223
+ metric: distance_to_metric(vector_cfg["distance"]),
224
+ status: "ready"
225
+ }
226
+ end
227
+ else
228
+ handle_error(response)
229
+ end
37
230
  end
38
231
 
39
232
  def describe_index(index:)
40
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
233
+ response = with_error_handling do
234
+ connection.get("#{API_BASE_PATH}/schema/#{index}")
235
+ end
236
+
237
+ if response.success?
238
+ body = response.body
239
+ vector_cfg = body["vectorIndexConfig"] || {}
240
+
241
+ {
242
+ name: body["class"] || index,
243
+ dimension: vector_cfg["dimension"],
244
+ metric: distance_to_metric(vector_cfg["distance"]),
245
+ status: "ready"
246
+ }
247
+ else
248
+ handle_error(response)
249
+ end
41
250
  end
42
251
 
252
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
43
253
  def stats(index:, namespace: nil)
44
- raise NotImplementedError, "Weaviate provider is planned for v0.3.0"
254
+ where_filter = namespace ? build_where({}, namespace) : nil
255
+
256
+ where_clause = where_filter ? "where: #{JSON.generate(where_filter)}" : ""
257
+
258
+ graphql = <<~GRAPHQL
259
+ {
260
+ Aggregate {
261
+ #{index}(
262
+ #{where_clause}
263
+ ) {
264
+ meta {
265
+ count
266
+ }
267
+ }
268
+ }
269
+ }
270
+ GRAPHQL
271
+
272
+ body = { "query" => graphql }
273
+
274
+ response = with_error_handling do
275
+ connection.post("#{API_BASE_PATH}/graphql", body)
276
+ end
277
+
278
+ if response.success?
279
+ data = response.body["data"] || {}
280
+ aggregate = data["Aggregate"] || {}
281
+ class_stats = aggregate[index]&.first || {}
282
+ meta = class_stats["meta"] || {}
283
+
284
+ {
285
+ total_vector_count: meta["count"] || 0,
286
+ dimension: nil,
287
+ namespaces: namespace ? { namespace => { vector_count: meta["count"] || 0 } } : {}
288
+ }
289
+ else
290
+ handle_error(response)
291
+ end
292
+ end
293
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
294
+
295
+ private
296
+
297
# Runs the inherited validation, then requires a non-empty host.
#
# @raise [ConfigurationError] when the configured host is nil or empty
def validate_config!
  super

  host = config.host
  return unless host.nil? || host.empty?

  raise ConfigurationError, "Host must be configured for Weaviate"
end
301
+
302
# Memoized HTTP connection to the configured host.
#
# A host without an explicit http:// or https:// scheme is prefixed with
# "http://" before being handed to +build_connection+.
def connection
  return @connection if @connection

  host = config.host
  url = host.start_with?("http://", "https://") ? host : "http://#{host}"

  @connection = build_connection(url, auth_headers)
end
313
+
314
# Authorization header for the configured API key.
#
# @return [Hash] a Bearer "Authorization" header, or {} when no key
#   (nil or empty) is configured
def auth_headers
  if config.api_key && !config.api_key.empty?
    { "Authorization" => "Bearer #{config.api_key}" }
  else
    {}
  end
end
319
+
320
# Runs the given block (an HTTP call) and returns its result. If the block
# raises Faraday::RetriableResponse, the exception is passed to
# +handle_retriable_response+ and that method's result is returned instead.
def with_error_handling
  yield
rescue Faraday::RetriableResponse => retriable
  handle_retriable_response(retriable)
end
326
+
327
# Combine a namespace restriction and a generic filter hash into one
# Weaviate "where" structure.
#
# The result has the shape:
#   { "operator" => "And", "operands" => [ <operand>, ... ] }
# with the namespace operand (an Equal match on "_namespace") first, followed
# by one operand per filter entry. A filter that is not a Hash contributes
# nothing.
#
# @param filter [Hash, nil] field => condition pairs
# @param namespace [String, nil] namespace to match
# @return [Hash, nil] the where structure, or nil when there are no operands
def build_where(filter, namespace)
  namespace_clauses =
    if namespace
      [{ "path" => ["_namespace"], "operator" => "Equal", "valueString" => namespace }]
    else
      []
    end

  filter_clauses =
    if filter.is_a?(Hash)
      filter.map { |field, condition| build_where_operand(field.to_s, condition) }
    else
      []
    end

  clauses = namespace_clauses + filter_clauses
  return nil if clauses.empty?

  { "operator" => "And", "operands" => clauses }
end
361
+
362
# Build the operand for one filter entry.
#
# Hash conditions are delegated to +build_operator_operand+, Array conditions
# become a ContainsAny match on their stringified elements, and anything else
# becomes an Equal match typed by +infer_value_key+.
#
# @param key [String] property name
# @param value [Object] condition for that property
# @return [Hash] Weaviate where operand
def build_where_operand(key, value)
  return build_operator_operand(key, value) if value.is_a?(Hash)

  operand = { "path" => [key] }

  if value.is_a?(Array)
    operand["operator"] = "ContainsAny"
    operand["valueStringArray"] = value.map(&:to_s)
  else
    operand["operator"] = "Equal"
    operand[infer_value_key(value)] = value
  end

  operand
end
380
+
381
+ # rubocop:disable Metrics/MethodLength
382
# Maps Vectra's "$"-prefixed comparison operators to Weaviate operator names.
# Anything not listed here is treated as an equality match.
WEAVIATE_COMPARISON_OPERATORS = {
  "$gt" => "GreaterThan",
  "$gte" => "GreaterThanEqual",
  "$lt" => "LessThan",
  "$lte" => "LessThanEqual",
  "$ne" => "NotEqual"
}.freeze

# Build the operand for an operator-style condition such as { "$gt" => 5 }.
#
# Every operator in the hash is honoured. A single-operator hash yields one
# plain operand; a hash with several operators (for example a range like
# { "$gte" => 1, "$lte" => 9 }) yields an "And" operand wrapping one operand
# per operator, so that no condition is silently dropped.
#
# @param key [String] property name
# @param operator_hash [Hash] operator => comparison value pairs
# @return [Hash] Weaviate where operand
def build_operator_operand(key, operator_hash)
  # An empty hash keeps its historical meaning: an Equal match against nil.
  conditions = operator_hash.empty? ? [[nil, nil]] : operator_hash.to_a

  operands = conditions.map do |op, val|
    {
      "path" => [key],
      "operator" => WEAVIATE_COMPARISON_OPERATORS.fetch(op.to_s, "Equal"),
      infer_value_key(val) => val
    }
  end

  return operands.first if operands.size == 1

  { "operator" => "And", "operands" => operands }
end
424
+ # rubocop:enable Metrics/MethodLength
425
+
426
# Pick the Weaviate value field that matches a Ruby value's type:
# Integer => "valueInt", Float => "valueNumber", true/false =>
# "valueBoolean", everything else => "valueString".
#
# @param value [Object] comparison value
# @return [String] name of the value field
def infer_value_key(value)
  return "valueInt" if value.is_a?(Integer)
  return "valueNumber" if value.is_a?(Float)
  return "valueBoolean" if true.equal?(value) || false.equal?(value)

  "valueString"
end
439
+
440
+ # Extract matches from GraphQL response
441
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
442
# Turn a GraphQL "Get" response body into a list of match hashes.
#
# The score is the reported certainty when present, otherwise
# 1.0 - distance, otherwise nil. The ID comes from "_additional" with a
# fallback to the object's own "id".
#
# @param body [Hash] parsed response body
# @param index [String] Weaviate class name the query targeted
# @param include_values [Boolean] copy the object's "vector" into :values
# @param include_metadata [Boolean] copy the object's "metadata" into :metadata
# @return [Array<Hash>] hashes with :id, :score, :values and :metadata
def extract_query_matches(body, index, include_values, include_metadata)
  hits = ((body["data"] || {})["Get"] || {})[index] || []

  hits.map do |hit|
    extra = hit["_additional"] || {}

    score =
      if (certainty = extra["certainty"])
        certainty.to_f
      elsif (distance = extra["distance"])
        1.0 - distance.to_f
      end

    {
      id: extra["id"] || hit["id"],
      score: score,
      values: (hit["vector"] if include_values),
      metadata: include_metadata ? (hit["metadata"] || {}) : {}
    }
  end
end
474
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
475
+
476
# Translate a Weaviate distance name into Vectra's metric name.
#
# Matching is case-insensitive; names without a known translation are
# returned lower-cased as-is (nil becomes "").
#
# @param distance [#to_s] Weaviate distance name
# @return [String] Vectra metric name
def distance_to_metric(distance)
  name = distance.to_s.downcase

  {
    "cosine" => "cosine",
    "l2-squared" => "euclidean",
    "l2" => "euclidean",
    "dot" => "dot_product"
  }.fetch(name, name)
end
46
489
  end
490
+ # rubocop:enable Metrics/ClassLength
47
491
  end
48
492
  end
data/lib/vectra/vector.rb CHANGED
@@ -106,6 +106,62 @@ module Vectra
106
106
  Math.sqrt(values.zip(other_values).sum { |a, b| (a - b)**2 })
107
107
  end
108
108
 
109
# Normalize the vector in place (mutates the receiver).
#
# A vector whose norm is zero (including an empty one) cannot be normalized
# and is returned unchanged.
#
# @param type [Symbol] normalization type: :l2 (default) or :l1
# @return [Vector] self (for method chaining)
# @raise [ArgumentError] when +type+ is neither :l2 nor :l1
#
# @example L2 normalization (unit vector)
#   vector = Vectra::Vector.new(id: 'v1', values: [3.0, 4.0])
#   vector.normalize!
#   vector.values # => [0.6, 0.8] (magnitude = 1.0)
#
# @example L1 normalization (sum of absolute values = 1)
#   vector.normalize!(type: :l1)
#   vector.values.sum(&:abs) # => 1.0
def normalize!(type: :l2)
  norm =
    case type
    when :l2 then Math.sqrt(values.sum { |v| v**2 })
    when :l1 then values.sum(&:abs)
    else raise ArgumentError, "Unknown normalization type: #{type}. Use :l2 or :l1"
    end

  return self if norm.zero?

  # The L1 norm of Integer components is itself an Integer; dividing by it
  # directly would truncate every component to 0, so always divide by a Float.
  divisor = norm.to_f
  @values = values.map { |v| v / divisor }
  self
end
145
+
146
# Normalize a plain array of values and return the normalized array.
#
# Works on a copy wrapped in a throwaway Vector (id "temp"), so the caller's
# array is not the one being normalized.
#
# @param vector [Array<Float>] vector values to normalize
# @param type [Symbol] normalization type: :l2 (default) or :l1
# @return [Array<Float>] normalized vector values
#
# @example Normalize an embedding before upserting
#   normalized = Vectra::Vector.normalize(embedding)
#   client.upsert(vectors: [{ id: '1', values: normalized }])
#
# @example L1 normalization
#   normalized = Vectra::Vector.normalize([1.0, 2.0, 3.0], type: :l1)
def self.normalize(vector, type: :l2)
  scratch = new(id: "temp", values: vector.dup)
  scratch.normalize!(type: type)
  scratch.values
end
164
+
109
165
  # Check equality with another vector
110
166
  #
111
167
  # @param other [Vector] the other vector
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vectra
4
- VERSION = "0.3.3"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/vectra.rb CHANGED
@@ -23,6 +23,7 @@ require_relative "vectra/providers/pinecone"
23
23
  require_relative "vectra/providers/qdrant"
24
24
  require_relative "vectra/providers/weaviate"
25
25
  require_relative "vectra/providers/pgvector"
26
+ require_relative "vectra/providers/memory"
26
27
  require_relative "vectra/client"
27
28
 
28
29
  # Vectra - Unified Ruby client for vector databases
@@ -157,5 +158,24 @@ module Vectra
157
158
  **options
158
159
  )
159
160
  end
161
+
162
# Shortcut for building a client backed by the in-memory provider
# (intended for tests).
#
# @param options [Hash] additional options forwarded to Client.new
# @return [Client]
#
# @example
#   client = Vectra.memory
#
def memory(**options)
  Client.new(provider: :memory, **options)
end
160
180
  end
161
181
  end
data/vectra.gemspec ADDED
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/vectra/version"
4
+
5
# Gem specification for vectra-client.
Gem::Specification.new do |spec|
  spec.name = "vectra-client"
  spec.version = Vectra::VERSION
  spec.authors = ["Mijo Kristo"]
  spec.email = ["mijo@mijokristo.com"]

  spec.summary = "Unified Ruby client for vector databases"
  spec.description = "Vectra provides a unified interface to work with multiple vector database providers including Pinecone, Qdrant, Weaviate, and PostgreSQL with pgvector. Write once, switch providers easily."
  spec.homepage = "https://github.com/stokry/vectra"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.2.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/stokry/vectra"
  spec.metadata["changelog_uri"] = "https://github.com/stokry/vectra/blob/main/CHANGELOG.md"
  spec.metadata["rubygems_mfa_required"] = "true"

  # Package every git-tracked file except this gemspec itself and anything
  # under the development-only prefixes below.
  excluded_prefixes = %w[bin/ test/ spec/ features/ .git .github appveyor Gemfile]
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |path|
      (File.expand_path(path) == __FILE__) || path.start_with?(*excluded_prefixes)
    end
  end

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  {
    "faraday" => "~> 2.0",
    "faraday-retry" => "~> 2.0",
    "concurrent-ruby" => "~> 1.2"
  }.each { |gem_name, requirement| spec.add_dependency gem_name, requirement }

  # Optional runtime dependencies (required for specific features)
  # For ActiveRecord integration: gem 'activerecord', '>= 6.0'
  # For pgvector provider: gem 'pg', '~> 1.5'
  # For connection pooling: gem 'connection_pool', '~> 2.4'
  # For Datadog instrumentation: gem 'dogstatsd-ruby'

  # Development dependencies
  {
    "activerecord" => ">= 6.0",
    "sqlite3" => ">= 2.1", # For AR tests (ActiveRecord 8+ requires >= 2.1)
    "pg" => "~> 1.5",
    "rake" => "~> 13.0",
    "rspec" => "~> 3.12",
    "webmock" => "~> 3.19",
    "vcr" => "~> 6.2",
    "rubocop" => "~> 1.57",
    "rubocop-rspec" => "~> 2.25",
    "simplecov" => "~> 0.22",
    "yard" => "~> 0.9"
  }.each { |gem_name, requirement| spec.add_development_dependency gem_name, requirement }
end