vectra-client 0.4.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators/base"
4
+
5
module Vectra
  module Generators
    # Rails generator for creating a vectra-enabled index for a model.
    #
    # @example
    #   rails generate vectra:index Product embedding --dimension=1536 --provider=qdrant
    #
    # This will:
    #   - Generate a pgvector migration (when provider=pgvector)
    #   - Create a model concern with `has_vector`
    #   - Update the model to include the concern
    #   - Append an entry to config/vectra.yml (no secrets)
    #
    # Every public instance method below is a generator step; the steps run in
    # definition order. Helpers live under `private` so they are not run as steps.
    class IndexGenerator < Rails::Generators::Base
      argument :model_name, type: :string, banner: "ModelName"
      argument :column_name, type: :string, default: "embedding", banner: "column_name"

      class_option :dimension, type: :numeric, default: 1536,
                               desc: "Vector dimension (e.g. 1536 for OpenAI)"
      class_option :provider, type: :string, default: "qdrant",
                              desc: "Vector provider (qdrant, pgvector, pinecone, weaviate, memory)"
      class_option :index, type: :string, default: nil,
                           desc: "Index/collection name (defaults to table name)"

      # Captures the two positional arguments as strings.
      #
      # @param args [Array] positional CLI arguments (model name, optional column name)
      # @param options [Hash, Array] generator options, forwarded to the superclass
      # @param config [Hash] generator configuration, forwarded to the superclass
      def initialize(args = [], options = {}, config = {})
        super
        @model_name = args[0]&.to_s
        @column_name = (args[1] || "embedding").to_s
      end

      # Step 1: writes a migration adding the vector column. Only runs for the
      # pgvector provider; every other provider skips this step.
      def create_migration_for_pgvector
        return unless provider_name == "pgvector"

        timestamp = Time.now.utc.strftime("%Y%m%d%H%M%S")
        file_name = "#{timestamp}_add_#{column_name.underscore}_to_#{table_name}.rb"
        path = File.join("db/migrate", file_name)

        migration_body = <<~RUBY
          class Add#{column_name.camelize}To#{table_name.camelize} < ActiveRecord::Migration#{migration_version}
            def change
              add_column :#{table_name}, :#{column_name}, :vector, limit: #{dimension}
            end
          end
        RUBY

        create_file(path, migration_body)
      end

      # Step 2: writes app/models/concerns/<model>_vector.rb declaring `has_vector`.
      def create_model_concern
        path = File.join("app/models/concerns", "#{model_name.underscore}_vector.rb")

        concern_body = <<~RUBY
          # frozen_string_literal: true

          module #{concern_module_name}
            extend ActiveSupport::Concern

            included do
              include Vectra::ActiveRecord

              has_vector :#{column_name},
                         provider: :#{provider_name},
                         index: "#{index_name}",
                         dimension: #{dimension}
            end
          end
        RUBY

        create_file(path, concern_body)
      end

      # Step 3: makes the model include the concern. Creates the model file when
      # it is missing; otherwise inserts the include before the file's final `end`.
      def update_model_file
        relative_path = File.join("app/models", "#{model_name.underscore}.rb")
        # Resolve against destination_root so the existence check looks at the
        # same file that create_file/gsub_file operate on, not the process CWD.
        absolute_path = File.expand_path(relative_path, destination_root)

        unless File.exist?(absolute_path)
          create_file(relative_path, <<~RUBY)
            # frozen_string_literal: true

            class #{model_name.camelize} < ApplicationRecord
              include #{concern_module_name}
            end
          RUBY
          return
        end

        include_line = "include #{concern_module_name}"
        return if File.read(absolute_path).include?(include_line)

        # The \z anchor means only the last `end` of the file is rewritten.
        gsub_file(relative_path, /end\s*\z/, "  #{include_line}\nend\n")
      end

      # Step 4: appends this index to config/vectra.yml, creating the file with a
      # header when it does not exist. Does nothing if the table already has an entry.
      def update_vectra_config
        path = File.join(destination_root || ".", "config", "vectra.yml")
        existing = File.exist?(path) ? File.read(path) : ""

        # Match a top-level key at the start of a line; a plain substring check
        # would treat "archived_products:" as an existing "products:" entry.
        return if existing.match?(/^#{Regexp.escape(table_name)}:/)

        header = +"# Vectra index configuration (do NOT store API keys here)\n"
        header << "# Generated by vectra:index for #{model_name}\n\n"

        config_body = <<~YAML
          #{table_name}:
            provider: #{provider_name}
            index: #{index_name}
            dimension: #{dimension}

        YAML

        prefix = existing.presence || header
        # Without a trailing newline the new key would be glued onto the last line.
        prefix += "\n" unless prefix.end_with?("\n")

        FileUtils.mkdir_p(File.dirname(path))
        File.write(path, "#{prefix}#{config_body}")
      end

      private

      # @return [String] the model name given on the command line
      # @raise [RuntimeError] when no model name was supplied
      def model_name
        @model_name || raise("Model name is required")
      end

      # @return [String] the vector column name (defaults to "embedding")
      def column_name
        @column_name || "embedding"
      end

      # @return [String] pluralized snake_case table name; namespace separators
      #   become underscores ("Admin::Product" => "admin_products") so the value
      #   is usable in file names and as a migration symbol
      def table_name
        model_name.underscore.tr("/", "_").pluralize
      end

      # @return [String] value of --provider
      def provider_name
        options[:provider].to_s
      end

      # @return [String] value of --index, falling back to the table name
      def index_name
        (options[:index] || table_name).to_s
      end

      # @return [Integer] value of --dimension
      # @raise [Thor::Error] when the dimension is not a positive integer
      def dimension
        value = options[:dimension].to_i
        return value if value.positive?

        raise Thor::Error, "--dimension must be a positive integer (got #{options[:dimension].inspect})"
      end

      # @return [String] name of the generated concern, e.g. "ProductVector"
      def concern_module_name
        "#{model_name.camelize}Vector"
      end

      # @return [String] migration superclass suffix for the running Rails, e.g. "[7.1]"
      def migration_version
        "[#{Rails::VERSION::MAJOR}.#{Rails::VERSION::MINOR}]"
      end
    end
  end
end
data/lib/vectra/client.rb CHANGED
@@ -287,6 +287,71 @@ module Vectra
287
287
  provider.stats(index: index, namespace: namespace)
288
288
  end
289
289
 
290
+ # Hybrid search combining semantic (vector) and keyword (text) search
291
+ #
292
+ # Combines the best of both worlds: semantic understanding from vectors
293
+ # and exact keyword matching from text search.
294
+ #
295
+ # @param index [String] the index/collection name
296
+ # @param vector [Array<Float>] query vector for semantic search
297
+ # @param text [String] text query for keyword search
298
+ # @param alpha [Float] balance between semantic and keyword (0.0 = pure keyword, 1.0 = pure semantic)
299
+ # @param top_k [Integer] number of results to return
300
+ # @param namespace [String, nil] optional namespace
301
+ # @param filter [Hash, nil] metadata filter
302
+ # @param include_values [Boolean] include vector values in results
303
+ # @param include_metadata [Boolean] include metadata in results
304
+ # @return [QueryResult] search results
305
+ #
306
+ # @example Basic hybrid search
307
+ # results = client.hybrid_search(
308
+ # index: 'docs',
309
+ # vector: embedding,
310
+ # text: 'ruby programming',
311
+ # alpha: 0.7 # 70% semantic, 30% keyword
312
+ # )
313
+ #
314
+ # @example Pure semantic (alpha = 1.0)
315
+ # results = client.hybrid_search(
316
+ # index: 'docs',
317
+ # vector: embedding,
318
+ # text: 'ruby',
319
+ # alpha: 1.0
320
+ # )
321
+ #
322
+ # @example Pure keyword (alpha = 0.0)
323
+ # results = client.hybrid_search(
324
+ # index: 'docs',
325
+ # vector: embedding,
326
+ # text: 'ruby programming',
327
+ # alpha: 0.0
328
+ # )
329
+ #
330
# @raise [ValidationError] when text is not a non-blank String or alpha is not a number in 0.0..1.0
# @raise [UnsupportedFeatureError] when the configured provider does not implement hybrid search
def hybrid_search(index:, vector:, text:, alpha: 0.5, top_k: 10, namespace: nil,
                  filter: nil, include_values: false, include_metadata: true)
  validate_index!(index)
  validate_query_vector!(vector)

  # Type-check before calling String methods so a non-String argument fails
  # with ValidationError instead of NoMethodError; whitespace-only text
  # carries no search terms and is rejected like an empty string.
  unless text.is_a?(String) && !text.strip.empty?
    raise ValidationError, "Text query cannot be nil or empty"
  end

  # Providers interpolate alpha into their query text, so only real numbers
  # inside the documented range are allowed through.
  unless alpha.is_a?(Numeric) && (0.0..1.0).cover?(alpha)
    raise ValidationError, "Alpha must be between 0.0 and 1.0"
  end

  unless provider.respond_to?(:hybrid_search)
    raise UnsupportedFeatureError,
          "Hybrid search is not supported by #{provider_name} provider"
  end

  provider.hybrid_search(
    index: index,
    vector: vector,
    text: text,
    alpha: alpha,
    top_k: top_k,
    namespace: namespace,
    filter: filter,
    include_values: include_values,
    include_metadata: include_metadata
  )
end
354
+
290
355
  # Get the provider name
291
356
  #
292
357
  # @return [Symbol]
data/lib/vectra/errors.rb CHANGED
@@ -57,6 +57,9 @@ module Vectra
57
57
  # Raised when the provider is not supported
58
58
  class UnsupportedProviderError < Error; end
59
59
 
60
# Raised when the active provider does not implement a requested feature
class UnsupportedFeatureError < Error
end
62
+
60
63
  # Raised when an operation times out
61
64
  class TimeoutError < Error; end
62
65
 
@@ -94,6 +94,74 @@ module Vectra
94
94
  QueryResult.from_response(matches: matches, namespace: namespace)
95
95
  end
96
96
 
97
+ # Hybrid search combining vector similarity and PostgreSQL full-text search
98
+ #
99
+ # Combines pgvector similarity search with PostgreSQL's native full-text search.
100
+ # Requires a text search column (tsvector) in your table.
101
+ #
102
+ # @param index [String] table name
103
+ # @param vector [Array<Float>] query vector
104
+ # @param text [String] text query for full-text search
105
+ # @param alpha [Float] balance (0.0 = full-text, 1.0 = vector)
106
+ # @param top_k [Integer] number of results
107
+ # @param namespace [String, nil] optional namespace
108
+ # @param filter [Hash, nil] metadata filter
109
+ # @param include_values [Boolean] include vector values
110
+ # @param include_metadata [Boolean] include metadata
111
+ # @param text_column [String] column name for full-text search (default: 'content')
112
+ # @return [QueryResult] search results
113
+ #
114
+ # @note Your table should have a text column with a tsvector index:
115
+ # CREATE INDEX idx_content_fts ON my_index USING gin(to_tsvector('english', content));
116
# @raise [ArgumentError, TypeError] when alpha cannot be converted to a Float
def hybrid_search(index:, vector:, text:, alpha:, top_k:, namespace: nil,
                  filter: nil, include_values: false, include_metadata: true,
                  text_column: "content")
  ensure_table_exists!(index)

  # alpha is spliced into the SQL text below, so coerce it to a Float first;
  # anything that is not a number raises here instead of reaching the database.
  alpha_value = Float(alpha)
  vector_literal = format_vector(vector)
  distance_op = DISTANCE_FUNCTIONS[table_metric(index)]

  select_cols = ["id"]
  select_cols << "embedding" if include_values
  select_cols << "metadata" if include_metadata

  # Vector part: 1 - distance. The outer parentheses are required: without
  # them "alpha * 1.0 - (d)" evaluates as "alpha - d" in SQL.
  # NOTE(review): "1 - distance" reads as a similarity for cosine distance;
  # confirm the intended scaling for the other entries of DISTANCE_FUNCTIONS.
  vector_score = "(1.0 - (embedding #{distance_op} '#{vector_literal}'::vector))"

  # Text part: ts_rank of the document against the plain-text query.
  document = "to_tsvector('english', COALESCE(#{quote_ident(text_column)}, ''))"
  ts_query = "plainto_tsquery('english', #{escape_literal(text)})"
  text_score = "ts_rank(#{document}, #{ts_query})"

  # hybrid = alpha * vector_score + (1 - alpha) * text_score
  hybrid_score = "(#{alpha_value} * #{vector_score} + (1.0 - #{alpha_value}) * #{text_score})"

  select_cols << "#{hybrid_score} AS score"
  select_cols << "#{vector_score} AS vector_score"
  select_cols << "#{text_score} AS text_score"

  # Only rows matching the text query are candidates; the vector part re-ranks them.
  where_clauses = build_where_clauses(namespace, filter)
  where_clauses << "#{document} @@ #{ts_query}"

  sql = "SELECT #{select_cols.join(', ')} FROM #{quote_ident(index)}"
  sql += " WHERE #{where_clauses.join(' AND ')}" if where_clauses.any?
  sql += " ORDER BY score DESC"
  sql += " LIMIT #{top_k.to_i}"

  result = execute(sql)
  matches = result.map { |row| build_match_from_row(row, include_values, include_metadata) }

  log_debug("Hybrid search returned #{matches.size} results (alpha: #{alpha})")

  QueryResult.from_response(
    matches: matches,
    namespace: namespace
  )
end
164
+
97
165
  # @see Base#fetch
98
166
  def fetch(index:, ids:, namespace: nil)
99
167
  ensure_table_exists!(index)
@@ -67,6 +67,63 @@ module Vectra
67
67
  end
68
68
  end
69
69
 
70
+ # Hybrid search combining dense (vector) and sparse (keyword) search
71
+ #
72
+ # Pinecone supports hybrid search using sparse-dense vectors.
73
+ # For text-based keyword search, you need to provide sparse vectors.
74
+ #
75
+ # @param index [String] index name
76
+ # @param vector [Array<Float>] dense query vector
77
+ # @param text [String] text query (converted to sparse vector)
78
+ # @param alpha [Float] balance (0.0 = sparse, 1.0 = dense)
79
+ # @param top_k [Integer] number of results
80
+ # @param namespace [String, nil] optional namespace
81
+ # @param filter [Hash, nil] metadata filter
82
+ # @param include_values [Boolean] include vector values
83
+ # @param include_metadata [Boolean] include metadata
84
+ # @return [QueryResult] search results
85
+ #
86
+ # @note For proper hybrid search, you should generate sparse vectors
87
+ # from text using a tokenizer (e.g., BM25). This method accepts text
88
+ # but requires sparse vector generation externally.
89
# Dense-only stand-in for hybrid search: `text` is not sent to Pinecone and
# `alpha` only appears in the debug log line.
def hybrid_search(index:, vector:, alpha:, top_k:, namespace: nil,
                  filter: nil, include_values: false, include_metadata: true, text: nil)
  # The request built below carries no sparse vector, so a given text query
  # has no effect on ranking; say so in the debug log.
  if text
    log_debug("Pinecone hybrid search: text parameter ignored. " \
              "For true hybrid search, provide sparse vectors via sparse_values parameter.")
  end

  payload = {
    vector: vector.map(&:to_f),
    topK: top_k,
    includeValues: include_values,
    includeMetadata: include_metadata
  }
  payload[:namespace] = namespace if namespace
  payload[:filter] = transform_filter(filter) if filter

  response = data_connection(index).post("/query", payload)
  return handle_error(response) unless response.success?

  log_debug("Hybrid search returned #{response.body['matches']&.size || 0} results (alpha: #{alpha})")
  QueryResult.from_response(
    matches: transform_matches(response.body["matches"] || []),
    namespace: response.body["namespace"],
    usage: response.body["usage"]
  )
end
126
+
70
127
  # @see Base#fetch
71
128
  def fetch(index:, ids:, namespace: nil)
72
129
  params = { ids: ids }
@@ -83,6 +83,33 @@ module Vectra
83
83
  end
84
84
  end
85
85
 
86
+ # Hybrid search combining vector and text search
87
+ #
88
+ # Uses Qdrant's prefetch + rescore API for efficient hybrid search
89
+ #
90
+ # @param index [String] collection name
91
+ # @param vector [Array<Float>] query vector
92
+ # @param text [String] text query for keyword search
93
+ # @param alpha [Float] balance (0.0 = keyword, 1.0 = vector)
94
+ # @param top_k [Integer] number of results
95
+ # @param namespace [String, nil] optional namespace
96
+ # @param filter [Hash, nil] metadata filter
97
+ # @param include_values [Boolean] include vector values
98
+ # @param include_metadata [Boolean] include metadata
99
+ # @return [QueryResult] search results
100
# Builds the hybrid request body, posts it to the collection's
# /points/query endpoint and converts the response into a QueryResult.
def hybrid_search(index:, vector:, text:, alpha:, top_k:, namespace: nil,
                  filter: nil, include_values: false, include_metadata: true)
  request_body = build_hybrid_search_body(
    vector, text, alpha, top_k,
    build_filter(filter, namespace),
    include_values, include_metadata
  )

  response = with_error_handling do
    connection.post("/collections/#{index}/points/query", request_body)
  end

  handle_hybrid_search_response(response, alpha, namespace)
end
112
+
86
113
  # @see Base#fetch
87
114
  def fetch(index:, ids:, namespace: nil) # rubocop:disable Lint/UnusedMethodArgument
88
115
  point_ids = ids.map { |id| generate_point_id(id) }
@@ -280,6 +307,38 @@ module Vectra
280
307
 
281
308
  private
282
309
 
310
# Builds the request body for POST /collections/{name}/points/query.
#
# @param vector [Array<Numeric>] dense query vector (each element converted with to_f)
# @param text [String] keyword query placed in the prefetch stage
# @param alpha [Float] weighting forwarded under :params
# @param top_k [Integer] final result limit; the prefetch stage fetches twice as many
# @param filter [Hash, nil] already-built Qdrant filter, or nil for none
# @param include_values [Boolean] sent as with_vector
# @param include_metadata [Boolean] sent as with_payload
# @return [Hash] request body
def build_hybrid_search_body(vector, text, alpha, top_k, filter, include_values, include_metadata)
  body = {
    prefetch: {
      query: { text: text },
      limit: top_k * 2
    },
    query: { vector: vector.map(&:to_f) },
    limit: top_k,
    # NOTE(review): the Query API's `params` documents search parameters such
    # as hnsw_ef/exact; confirm the server actually honours `alpha` here.
    params: { alpha: alpha },
    with_vector: include_values,
    with_payload: include_metadata
  }

  if filter
    # The Query API accepts `filter` per prefetch stage and at the top level
    # of the request; it is not a field of the `query` object.
    body[:prefetch][:filter] = filter
    body[:filter] = filter
  end

  body
end
327
+
328
# Converts a /points/query response into a QueryResult.
#
# @param response [#success?, #body] HTTP response with a parsed body
# @param alpha [Float] only used in the debug log line
# @param namespace [String, nil] namespace echoed into the result
# @return [QueryResult] on success; otherwise whatever handle_error returns or raises
def handle_hybrid_search_response(response, alpha, namespace)
  return handle_error(response) unless response.success?

  # The Query API wraps hits as { "result" => { "points" => [...] } }, while
  # the search endpoints return the list directly under "result". Accept
  # both so the hits always reach transform_search_results as an Array.
  result = response.body["result"]
  points = result.is_a?(Hash) ? result["points"] : result

  matches = transform_search_results(points || [])
  log_debug("Hybrid search returned #{matches.size} results (alpha: #{alpha})")

  QueryResult.from_response(
    matches: matches,
    namespace: namespace
  )
end
341
+
283
342
  def validate_config!
284
343
  super
285
344
  raise ConfigurationError, "Host must be configured for Qdrant" if config.host.nil? || config.host.empty?
@@ -102,6 +102,43 @@ module Vectra
102
102
  end
103
103
  end
104
104
 
105
+ # Hybrid search combining vector and BM25 text search
106
+ #
107
+ # Uses Weaviate's hybrid search API with alpha parameter
108
+ #
109
+ # @param index [String] class name
110
+ # @param vector [Array<Float>] query vector
111
+ # @param text [String] text query for BM25 search
112
+ # @param alpha [Float] balance (0.0 = BM25, 1.0 = vector)
113
+ # @param top_k [Integer] number of results
114
+ # @param namespace [String, nil] optional namespace (not used in Weaviate)
115
+ # @param filter [Hash, nil] metadata filter
116
+ # @param include_values [Boolean] include vector values
117
+ # @param include_metadata [Boolean] include metadata
118
+ # @return [QueryResult] search results
119
# Renders the hybrid GraphQL query, posts it to the GraphQL endpoint and
# converts the response into a QueryResult.
def hybrid_search(index:, vector:, text:, alpha:, top_k:, namespace: nil,
                  filter: nil, include_values: false, include_metadata: true)
  where_filter = build_where(filter, namespace)
  payload = {
    "query" => build_hybrid_search_graphql(
      index: index,
      vector: vector,
      text: text,
      alpha: alpha,
      top_k: top_k,
      where_filter: where_filter,
      include_values: include_values,
      include_metadata: include_metadata
    )
  }

  response = with_error_handling { connection.post("#{API_BASE_PATH}/graphql", payload) }

  handle_hybrid_search_response(response, index, alpha, namespace,
                                include_values, include_metadata)
end
141
+
105
142
  # rubocop:disable Metrics/PerceivedComplexity
106
143
  def fetch(index:, ids:, namespace: nil)
107
144
  body = {
@@ -294,6 +331,54 @@ module Vectra
294
331
 
295
332
  private
296
333
 
334
# Keyword-argument front end for build_graphql_query: joins the selection
# fields with single spaces and forwards everything positionally.
def build_hybrid_search_graphql(index:, vector:, text:, alpha:, top_k:,
                                where_filter:, include_values:, include_metadata:)
  fields = build_selection_fields(include_values, include_metadata)
  build_graphql_query(index, top_k, text, alpha, vector, where_filter, fields.join(" "))
end
339
+
340
# Renders the GraphQL `Get` query for a hybrid search.
#
# @param index [String] class name to query
# @param top_k [Integer] result limit
# @param text [String] keyword query
# @param alpha [Float] keyword/vector balance
# @param vector [Array<Numeric>] query vector
# @param where_filter [Hash, nil] filter rendered with JSON.generate, or nil for none
# @param selection_block [String] pre-rendered selection fields
# @return [String] GraphQL document
# @raise [ArgumentError, TypeError] when top_k or alpha is not numeric
def build_graphql_query(index, top_k, text, alpha, vector, where_filter, selection_block)
  vector_literal = vector.map { |v| format("%.10f", v.to_f) }.join(", ")
  # NOTE(review): JSON.generate quotes object keys, which is JSON rather than
  # GraphQL input syntax; kept as in the original, confirm against build_where.
  where_argument = "where: #{JSON.generate(where_filter)}" if where_filter

  # - JSON.generate escapes quotes, backslashes and control characters, so the
  #   text cannot terminate the string literal early.
  # - The query vector is passed inside `hybrid` rather than as a separate
  #   `nearVector` operator next to it.
  # - limit/alpha are coerced so only numbers are spliced into the document.
  <<~GRAPHQL
    {
      Get {
        #{index}(
          limit: #{Integer(top_k)}
          hybrid: {
            query: #{JSON.generate(text.to_s)}
            alpha: #{Float(alpha)}
            vector: [#{vector_literal}]
          }
          #{where_argument}
        ) {
          #{selection_block}
        }
      }
    }
  GRAPHQL
end
359
+
360
# Lists the fields to select for each hit: always the `_additional` block,
# then "vector" and "metadata" when the matching flag is truthy.
def build_selection_fields(include_values, include_metadata)
  optional = [["vector", include_values], ["metadata", include_metadata]]
  requested = optional.select { |_name, wanted| wanted }.map(&:first)
  ["_additional { id distance }"] + requested
end
366
+
367
# Converts a GraphQL hybrid-search response into a QueryResult, delegating to
# handle_error when the response is not successful.
def handle_hybrid_search_response(response, index, alpha, namespace,
                                  include_values, include_metadata)
  return handle_error(response) unless response.success?

  matches = extract_query_matches(response.body, index, include_values, include_metadata)
  log_debug("Hybrid search returned #{matches.size} results (alpha: #{alpha})")

  QueryResult.from_response(matches: matches, namespace: namespace)
end
381
+
297
382
  def validate_config!
298
383
  super
299
384
  raise ConfigurationError, "Host must be configured for Weaviate" if config.host.nil? || config.host.empty?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vectra
4
- VERSION = "0.4.0"
4
+ VERSION = "1.0.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vectra-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mijo Kristo
@@ -252,6 +252,11 @@ files:
252
252
  - docs/_site/robots.txt
253
253
  - docs/_site/sitemap.xml
254
254
  - docs/api/overview.md
255
+ - docs/assets/favicon.svg
256
+ - docs/assets/logo.svg
257
+ - docs/assets/radme.png
258
+ - docs/assets/readme-new.png
259
+ - docs/assets/seo.png
255
260
  - docs/assets/style.css
256
261
  - docs/community/contributing.md
257
262
  - docs/examples/basic-usage.md
@@ -282,6 +287,7 @@ files:
282
287
  - examples/grafana-setup.md
283
288
  - examples/instrumentation_demo.rb
284
289
  - examples/prometheus-exporter.rb
290
+ - lib/generators/vectra/index_generator.rb
285
291
  - lib/generators/vectra/install_generator.rb
286
292
  - lib/generators/vectra/templates/enable_pgvector_extension.rb
287
293
  - lib/generators/vectra/templates/vectra.rb