vectra-client 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +77 -37
  3. data/CHANGELOG.md +49 -6
  4. data/README.md +52 -393
  5. data/docs/Gemfile +9 -0
  6. data/docs/_config.yml +37 -0
  7. data/docs/_layouts/default.html +14 -0
  8. data/docs/_layouts/home.html +187 -0
  9. data/docs/_layouts/page.html +82 -0
  10. data/docs/_site/api/overview/index.html +145 -0
  11. data/docs/_site/assets/main.css +649 -0
  12. data/docs/_site/assets/main.css.map +1 -0
  13. data/docs/_site/assets/minima-social-icons.svg +33 -0
  14. data/docs/_site/assets/style.css +295 -0
  15. data/docs/_site/community/contributing/index.html +110 -0
  16. data/docs/_site/examples/basic-usage/index.html +117 -0
  17. data/docs/_site/examples/index.html +58 -0
  18. data/docs/_site/feed.xml +1 -0
  19. data/docs/_site/guides/getting-started/index.html +106 -0
  20. data/docs/_site/guides/installation/index.html +82 -0
  21. data/docs/_site/index.html +92 -0
  22. data/docs/_site/providers/index.html +119 -0
  23. data/docs/_site/providers/pgvector/index.html +155 -0
  24. data/docs/_site/providers/pinecone/index.html +121 -0
  25. data/docs/_site/providers/qdrant/index.html +124 -0
  26. data/docs/_site/providers/weaviate/index.html +123 -0
  27. data/docs/_site/robots.txt +1 -0
  28. data/docs/_site/sitemap.xml +39 -0
  29. data/docs/api/overview.md +126 -0
  30. data/docs/assets/style.css +927 -0
  31. data/docs/community/contributing.md +89 -0
  32. data/docs/examples/basic-usage.md +102 -0
  33. data/docs/examples/index.md +54 -0
  34. data/docs/guides/getting-started.md +90 -0
  35. data/docs/guides/installation.md +67 -0
  36. data/docs/guides/performance.md +200 -0
  37. data/docs/index.md +37 -0
  38. data/docs/providers/index.md +81 -0
  39. data/docs/providers/pgvector.md +95 -0
  40. data/docs/providers/pinecone.md +72 -0
  41. data/docs/providers/qdrant.md +73 -0
  42. data/docs/providers/weaviate.md +72 -0
  43. data/lib/vectra/batch.rb +148 -0
  44. data/lib/vectra/cache.rb +261 -0
  45. data/lib/vectra/configuration.rb +6 -1
  46. data/lib/vectra/pool.rb +256 -0
  47. data/lib/vectra/streaming.rb +153 -0
  48. data/lib/vectra/version.rb +1 -1
  49. data/lib/vectra.rb +4 -0
  50. data/netlify.toml +12 -0
  51. metadata +58 -5
  52. data/IMPLEMENTATION_GUIDE.md +0 -686
  53. data/NEW_FEATURES_v0.2.0.md +0 -459
  54. data/RELEASE_CHECKLIST_v0.2.0.md +0 -383
  55. data/USAGE_EXAMPLES.md +0 -787
@@ -15,7 +15,8 @@ module Vectra
15
15
 
16
16
  attr_accessor :api_key, :environment, :host, :timeout, :open_timeout,
17
17
  :max_retries, :retry_delay, :logger, :pool_size, :pool_timeout,
18
- :batch_size, :instrumentation
18
+ :batch_size, :instrumentation, :cache_enabled, :cache_ttl,
19
+ :cache_max_size, :async_concurrency
19
20
 
20
21
  attr_reader :provider
21
22
 
@@ -33,6 +34,10 @@ module Vectra
33
34
  @pool_timeout = 5
34
35
  @batch_size = 100
35
36
  @instrumentation = false
37
+ @cache_enabled = false
38
+ @cache_ttl = 300
39
+ @cache_max_size = 1000
40
+ @async_concurrency = 4
36
41
  end
37
42
 
38
43
  # Set the provider
@@ -0,0 +1,256 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent"
4
+
5
+ module Vectra
6
# Connection pool with warmup support
#
# Provides connection pooling for database providers with configurable
# pool size, timeout, and connection warmup.
#
# Accounting invariant: `@pool.size + @checked_out.value` is the number of
# live connections owned by the pool and never exceeds `size`. Every path
# that changes either counter runs while holding `@mutex`.
#
# @example Basic usage
#   pool = Vectra::Pool.new(size: 5, timeout: 5) { create_connection }
#   pool.warmup(3) # Pre-create 3 connections
#
#   pool.with_connection do |conn|
#     conn.execute("SELECT 1")
#   end
#
class Pool
  class TimeoutError < Vectra::Error; end
  class PoolExhaustedError < Vectra::Error; end

  attr_reader :size, :timeout

  # Initialize connection pool
  #
  # @param size [Integer] maximum pool size
  # @param timeout [Integer] checkout timeout in seconds
  # @yield connection factory block
  # @raise [ArgumentError] if no factory block is given
  def initialize(size:, timeout: 5, &block)
    raise ArgumentError, "Connection factory block required" unless block_given?

    @size = size
    @timeout = timeout
    @factory = block
    @pool = Concurrent::Array.new
    @checked_out = Concurrent::AtomicFixnum.new(0)
    @mutex = Mutex.new
    @condition = ConditionVariable.new
    @shutdown = false
  end

  # Warmup the pool by pre-creating connections
  #
  # Connections that are currently checked out count against the limit, so
  # warming up a pool that is already in use cannot push it past `size`.
  #
  # @param count [Integer] number of connections to create (default: pool size)
  # @return [Integer] number of connections created
  def warmup(count = nil)
    # A shut-down pool would never hand these connections out or close them.
    return 0 if @shutdown

    target = [count || size, size].min
    created = 0

    @mutex.synchronize do
      target.times do
        break if @pool.size + @checked_out.value >= size

        conn = create_connection
        next unless conn

        @pool << conn
        created += 1
      end

      # Threads blocked in #checkout can use the new connections right away.
      @condition.broadcast if created.positive?
    end

    created
  end

  # Execute block with a connection from the pool
  #
  # @yield [connection] the checked out connection
  # @return [Object] result of the block
  def with_connection
    # Checkout happens outside the begin/ensure so a failed checkout does
    # not trigger a checkin of a connection we never obtained.
    conn = checkout
    begin
      yield conn
    ensure
      checkin(conn)
    end
  end

  # Checkout a connection from the pool
  #
  # @return [Object] a connection
  # @raise [TimeoutError] if checkout times out
  # @raise [PoolExhaustedError] if the pool has been shut down
  def checkout
    raise PoolExhaustedError, "Pool has been shutdown" if @shutdown

    deadline = monotonic_now + timeout

    @mutex.synchronize do
      loop do
        # Re-check on every pass: #shutdown wakes waiters so they fail fast
        # instead of creating connections nobody will ever close.
        raise PoolExhaustedError, "Pool has been shutdown" if @shutdown

        # Try to get an existing connection
        conn = @pool.pop
        if conn
          if healthy?(conn)
            # Count the connection only once we know we are handing it out;
            # counting a discarded connection would leak a pool slot.
            @checked_out.increment
            return conn
          end

          # Connection is unhealthy, discard and try again
          close_connection(conn)
          next
        end

        # Try to create a new connection if under limit
        if @checked_out.value + @pool.size < size
          conn = create_connection
          if conn
            @checked_out.increment
            return conn
          end
        end

        # Wait for a connection to be returned
        remaining = deadline - monotonic_now
        raise TimeoutError, "Connection checkout timed out after #{timeout}s" if remaining <= 0

        @condition.wait(@mutex, remaining)
      end
    end
  end

  # Return a connection to the pool
  #
  # After shutdown the connection is closed instead of being pooled, so
  # connections that were in flight during shutdown are not leaked.
  #
  # @param connection [Object] connection to return
  def checkin(connection)
    @mutex.synchronize do
      @checked_out.decrement
      if !@shutdown && healthy?(connection) && @pool.size < size
        @pool << connection
      else
        close_connection(connection)
      end
      @condition.signal
    end
  end

  # Shutdown the pool, closing all idle connections
  #
  # Checked-out connections are closed when they are checked back in.
  #
  # @return [void]
  def shutdown
    @shutdown = true
    @mutex.synchronize do
      while (conn = @pool.pop)
        close_connection(conn)
      end
      # Wake every waiter so it observes the shutdown flag immediately.
      @condition.broadcast
    end
  end

  # Get pool statistics
  #
  # @return [Hash] pool stats (`total_created` is the number of connections
  #   currently alive, i.e. available + checked out)
  def stats
    {
      size: size,
      available: @pool.size,
      checked_out: @checked_out.value,
      total_created: @pool.size + @checked_out.value,
      shutdown: @shutdown
    }
  end

  # Check if pool is healthy (public method)
  #
  # @return [Boolean] true when the pool is not shut down and owns at least
  #   one connection
  def pool_healthy?
    !@shutdown && (@pool.size + @checked_out.value).positive?
  end

  private

  # Internal health check for individual connections
  #
  # @param conn [Object, nil] connection to inspect
  # @return [Boolean]
  def healthy?(conn)
    return false if conn.nil?
    return true unless conn.respond_to?(:status)

    # For PG connections, check status. Otherwise assume healthy.
    if defined?(PG::CONNECTION_OK)
      conn.status == PG::CONNECTION_OK
    else
      # If PG not loaded, assume connection is healthy if it exists
      true
    end
  rescue StandardError
    false
  end

  # Build a connection with the factory block.
  #
  # @return [Object, nil] the connection, or nil if the factory raised
  def create_connection
    @factory.call
  rescue StandardError => e
    Vectra.configuration.logger&.error("Pool: Failed to create connection: #{e.message}")
    nil
  end

  # Close a connection if it supports #close; errors are logged, not raised.
  def close_connection(conn)
    conn.close if conn.respond_to?(:close)
  rescue StandardError => e
    Vectra.configuration.logger&.warn("Pool: Error closing connection: #{e.message}")
  end

  # Monotonic timestamp in seconds; unlike Time.now it cannot jump when the
  # system clock is adjusted, so checkout deadlines stay accurate.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
199
+
200
# Pooled connection module for pgvector
#
# Mixin that lazily builds a Pool and exposes convenience wrappers around
# it. The including object must provide `config` (responding to `pool_size`
# and `pool_timeout`) and `parse_connection_params`.
module PooledConnection
  # Lazily build and memoize the pool.
  #
  # @return [Pool] connection pool
  def connection_pool
    @connection_pool ||= create_pool
  end

  # Pre-create connections in the pool.
  #
  # @param count [Integer] number of connections to pre-create
  # @return [Integer] connections created
  def warmup_pool(count = nil)
    connection_pool.warmup(count)
  end

  # Run the given block with a connection borrowed from the pool.
  #
  # @yield [connection] database connection
  def with_pooled_connection(&block)
    connection_pool.with_connection(&block)
  end

  # Shut the pool down (if one was built) and forget it, so the next call
  # to #connection_pool builds a fresh one.
  def shutdown_pool
    @connection_pool.shutdown if @connection_pool
    @connection_pool = nil
  end

  # Report pool statistics without forcing the pool to be created.
  #
  # @return [Hash]
  def pool_stats
    @connection_pool ? connection_pool.stats : { status: "not_initialized" }
  end

  private

  # Build a Pool sized from the configuration, defaulting both the size and
  # the timeout to 5 when the configuration leaves them unset.
  def create_pool
    Pool.new(size: config.pool_size || 5, timeout: config.pool_timeout || 5) { create_raw_connection }
  end

  # Open one PG connection; `pg` is required lazily so the gem is only
  # needed when pooling is actually used.
  def create_raw_connection
    require "pg"
    PG.connect(parse_connection_params)
  end
end
256
+ end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectra
4
# Streaming query results for large datasets
#
# Provides lazy enumeration over query results with automatic pagination,
# so consumers can stop early without fetching every page.
#
# @example Stream through results
#   stream = Vectra::Streaming.new(client)
#   stream.query_each(index: 'my-index', vector: query_vec, total: 1000) do |match|
#     process(match)
#   end
#
# @example As lazy enumerator
#   results = stream.query_stream(index: 'my-index', vector: query_vec, total: 1000)
#   results.take(50).each { |m| puts m.id }
#
class Streaming
  DEFAULT_PAGE_SIZE = 100

  attr_reader :client, :page_size

  # Initialize streaming query handler
  #
  # @param client [Client] the Vectra client
  # @param page_size [Integer] results per page (default: 100, minimum: 1)
  def initialize(client, page_size: DEFAULT_PAGE_SIZE)
    @client = client
    @page_size = [page_size, 1].max
  end

  # Stream query results with a block
  #
  # @param index [String] the index name
  # @param vector [Array<Float>] query vector
  # @param total [Integer] total results to fetch
  # @param namespace [String, nil] optional namespace
  # @param filter [Hash, nil] metadata filter
  # @yield [Match] each match result
  # @return [Integer] total results yielded (0 when no block is given)
  def query_each(index:, vector:, total:, namespace: nil, filter: nil, &block)
    return 0 unless block_given?

    count = 0
    query_stream(index: index, vector: vector, total: total, namespace: namespace, filter: filter).each do |match|
      block.call(match)
      count += 1
    end
    count
  end

  # Create a lazy enumerator for streaming results
  #
  # `client.query` is called without an offset or cursor, so paging is done
  # by widening `top_k` by one page per round and yielding only the ids not
  # seen yet. Re-issuing the same `top_k` would return the same matches
  # forever. Each round therefore re-fetches the earlier pages; use a page
  # size large enough to keep the number of rounds small.
  # NOTE(review): providers may cap `top_k` — confirm the limit for large totals.
  #
  # @param index [String] the index name
  # @param vector [Array<Float>] query vector
  # @param total [Integer] total results to fetch
  # @param namespace [String, nil] optional namespace
  # @param filter [Hash, nil] metadata filter
  # @return [Enumerator::Lazy] lazy enumerator of results
  def query_stream(index:, vector:, total:, namespace: nil, filter: nil)
    Enumerator.new do |yielder|
      fetched = 0
      # Hash used as an id set: no dependency on `Set` being loaded.
      seen_ids = {}

      while fetched < total
        window = [fetched + page_size, total].min

        result = client.query(
          index: index,
          vector: vector,
          top_k: window,
          namespace: namespace,
          filter: filter,
          include_metadata: true
        )

        break if result.empty?

        fresh = 0
        result.each do |match|
          # Skip matches already yielded by an earlier, narrower window
          next if seen_ids.key?(match.id)

          seen_ids[match.id] = true
          yielder << match
          fetched += 1
          fresh += 1

          break if fetched >= total
        end

        # No new ids means widening further cannot make progress; fewer
        # results than requested means the index is exhausted.
        break if fresh.zero? || result.size < window
      end
    end.lazy
  end

  # Scan all vectors in an index (provider-dependent)
  #
  # This basic implementation only reports the vector count from
  # `client.stats`; it does not fetch vectors and never invokes the block.
  # The block is required only to keep the calling convention stable for a
  # future provider-specific implementation.
  #
  # @param index [String] the index name
  # @param namespace [String, nil] optional namespace
  # @param batch_size [Integer] IDs per batch (currently unused)
  # @yield [Vector] each vector (not yet invoked)
  # @return [Integer] total vectors reported by the index stats (0 when no
  #   block is given)
  # @note Not all providers support efficient scanning
  def scan_all(index:, namespace: nil, batch_size: 1000)
    return 0 unless block_given?

    # One stats call is enough: there is no batch loop, so a zero or
    # negative batch_size can no longer stall the scan.
    stats = client.stats(index: index, namespace: namespace)
    stats[:total_vector_count] || 0
  end
end
129
+
130
# Streaming result wrapper with additional metadata
#
# Pairs an enumerator of results with a caller-supplied metadata object and
# forwards iteration to the enumerator, so a lazy enumerator stays lazy.
class StreamingResult
  include Enumerable

  attr_reader :enumerator, :metadata

  # @param enumerator [#each] source of results
  # @param metadata [Hash] extra information about the stream
  def initialize(enumerator, metadata = {})
    @enumerator = enumerator
    @metadata = metadata
  end

  # Iterate over the wrapped enumerator.
  #
  # @return [Object] whatever the wrapped enumerator's #each returns
  def each(&block)
    @enumerator.each(&block)
  end

  # Delegated rather than inherited from Enumerable, so the wrapped
  # enumerator's own #take is used.
  #
  # @param count [Integer] number of results to take
  def take(count)
    @enumerator.take(count)
  end

  # Materialize all results through the wrapped enumerator's #to_a.
  def to_a
    @enumerator.to_a
  end
end
153
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vectra
4
- VERSION = "0.2.1"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/vectra.rb CHANGED
@@ -7,6 +7,10 @@ require_relative "vectra/vector"
7
7
  require_relative "vectra/query_result"
8
8
  require_relative "vectra/instrumentation"
9
9
  require_relative "vectra/retry"
10
+ require_relative "vectra/batch"
11
+ require_relative "vectra/streaming"
12
+ require_relative "vectra/cache"
13
+ require_relative "vectra/pool"
10
14
  require_relative "vectra/active_record"
11
15
  require_relative "vectra/providers/base"
12
16
  require_relative "vectra/providers/pinecone"
data/netlify.toml ADDED
@@ -0,0 +1,12 @@
1
+ [build]
2
+ command = "cd docs && bundle install && bundle exec jekyll build"
3
+ publish = "docs/_site"
4
+
5
+ [build.environment]
6
+ JEKYLL_ENV = "production"
7
+ RUBY_VERSION = "3.4.7"
8
+
9
+ [[redirects]]
10
+ from = "/*"
11
+ to = "/index.html"
12
+ status = 200
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vectra-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mijo Kristo
@@ -37,6 +37,20 @@ dependencies:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
39
  version: '2.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: concurrent-ruby
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.2'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.2'
40
54
  - !ruby/object:Gem::Dependency
41
55
  name: activerecord
42
56
  requirement: !ruby/object:Gem::Requirement
@@ -207,16 +221,50 @@ files:
207
221
  - CHANGELOG.md
208
222
  - CODE_OF_CONDUCT.md
209
223
  - CONTRIBUTING.md
210
- - IMPLEMENTATION_GUIDE.md
211
224
  - LICENSE
212
- - NEW_FEATURES_v0.2.0.md
213
225
  - README.md
214
- - RELEASE_CHECKLIST_v0.2.0.md
215
226
  - Rakefile
216
227
  - SECURITY.md
217
- - USAGE_EXAMPLES.md
218
228
  - benchmarks/batch_operations_benchmark.rb
219
229
  - benchmarks/connection_pooling_benchmark.rb
230
+ - docs/Gemfile
231
+ - docs/_config.yml
232
+ - docs/_layouts/default.html
233
+ - docs/_layouts/home.html
234
+ - docs/_layouts/page.html
235
+ - docs/_site/api/overview/index.html
236
+ - docs/_site/assets/main.css
237
+ - docs/_site/assets/main.css.map
238
+ - docs/_site/assets/minima-social-icons.svg
239
+ - docs/_site/assets/style.css
240
+ - docs/_site/community/contributing/index.html
241
+ - docs/_site/examples/basic-usage/index.html
242
+ - docs/_site/examples/index.html
243
+ - docs/_site/feed.xml
244
+ - docs/_site/guides/getting-started/index.html
245
+ - docs/_site/guides/installation/index.html
246
+ - docs/_site/index.html
247
+ - docs/_site/providers/index.html
248
+ - docs/_site/providers/pgvector/index.html
249
+ - docs/_site/providers/pinecone/index.html
250
+ - docs/_site/providers/qdrant/index.html
251
+ - docs/_site/providers/weaviate/index.html
252
+ - docs/_site/robots.txt
253
+ - docs/_site/sitemap.xml
254
+ - docs/api/overview.md
255
+ - docs/assets/style.css
256
+ - docs/community/contributing.md
257
+ - docs/examples/basic-usage.md
258
+ - docs/examples/index.md
259
+ - docs/guides/getting-started.md
260
+ - docs/guides/installation.md
261
+ - docs/guides/performance.md
262
+ - docs/index.md
263
+ - docs/providers/index.md
264
+ - docs/providers/pgvector.md
265
+ - docs/providers/pinecone.md
266
+ - docs/providers/qdrant.md
267
+ - docs/providers/weaviate.md
220
268
  - examples/active_record_demo.rb
221
269
  - examples/instrumentation_demo.rb
222
270
  - lib/generators/vectra/install_generator.rb
@@ -224,12 +272,15 @@ files:
224
272
  - lib/generators/vectra/templates/vectra.rb
225
273
  - lib/vectra.rb
226
274
  - lib/vectra/active_record.rb
275
+ - lib/vectra/batch.rb
276
+ - lib/vectra/cache.rb
227
277
  - lib/vectra/client.rb
228
278
  - lib/vectra/configuration.rb
229
279
  - lib/vectra/errors.rb
230
280
  - lib/vectra/instrumentation.rb
231
281
  - lib/vectra/instrumentation/datadog.rb
232
282
  - lib/vectra/instrumentation/new_relic.rb
283
+ - lib/vectra/pool.rb
233
284
  - lib/vectra/providers/base.rb
234
285
  - lib/vectra/providers/pgvector.rb
235
286
  - lib/vectra/providers/pgvector/connection.rb
@@ -240,8 +291,10 @@ files:
240
291
  - lib/vectra/providers/weaviate.rb
241
292
  - lib/vectra/query_result.rb
242
293
  - lib/vectra/retry.rb
294
+ - lib/vectra/streaming.rb
243
295
  - lib/vectra/vector.rb
244
296
  - lib/vectra/version.rb
297
+ - netlify.toml
245
298
  homepage: https://github.com/stokry/vectra
246
299
  licenses:
247
300
  - MIT