vectra-client 0.3.2 โ†’ 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1116 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "vectra"
6
+ require "json"
7
+ require "securerandom"
8
+ require "digest"
9
+ require "stringio"
10
+
11
+ # Comprehensive Vectra Demo - Production Features
12
+ #
13
+ # This demo showcases all major Vectra features:
14
+ # - CRUD operations (Create, Read, Update, Delete)
15
+ # - Batch processing
16
+ # - Caching & performance optimization
17
+ # - Error handling & resilience
18
+ # - Health monitoring
19
+ # - Metadata filtering
20
+ # - Namespaces for multi-tenancy
21
+ #
22
+ # Prerequisites:
23
+ # docker run -p 6333:6333 qdrant/qdrant
24
+ #
25
+ # Run:
26
+ # bundle exec ruby examples/comprehensive_demo.rb
27
+
28
+ class ComprehensiveVectraDemo
29
+ INDEX_NAME = "documents"
30
+ DIMENSION = 128 # Smaller for demo purposes
31
+
32
+ def initialize(host = "http://localhost:6333")
33
+ @host = host
34
+ setup_clients
35
+ @stats = {
36
+ operations: 0,
37
+ cache_hits: 0,
38
+ errors: 0,
39
+ retries: 0
40
+ }
41
+ end
42
+
43
+ # =============================================================================
44
+ # SECTION 1: SETUP & INITIALIZATION
45
+ # =============================================================================
46
+
47
+ def run_demo
48
+ print_header("VECTRA COMPREHENSIVE DEMO")
49
+
50
+ section_1_basic_operations
51
+ section_2_batch_operations
52
+ section_3_advanced_queries
53
+ section_4_update_operations
54
+ section_5_delete_operations
55
+ section_6_cache_performance
56
+ section_7_error_handling
57
+ section_8_multi_tenancy
58
+ section_9_health_monitoring
59
+ section_10_async_batch
60
+ section_11_streaming
61
+ section_12_resilience
62
+ section_13_monitoring
63
+
64
+ print_summary
65
+ end
66
+
67
+ # =============================================================================
68
+ # SECTION 1: BASIC OPERATIONS
69
+ # =============================================================================
70
+
71
+ def section_1_basic_operations
72
+ print_section("1. Basic CRUD Operations")
73
+
74
+ # Health check
75
+ puts "๐Ÿฅ Checking system health..."
76
+ health = @client.health_check
77
+
78
+ if health.healthy?
79
+ puts " โœ… System healthy (#{health.latency_ms}ms latency)"
80
+ else
81
+ puts " โŒ System unhealthy: #{health.error_message}"
82
+ raise "Cannot proceed with unhealthy system"
83
+ end
84
+
85
+ # Create index
86
+ puts "\n๐Ÿ“ฆ Creating index '#{INDEX_NAME}'..."
87
+ begin
88
+ @client.provider.create_index(
89
+ name: INDEX_NAME,
90
+ dimension: DIMENSION,
91
+ metric: "cosine"
92
+ )
93
+ puts " โœ… Index created"
94
+ rescue StandardError => e
95
+ if e.message.include?("already exists")
96
+ puts " โ„น๏ธ Index already exists, deleting and recreating..."
97
+ @client.provider.delete_index(name: INDEX_NAME)
98
+ @client.provider.create_index(
99
+ name: INDEX_NAME,
100
+ dimension: DIMENSION,
101
+ metric: "cosine"
102
+ )
103
+ puts " โœ… Index recreated"
104
+ else
105
+ raise
106
+ end
107
+ end
108
+
109
+ # Insert single document
110
+ puts "\n๐Ÿ“ Inserting single document..."
111
+ doc = create_sample_document(
112
+ id: "doc-001",
113
+ title: "Introduction to Vector Databases",
114
+ content: "Vector databases enable semantic search using embeddings and similarity metrics.",
115
+ category: "Technology",
116
+ author: "John Doe"
117
+ )
118
+
119
+ result = @client.upsert(
120
+ index: INDEX_NAME,
121
+ vectors: [doc]
122
+ )
123
+ puts " โœ… Inserted #{result[:upserted_count]} document"
124
+ @stats[:operations] += 1
125
+
126
+ # Small delay to ensure consistency
127
+ sleep(0.1)
128
+
129
+ # Fetch by ID
130
+ puts "\n๐Ÿ” Fetching document by ID..."
131
+ fetched = @client.fetch(
132
+ index: INDEX_NAME,
133
+ ids: ["doc-001"]
134
+ )
135
+
136
+ if fetched["doc-001"]
137
+ doc = fetched["doc-001"]
138
+ title = doc.metadata[:title] || doc.metadata["title"] || "Untitled"
139
+ category = doc.metadata[:category] || doc.metadata["category"] || "Unknown"
140
+ author = doc.metadata[:author] || doc.metadata["author"] || "Unknown"
141
+ puts " โœ… Found: #{title}"
142
+ puts " ๐Ÿ“Š Metadata: category=#{category}, author=#{author}"
143
+ end
144
+ @stats[:operations] += 1
145
+
146
+ # Query by similarity
147
+ puts "\n๐Ÿ”Ž Querying by similarity..."
148
+ query_vec = generate_embedding("database search technology")
149
+ results = @client.query(
150
+ index: INDEX_NAME,
151
+ vector: query_vec,
152
+ top_k: 3,
153
+ include_metadata: true
154
+ )
155
+
156
+ puts " โœ… Found #{results.size} results"
157
+ results.each_with_index do |match, i|
158
+ title = match.metadata[:title] || match.metadata["title"] || "Untitled"
159
+ puts " #{i + 1}. #{title} (score: #{match.score.round(4)})"
160
+ end
161
+ @stats[:operations] += 1
162
+ end
163
+
164
+ # =============================================================================
165
+ # SECTION 2: BATCH OPERATIONS
166
+ # =============================================================================
167
+
168
+ def section_2_batch_operations
169
+ print_section("2. Batch Processing")
170
+
171
+ puts "๐Ÿ“ฆ Generating 50 sample documents..."
172
+ documents = generate_batch_documents(50)
173
+ puts " โœ… Generated #{documents.size} documents"
174
+
175
+ puts "\nโšก Batch upserting documents..."
176
+ start_time = Time.now
177
+
178
+ # Batch upsert (chunked)
179
+ chunk_size = 10
180
+ total_upserted = 0
181
+
182
+ documents.each_slice(chunk_size).with_index do |chunk, i|
183
+ result = @client.upsert(
184
+ index: INDEX_NAME,
185
+ vectors: chunk
186
+ )
187
+ total_upserted += result[:upserted_count]
188
+ print " ๐Ÿ“Š Progress: #{total_upserted}/#{documents.size}\r"
189
+ @stats[:operations] += 1
190
+ end
191
+
192
+ duration = ((Time.now - start_time) * 1000).round(2)
193
+ puts "\n โœ… Upserted #{total_upserted} documents in #{duration}ms"
194
+ puts " ๐Ÿ“ˆ Throughput: #{(total_upserted / (duration / 1000.0)).round(2)} docs/sec"
195
+ end
196
+
197
+ # =============================================================================
198
+ # SECTION 3: ADVANCED QUERIES
199
+ # =============================================================================
200
+
201
+ def section_3_advanced_queries
202
+ print_section("3. Advanced Query Features")
203
+
204
+ # Query with metadata filter
205
+ puts "๐Ÿ” Query 1: Filter by category (Technology)"
206
+ results = @client.query(
207
+ index: INDEX_NAME,
208
+ vector: generate_embedding("artificial intelligence machine learning"),
209
+ top_k: 5,
210
+ filter: { category: "Technology" },
211
+ include_metadata: true
212
+ )
213
+
214
+ puts " โœ… Found #{results.size} results in Technology category"
215
+ results.take(3).each_with_index do |match, i|
216
+ title = match.metadata[:title] || match.metadata["title"] || "Untitled"
217
+ category = match.metadata[:category] || match.metadata["category"] || "Unknown"
218
+ puts " #{i + 1}. #{title}"
219
+ puts " Category: #{category} | Score: #{match.score.round(4)}"
220
+ end
221
+ @stats[:operations] += 1
222
+
223
+ # Query different category
224
+ puts "\n๐Ÿ” Query 2: Filter by category (Business)"
225
+ results = @client.query(
226
+ index: INDEX_NAME,
227
+ vector: generate_embedding("market strategy growth"),
228
+ top_k: 5,
229
+ filter: { category: "Business" },
230
+ include_metadata: true
231
+ )
232
+
233
+ puts " โœ… Found #{results.size} results in Business category"
234
+ results.take(3).each_with_index do |match, i|
235
+ title = match.metadata[:title] || match.metadata["title"] || "Untitled"
236
+ puts " #{i + 1}. #{title}"
237
+ puts " Score: #{match.score.round(4)}"
238
+ end
239
+ @stats[:operations] += 1
240
+
241
+ # Query with include_values
242
+ puts "\n๐Ÿ” Query 3: Including vector values"
243
+ results = @client.query(
244
+ index: INDEX_NAME,
245
+ vector: generate_embedding("technology innovation"),
246
+ top_k: 2,
247
+ include_values: true,
248
+ include_metadata: true
249
+ )
250
+
251
+ puts " โœ… Retrieved #{results.size} results with vectors"
252
+ results.each_with_index do |match, i|
253
+ title = match.metadata[:title] || match.metadata["title"] || "Untitled"
254
+ vector_preview = match.values ? match.values.first(3).map { |v| v.round(3) } : []
255
+ puts " #{i + 1}. #{title}"
256
+ puts " Vector preview: [#{vector_preview.join(', ')}...]"
257
+ end
258
+ @stats[:operations] += 1
259
+ end
260
+
261
+ # =============================================================================
262
+ # SECTION 4: UPDATE OPERATIONS
263
+ # =============================================================================
264
+
265
+ def section_4_update_operations
266
+ print_section("4. Update Operations")
267
+
268
+ # Fetch original
269
+ puts "๐Ÿ“„ Fetching document to update..."
270
+ fetched = @client.fetch(index: INDEX_NAME, ids: ["doc-001"])
271
+ original = fetched["doc-001"]
272
+
273
+ if original.nil?
274
+ puts " โš ๏ธ Document doc-001 not found, skipping update operations"
275
+ return
276
+ end
277
+
278
+ puts " ๐Ÿ“Š Original metadata: #{original.metadata.slice(:category, :views)}"
279
+
280
+ # Update metadata
281
+ puts "\nโœ๏ธ Updating metadata..."
282
+ @client.update(
283
+ index: INDEX_NAME,
284
+ id: "doc-001",
285
+ metadata: {
286
+ views: 100,
287
+ featured: true,
288
+ updated_at: Time.now.iso8601
289
+ }
290
+ )
291
+ puts " โœ… Metadata updated"
292
+ @stats[:operations] += 1
293
+
294
+ # Verify update
295
+ puts "\n๐Ÿ” Verifying update..."
296
+ updated = @client.fetch(index: INDEX_NAME, ids: ["doc-001"])["doc-001"]
297
+ puts " ๐Ÿ“Š Updated metadata:"
298
+ puts " Views: #{updated.metadata[:views]}"
299
+ puts " Featured: #{updated.metadata[:featured]}"
300
+ puts " Updated at: #{updated.metadata[:updated_at]}"
301
+ @stats[:operations] += 1
302
+
303
+ # Update with new vector
304
+ puts "\nโœ๏ธ Updating vector values..."
305
+ new_vector = generate_embedding("updated content about databases and AI")
306
+ @client.update(
307
+ index: INDEX_NAME,
308
+ id: "doc-001",
309
+ values: new_vector
310
+ )
311
+ puts " โœ… Vector updated"
312
+ @stats[:operations] += 1
313
+ end
314
+
315
+ # =============================================================================
316
+ # SECTION 5: DELETE OPERATIONS
317
+ # =============================================================================
318
+
319
+ def section_5_delete_operations
320
+ print_section("5. Delete Operations")
321
+
322
+ # Delete single document
323
+ puts "๐Ÿ—‘๏ธ Delete 1: Single document by ID"
324
+ @client.delete(
325
+ index: INDEX_NAME,
326
+ ids: ["doc-001"]
327
+ )
328
+ puts " โœ… Deleted doc-001"
329
+ @stats[:operations] += 1
330
+
331
+ # Verify deletion
332
+ fetched = @client.fetch(index: INDEX_NAME, ids: ["doc-001"])
333
+ puts " โœ… Verified: #{fetched.empty? ? 'deleted' : 'still exists'}"
334
+
335
+ # Delete multiple documents
336
+ puts "\n๐Ÿ—‘๏ธ Delete 2: Multiple documents by IDs"
337
+ ids_to_delete = ["doc-002", "doc-003", "doc-004"]
338
+ @client.delete(
339
+ index: INDEX_NAME,
340
+ ids: ids_to_delete
341
+ )
342
+ puts " โœ… Deleted #{ids_to_delete.size} documents"
343
+ @stats[:operations] += 1
344
+
345
+ # Delete with filter
346
+ puts "\n๐Ÿ—‘๏ธ Delete 3: By metadata filter"
347
+ @client.delete(
348
+ index: INDEX_NAME,
349
+ filter: { category: "Science" }
350
+ )
351
+ puts " โœ… Deleted all documents in Science category"
352
+ @stats[:operations] += 1
353
+ end
354
+
355
+ # =============================================================================
356
+ # SECTION 6: CACHE PERFORMANCE
357
+ # =============================================================================
358
+
359
+ def section_6_cache_performance
360
+ print_section("6. Cache Performance")
361
+
362
+ query_vector = generate_embedding("artificial intelligence deep learning")
363
+
364
+ puts "โšก Running cache performance test..."
365
+ puts " Query: 'artificial intelligence deep learning'"
366
+ puts
367
+
368
+ # First call (cache miss)
369
+ puts "๐Ÿ“Š Attempt 1 (cache miss):"
370
+ start = Time.now
371
+ @cached_client.query(
372
+ index: INDEX_NAME,
373
+ vector: query_vector,
374
+ top_k: 5
375
+ )
376
+ first_time = ((Time.now - start) * 1000).round(2)
377
+ puts " โฑ๏ธ Duration: #{first_time}ms"
378
+ @stats[:operations] += 1
379
+
380
+ # Second call (cache hit)
381
+ puts "\n๐Ÿ“Š Attempt 2 (cache hit):"
382
+ start = Time.now
383
+ @cached_client.query(
384
+ index: INDEX_NAME,
385
+ vector: query_vector,
386
+ top_k: 5
387
+ )
388
+ second_time = ((Time.now - start) * 1000).round(2)
389
+ puts " โฑ๏ธ Duration: #{second_time}ms"
390
+ @stats[:cache_hits] += 1
391
+ @stats[:operations] += 1
392
+
393
+ # Third call (cache hit)
394
+ puts "\n๐Ÿ“Š Attempt 3 (cache hit):"
395
+ start = Time.now
396
+ @cached_client.query(
397
+ index: INDEX_NAME,
398
+ vector: query_vector,
399
+ top_k: 5
400
+ )
401
+ third_time = ((Time.now - start) * 1000).round(2)
402
+ puts " โฑ๏ธ Duration: #{third_time}ms"
403
+ @stats[:cache_hits] += 1
404
+ @stats[:operations] += 1
405
+
406
+ # Calculate speedup
407
+ avg_cached = ((second_time + third_time) / 2.0).round(2)
408
+ speedup = (first_time / avg_cached).round(2)
409
+ improvement = (((first_time - avg_cached) / first_time) * 100).round(1)
410
+
411
+ puts "\n๐Ÿ“ˆ Performance Analysis:"
412
+ puts " First call (no cache): #{first_time}ms"
413
+ puts " Avg cached calls: #{avg_cached}ms"
414
+ puts " Speedup: #{speedup}x faster"
415
+ puts " Improvement: #{improvement}% reduction in latency"
416
+
417
+ # Cache stats
418
+ cache_stats = @cache.stats
419
+ puts "\n๐Ÿ’พ Cache Statistics:"
420
+ puts " Size: #{cache_stats[:size]}/#{cache_stats[:max_size]}"
421
+ puts " TTL: #{cache_stats[:ttl]}s"
422
+ puts " Keys: #{cache_stats[:keys].size}"
423
+ end
424
+
425
+ # =============================================================================
426
+ # SECTION 7: ERROR HANDLING & RESILIENCE
427
+ # =============================================================================
428
+
429
+ def section_7_error_handling
430
+ print_section("7. Error Handling & Resilience")
431
+
432
+ puts "๐Ÿ›ก๏ธ Testing error handling scenarios..."
433
+
434
+ # Test 1: Invalid vector dimension
435
+ puts "\nโŒ Test 1: Invalid vector dimension"
436
+ begin
437
+ @client.upsert(
438
+ index: INDEX_NAME,
439
+ vectors: [{
440
+ id: "invalid-001",
441
+ values: [0.1, 0.2], # Wrong dimension (should be 128)
442
+ metadata: { title: "Invalid" }
443
+ }]
444
+ )
445
+ puts " โš ๏ธ Should have raised error"
446
+ rescue Vectra::ValidationError => e
447
+ puts " โœ… Caught ValidationError: #{e.message.split("\n").first}"
448
+ @stats[:errors] += 1
449
+ end
450
+
451
+ # Test 2: Non-existent index
452
+ puts "\nโŒ Test 2: Query non-existent index"
453
+ begin
454
+ @client.query(
455
+ index: "non_existent_index",
456
+ vector: generate_embedding("test"),
457
+ top_k: 5
458
+ )
459
+ puts " โš ๏ธ Should have raised error"
460
+ rescue Vectra::NotFoundError => e
461
+ puts " โœ… Caught NotFoundError: #{e.message}"
462
+ @stats[:errors] += 1
463
+ end
464
+
465
+ # Test 3: Invalid IDs
466
+ puts "\nโŒ Test 3: Fetch with empty IDs"
467
+ begin
468
+ @client.fetch(
469
+ index: INDEX_NAME,
470
+ ids: []
471
+ )
472
+ puts " โš ๏ธ Should have raised error"
473
+ rescue Vectra::ValidationError => e
474
+ puts " โœ… Caught ValidationError: #{e.message}"
475
+ @stats[:errors] += 1
476
+ end
477
+
478
+ puts "\nโœ… Error handling working correctly"
479
+ puts " ๐Ÿ“Š Total errors caught: #{@stats[:errors]}"
480
+ end
481
+
482
+ # =============================================================================
483
+ # SECTION 8: MULTI-TENANCY WITH NAMESPACES
484
+ # =============================================================================
485
+
486
+ def section_8_multi_tenancy
487
+ print_section("8. Multi-Tenancy with Namespaces")
488
+
489
+ puts "๐Ÿข Simulating multi-tenant application..."
490
+
491
+ # Tenant 1: Company A
492
+ puts "\n๐Ÿ“ฆ Tenant 1: Company A"
493
+ company_a_docs = [
494
+ {
495
+ id: "tenant-a-001",
496
+ values: generate_embedding("company A quarterly report"),
497
+ metadata: { title: "Q1 Report", tenant: "company-a" }
498
+ },
499
+ {
500
+ id: "tenant-a-002",
501
+ values: generate_embedding("company A product launch"),
502
+ metadata: { title: "Product Launch", tenant: "company-a" }
503
+ }
504
+ ]
505
+
506
+ @client.upsert(
507
+ index: INDEX_NAME,
508
+ vectors: company_a_docs,
509
+ namespace: "company-a"
510
+ )
511
+ puts " โœ… Inserted 2 documents for Company A"
512
+ @stats[:operations] += 1
513
+
514
+ # Tenant 2: Company B
515
+ puts "\n๐Ÿ“ฆ Tenant 2: Company B"
516
+ company_b_docs = [
517
+ {
518
+ id: "tenant-b-001",
519
+ values: generate_embedding("company B market analysis"),
520
+ metadata: { title: "Market Analysis", tenant: "company-b" }
521
+ },
522
+ {
523
+ id: "tenant-b-002",
524
+ values: generate_embedding("company B financial report"),
525
+ metadata: { title: "Financial Report", tenant: "company-b" }
526
+ }
527
+ ]
528
+
529
+ @client.upsert(
530
+ index: INDEX_NAME,
531
+ vectors: company_b_docs,
532
+ namespace: "company-b"
533
+ )
534
+ puts " โœ… Inserted 2 documents for Company B"
535
+ @stats[:operations] += 1
536
+
537
+ # Query tenant-specific data
538
+ puts "\n๐Ÿ” Querying Company A namespace..."
539
+ results_a = @client.query(
540
+ index: INDEX_NAME,
541
+ vector: generate_embedding("report"),
542
+ top_k: 5,
543
+ namespace: "company-a",
544
+ include_metadata: true
545
+ )
546
+ puts " โœ… Found #{results_a.size} documents for Company A:"
547
+ results_a.each do |r|
548
+ title = r.metadata[:title] || r.metadata["title"] || "Untitled"
549
+ puts " - #{title}"
550
+ end
551
+ @stats[:operations] += 1
552
+
553
+ puts "\n๐Ÿ” Querying Company B namespace..."
554
+ results_b = @client.query(
555
+ index: INDEX_NAME,
556
+ vector: generate_embedding("report"),
557
+ top_k: 5,
558
+ namespace: "company-b",
559
+ include_metadata: true
560
+ )
561
+ puts " โœ… Found #{results_b.size} documents for Company B:"
562
+ results_b.each do |r|
563
+ title = r.metadata[:title] || r.metadata["title"] || "Untitled"
564
+ puts " - #{title}"
565
+ end
566
+ @stats[:operations] += 1
567
+
568
+ puts "\nโœ… Namespace isolation verified"
569
+ end
570
+
571
+ # =============================================================================
572
+ # SECTION 9: HEALTH MONITORING
573
+ # =============================================================================
574
+
575
+ def section_9_health_monitoring
576
+ print_section("9. Health Monitoring & Statistics")
577
+
578
+ puts "๐Ÿฅ Performing comprehensive health check..."
579
+
580
+ # Detailed health check
581
+ health = @client.health_check(
582
+ index: INDEX_NAME,
583
+ include_stats: true
584
+ )
585
+
586
+ puts "\n๐Ÿ“Š System Health:"
587
+ puts " Status: #{health.healthy? ? 'โœ… Healthy' : 'โŒ Unhealthy'}"
588
+ puts " Provider: #{health.provider}"
589
+ puts " Latency: #{health.latency_ms}ms"
590
+ puts " Indexes available: #{health.indexes_available}"
591
+ puts " Checked at: #{health.checked_at}"
592
+
593
+ if health.stats
594
+ puts "\n๐Ÿ“ˆ Index Statistics:"
595
+ puts " Vector count: #{health.stats[:vector_count] || 'N/A'}"
596
+ puts " Dimension: #{health.stats[:dimension]}"
597
+ end
598
+
599
+ # List all indexes
600
+ puts "\n๐Ÿ“š Available Indexes:"
601
+ indexes = @client.provider.list_indexes
602
+ indexes.each do |idx|
603
+ puts " - #{idx[:name]}"
604
+ puts " Dimension: #{idx[:dimension]}, Metric: #{idx[:metric] || 'N/A'}"
605
+ end
606
+
607
+ # Index details
608
+ puts "\n๐Ÿ” Index Details:"
609
+ details = @client.provider.describe_index(index: INDEX_NAME)
610
+ puts " Name: #{details[:name]}"
611
+ puts " Dimension: #{details[:dimension]}"
612
+ puts " Metric: #{details[:metric]}"
613
+ puts " Status: #{details[:status]}"
614
+ end
615
+
616
+ # =============================================================================
617
+ # SECTION 10: ASYNC BATCH OPERATIONS
618
+ # =============================================================================
619
+
620
+ def section_10_async_batch
621
+ print_section("10. Async Batch Operations")
622
+
623
+ puts "โšก Testing concurrent batch upsert..."
624
+ puts " This demonstrates Vectra::Batch for parallel processing"
625
+
626
+ # Generate larger batch for async processing
627
+ large_batch = generate_batch_documents(30)
628
+ puts " ๐Ÿ“ฆ Generated #{large_batch.size} documents for async processing"
629
+
630
+ # Use async batch client
631
+ batch_client = Vectra::Batch.new(@client)
632
+
633
+ puts "\n๐Ÿš€ Starting async batch upsert (concurrency: 4)..."
634
+ start_time = Time.now
635
+
636
+ begin
637
+ result = batch_client.upsert_async(
638
+ index: INDEX_NAME,
639
+ vectors: large_batch,
640
+ concurrency: 4
641
+ )
642
+
643
+ duration = ((Time.now - start_time) * 1000).round(2)
644
+ puts " โœ… Async batch completed in #{duration}ms"
645
+ puts " ๐Ÿ“Š Results:"
646
+ puts " Success: #{result[:success]}"
647
+ puts " Failed: #{result[:failed]}"
648
+ puts " Total: #{result[:total]}"
649
+ puts " ๐Ÿ“ˆ Throughput: #{(result[:success] / (duration / 1000.0)).round(2)} docs/sec"
650
+
651
+ @stats[:operations] += 1
652
+ rescue StandardError => e
653
+ puts " โš ๏ธ Async batch error: #{e.message}"
654
+ puts " โ„น๏ธ Falling back to regular batch..."
655
+ # Fallback to regular batch
656
+ @client.upsert(index: INDEX_NAME, vectors: large_batch)
657
+ puts " โœ… Fallback batch completed"
658
+ end
659
+ end
660
+
661
+ # =============================================================================
662
+ # SECTION 11: STREAMING LARGE QUERIES
663
+ # =============================================================================
664
+
665
+ def section_11_streaming
666
+ print_section("11. Streaming Large Queries")
667
+
668
+ puts "๐ŸŒŠ Testing streaming for large result sets..."
669
+ puts " This demonstrates Vectra::Streaming for memory-efficient queries"
670
+
671
+ query_vector = generate_embedding("technology innovation")
672
+
673
+ # Use streaming client
674
+ streaming_client = Vectra::Streaming.new(@client)
675
+
676
+ puts "\n๐Ÿ“Š Streaming query results (batch_size: 10)..."
677
+ start_time = Time.now
678
+ total_results = 0
679
+ batches_processed = 0
680
+
681
+ begin
682
+ streaming_client.query_each(
683
+ index: INDEX_NAME,
684
+ vector: query_vector,
685
+ top_k: 50, # Large result set
686
+ batch_size: 10,
687
+ include_metadata: true
688
+ ) do |batch|
689
+ batches_processed += 1
690
+ total_results += batch.size
691
+ print " ๐Ÿ“ฆ Processed batch #{batches_processed}: #{batch.size} results (total: #{total_results})\r"
692
+ end
693
+
694
+ duration = ((Time.now - start_time) * 1000).round(2)
695
+ puts "\n โœ… Streaming completed in #{duration}ms"
696
+ puts " ๐Ÿ“Š Total results: #{total_results}"
697
+ puts " ๐Ÿ“ฆ Batches processed: #{batches_processed}"
698
+ puts " ๐Ÿ’พ Memory efficient: processed in chunks"
699
+
700
+ @stats[:operations] += 1
701
+ rescue StandardError => e
702
+ puts "\n โš ๏ธ Streaming error: #{e.message}"
703
+ puts " โ„น๏ธ Falling back to regular query..."
704
+ # Fallback to regular query
705
+ results = @client.query(
706
+ index: INDEX_NAME,
707
+ vector: query_vector,
708
+ top_k: 20
709
+ )
710
+ puts " โœ… Fallback query returned #{results.size} results"
711
+ end
712
+ end
713
+
714
+ # =============================================================================
715
+ # SECTION 12: RESILIENCE FEATURES
716
+ # =============================================================================
717
+
718
+ def section_12_resilience
719
+ print_section("12. Resilience Features (Rate Limiting & Circuit Breaker)")
720
+
721
+ puts "๐Ÿ›ก๏ธ Testing resilience patterns..."
722
+
723
+ # Rate Limiting
724
+ puts "\nโฑ๏ธ Rate Limiting Test:"
725
+ puts " Configuring rate limiter: 5 requests/second, burst: 10"
726
+
727
+ limiter = Vectra::RateLimiter.new(
728
+ requests_per_second: 5,
729
+ burst_size: 10
730
+ )
731
+
732
+ puts " Making 8 requests with rate limiting..."
733
+ start_time = Time.now
734
+ rate_limited_requests = 0
735
+
736
+ 8.times do |i|
737
+ limiter.acquire do
738
+ @client.query(
739
+ index: INDEX_NAME,
740
+ vector: generate_embedding("test query #{i}"),
741
+ top_k: 1
742
+ )
743
+ rate_limited_requests += 1
744
+ print " โœ… Request #{i + 1}/8 completed\r"
745
+ end
746
+ end
747
+
748
+ rate_limit_duration = ((Time.now - start_time) * 1000).round(2)
749
+ puts "\n โœ… Rate limited requests completed in #{rate_limit_duration}ms"
750
+ puts " ๐Ÿ“Š Requests: #{rate_limited_requests}/8"
751
+ puts " โฑ๏ธ Avg time per request: #{(rate_limit_duration / rate_limited_requests).round(2)}ms"
752
+
753
+ limiter_stats = limiter.stats
754
+ puts " ๐Ÿ“ˆ Rate limiter stats:"
755
+ puts " Available tokens: #{limiter_stats[:available_tokens].round(2)}"
756
+ puts " Requests/sec: #{limiter_stats[:requests_per_second]}"
757
+
758
+ @stats[:operations] += rate_limited_requests
759
+
760
+ # Circuit Breaker
761
+ puts "\n๐Ÿ”Œ Circuit Breaker Test:"
762
+ puts " Configuring circuit breaker: failure_threshold=3, recovery_timeout=5s"
763
+
764
+ breaker = Vectra::CircuitBreaker.new(
765
+ name: "demo-breaker",
766
+ failure_threshold: 3,
767
+ recovery_timeout: 5
768
+ )
769
+
770
+ puts " Testing circuit breaker with successful operations..."
771
+ success_count = 0
772
+ 5.times do |i|
773
+ begin
774
+ breaker.call do
775
+ @client.query(
776
+ index: INDEX_NAME,
777
+ vector: generate_embedding("circuit test #{i}"),
778
+ top_k: 1
779
+ )
780
+ end
781
+ success_count += 1
782
+ print " โœ… Operation #{i + 1}/5: Circuit #{breaker.state}\r"
783
+ rescue Vectra::CircuitBreakerOpenError => e
784
+ puts "\n โš ๏ธ Circuit opened: #{e.message}"
785
+ break
786
+ end
787
+ end
788
+
789
+ puts "\n โœ… Circuit breaker test completed"
790
+ puts " ๐Ÿ“Š Successful operations: #{success_count}/5"
791
+ puts " ๐Ÿ”Œ Circuit state: #{breaker.state}"
792
+ puts " ๐Ÿ“ˆ Circuit stats:"
793
+ stats = breaker.stats
794
+ puts " Failures: #{stats[:failures]}"
795
+ puts " Successes: #{stats[:successes]}"
796
+ puts " State: #{stats[:state]}"
797
+
798
+ @stats[:operations] += success_count
799
+ end
800
+
801
+ # =============================================================================
802
+ # SECTION 13: MONITORING & LOGGING
803
+ # =============================================================================
804
+
805
+ def section_13_monitoring
806
+ print_section("13. Monitoring & Logging")
807
+
808
+ puts "๐Ÿ“Š Setting up monitoring and logging..."
809
+
810
+ # Structured JSON Logging
811
+ puts "\n๐Ÿ“ Structured JSON Logging:"
812
+ begin
813
+ log_output = StringIO.new
814
+ Vectra::Logging.setup!(
815
+ output: log_output,
816
+ app: "vectra-demo",
817
+ env: "demo"
818
+ )
819
+
820
+ puts " โœ… JSON logger initialized"
821
+
822
+ # Log some operations
823
+ Vectra::Logging.log_info("Demo operation started", operation: "demo", index: INDEX_NAME)
824
+ Vectra::Logging.log_warn("Sample warning", message: "This is a test warning")
825
+ Vectra::Logging.log_error("Sample error", error: "Test error", recoverable: true)
826
+
827
+ log_output.rewind
828
+ log_lines = log_output.read.split("\n").reject(&:empty?)
829
+ puts " ๐Ÿ“Š Logged #{log_lines.size} entries"
830
+ puts " ๐Ÿ“„ Sample log entry:"
831
+ if log_lines.any?
832
+ sample = JSON.parse(log_lines.first)
833
+ puts " Level: #{sample['level']}"
834
+ puts " Message: #{sample['message']}"
835
+ puts " Timestamp: #{sample['timestamp']}"
836
+ end
837
+ rescue StandardError => e
838
+ puts " โš ๏ธ Logging setup error: #{e.message}"
839
+ end
840
+
841
+ # Audit Logging
842
+ puts "\n๐Ÿ”’ Audit Logging:"
843
+ begin
844
+ audit_output = StringIO.new
845
+ audit = Vectra::AuditLog.new(
846
+ output: audit_output,
847
+ enabled: true,
848
+ app: "vectra-demo"
849
+ )
850
+
851
+ puts " โœ… Audit logger initialized"
852
+
853
+ # Log audit events
854
+ audit.log_access(
855
+ user_id: "demo-user-123",
856
+ operation: "query",
857
+ index: INDEX_NAME,
858
+ result_count: 5
859
+ )
860
+
861
+ audit.log_authentication(
862
+ user_id: "demo-user-123",
863
+ success: true,
864
+ provider: "qdrant"
865
+ )
866
+
867
+ audit.log_data_modification(
868
+ user_id: "demo-user-123",
869
+ operation: "upsert",
870
+ index: INDEX_NAME,
871
+ record_count: 10
872
+ )
873
+
874
+ audit_output.rewind
875
+ audit_lines = audit_output.read.split("\n").reject(&:empty?)
876
+ puts " ๐Ÿ“Š Logged #{audit_lines.size} audit events"
877
+ puts " ๐Ÿ“„ Audit event types:"
878
+ audit_lines.each do |line|
879
+ event = JSON.parse(line)
880
+ puts " - #{event['event_type']}: #{event['operation'] || event['change_type'] || 'N/A'}"
881
+ end
882
+ rescue StandardError => e
883
+ puts " โš ๏ธ Audit logging error: #{e.message}"
884
+ end
885
+
886
+ # Instrumentation (Sentry example)
887
+ puts "\n๐Ÿ”” Error Tracking (Sentry):"
888
+ begin
889
+ # Mock Sentry for demo (in production, use real Sentry)
890
+ if defined?(Sentry)
891
+ Vectra::Instrumentation::Sentry.setup!
892
+ puts " โœ… Sentry instrumentation enabled"
893
+ puts " ๐Ÿ“Š Errors will be tracked to Sentry"
894
+ else
895
+ puts " โ„น๏ธ Sentry not available (install 'sentry-ruby' gem for production)"
896
+ puts " ๐Ÿ’ก In production, errors are automatically tracked"
897
+ end
898
+ rescue StandardError => e
899
+ puts " โš ๏ธ Sentry setup error: #{e.message}"
900
+ end
901
+
902
+ # Honeybadger example
903
+ puts "\n๐Ÿ Error Tracking (Honeybadger):"
904
+ begin
905
+ if defined?(Honeybadger)
906
+ Vectra::Instrumentation::Honeybadger.setup!
907
+ puts " โœ… Honeybadger instrumentation enabled"
908
+ puts " ๐Ÿ“Š Errors will be tracked to Honeybadger"
909
+ else
910
+ puts " โ„น๏ธ Honeybadger not available (install 'honeybadger' gem for production)"
911
+ puts " ๐Ÿ’ก In production, errors are automatically tracked"
912
+ end
913
+ rescue StandardError => e
914
+ puts " โš ๏ธ Honeybadger setup error: #{e.message}"
915
+ end
916
+
917
+ puts "\nโœ… Monitoring & logging setup complete"
918
+ puts " ๐Ÿ’ก In production, configure:"
919
+ puts " โ€ข Sentry for error tracking"
920
+ puts " โ€ข Honeybadger for error tracking"
921
+ puts " โ€ข Datadog/New Relic for APM"
922
+ puts " โ€ข JSON logs for log aggregation"
923
+ puts " โ€ข Audit logs for compliance"
924
+ end
925
+
926
+ # =============================================================================
927
+ # HELPER METHODS
928
+ # =============================================================================
929
+
930
+ private
931
+
932
+ def setup_clients
933
+ # Main client
934
+ @client = Vectra.qdrant(
935
+ host: @host,
936
+ api_key: nil
937
+ )
938
+
939
+ # Cached client for performance
940
+ @cache = Vectra::Cache.new(ttl: 300, max_size: 1000)
941
+ @cached_client = Vectra::CachedClient.new(@client, cache: @cache)
942
+ end
943
+
944
+ def create_sample_document(id:, title:, content:, category:, author:)
945
+ {
946
+ id: id,
947
+ values: generate_embedding(content),
948
+ metadata: {
949
+ title: title,
950
+ content: content,
951
+ category: category,
952
+ author: author,
953
+ created_at: Time.now.iso8601
954
+ }
955
+ }
956
+ end
957
+
958
# Generate +count+ synthetic documents, cycling round-robin through five
# fixed categories. Each document gets a deterministic id ("doc-002",
# "doc-003", ...), an embedding derived from its title+category, and
# randomized metadata (title pick, author, view count, creation date
# within the last 90 days).
#
# @param count [Integer] number of documents to build
# @return [Array<Hash>] upsert-ready document hashes
def generate_batch_documents(count)
  categories = %w[Technology Business Science Health Education]
  titles_by_category = {
    "Technology" => [
      "Introduction to Machine Learning",
      "Cloud Computing Best Practices",
      "Microservices Architecture Patterns",
      "DevOps and CI/CD Pipelines",
      "Database Optimization Techniques"
    ],
    "Business" => [
      "Market Analysis Q4 2024",
      "Strategic Planning Guide",
      "Customer Retention Strategies",
      "Digital Transformation Roadmap",
      "Competitive Analysis Framework"
    ],
    "Science" => [
      "Quantum Computing Basics",
      "Climate Change Research",
      "Genetic Engineering Ethics",
      "Space Exploration Updates",
      "Renewable Energy Solutions"
    ],
    "Health" => [
      "Nutrition and Wellness Guide",
      "Mental Health Awareness",
      "Exercise Science Fundamentals",
      "Preventive Care Strategies",
      "Sleep Quality Improvement"
    ],
    "Education" => [
      "Modern Teaching Methods",
      "E-Learning Platforms Comparison",
      "Student Engagement Techniques",
      "Curriculum Development Guide",
      "Educational Technology Trends"
    ]
  }

  Array.new(count) do |index|
    category = categories[index % categories.size]
    # rand calls are kept in the original order: title, author, views, age.
    title = titles_by_category[category][rand(5)]

    {
      # ids start at doc-002; doc-001 is created by the basic-operations section
      id: format("doc-%03d", index + 2),
      values: generate_embedding("#{title} #{category}"),
      metadata: {
        title: "#{title} #{index + 1}",
        category: category,
        author: %w[Alice Bob Charlie Diana Eve][rand(5)],
        views: rand(100..1000),
        created_at: (Time.now - rand(1..90) * 86_400).iso8601
      }
    }
  end
end
1015
+
1016
# Simple TF-IDF inspired embedding (demo purposes only — not a real model).
# SHA-256 of the downcased/stripped text seeds a deterministic vector of
# DIMENSION floats, each mapped into [0.0, 1.0] via a sine transform.
# NOTE: the sine term uses the RAW text length, not the normalized length,
# so surrounding whitespace still changes the output vector.
def generate_embedding(text)
  digest = Digest::SHA256.hexdigest(text.downcase.strip)

  # Derive one component per dimension from a 2-hex-digit slice of the digest.
  Array.new(DIMENSION) do |dim|
    seed = digest[(dim * 2) % digest.length, 2].to_i(16)
    (Math.sin(seed + dim + text.length) + 1) / 2.0
  end
end
1027
+
1028
# Print the demo banner (title centered in an 80-column rule) followed by
# the connection configuration: provider, host, index name, and dimension.
def print_header(title)
  rule = "=" * 80

  puts
  puts rule
  puts title.center(80)
  puts rule
  puts

  puts "Provider: Qdrant"
  puts "Host: #{@host}"
  puts "Index: #{INDEX_NAME}"
  puts "Dimension: #{DIMENSION}"
  puts
end
1040
+
1041
# Print a section divider: the title framed by 80-column horizontal rules.
def print_section(title)
  rule = "─" * 80

  puts
  puts rule
  puts "│ #{title}"
  puts rule
  puts
end
1048
+
1049
# Print the end-of-demo report: the operation counters accumulated in
# @stats, the checklist of demonstrated features, suggested next steps,
# and cleanup instructions. Output-only — reads @stats and @host and
# mutates nothing.
def print_summary
  print_section("Demo Summary")

  # @stats counters are incremented by the demo sections as they run.
  puts "📊 Operations Summary:"
  puts " Total operations: #{@stats[:operations]}"
  puts " Cache hits: #{@stats[:cache_hits]}"
  puts " Errors handled: #{@stats[:errors]}"
  puts " Retries: #{@stats[:retries]}"
  puts "\n🎯 Features Demonstrated:"
  puts " ✅ Basic CRUD operations"
  puts " ✅ Batch processing"
  puts " ✅ Async batch operations"
  puts " ✅ Streaming queries"
  puts " ✅ Advanced queries with filtering"
  puts " ✅ Update operations"
  puts " ✅ Delete operations"
  puts " ✅ Caching & performance"
  puts " ✅ Error handling"
  puts " ✅ Multi-tenancy (namespaces)"
  puts " ✅ Health monitoring"
  puts " ✅ Rate limiting"
  puts " ✅ Circuit breaker"
  puts " ✅ Monitoring & logging"

  puts "\n✅ Demo completed successfully!"
  puts "\n💡 Next Steps:"
  # NOTE(review): assumes the Qdrant dashboard is served at <host>/dashboard
  # (true for a default local Docker instance).
  puts " • Open Qdrant dashboard: #{@host}/dashboard"
  puts " • Explore the Vectra documentation"
  puts " • Try with different providers (Pinecone, Weaviate)"
  puts " • Integrate into your application"

  puts "\n🧹 Cleanup:"
  puts " Run with --cleanup flag to delete the index"
  puts " Stop Qdrant: docker ps | grep qdrant | awk '{print $1}' | xargs docker stop"
  puts
end
1085
+ end
1086
+
1087
+ # =============================================================================
1088
+ # MAIN EXECUTION
1089
+ # =============================================================================
1090
+
1091
# Script entry point: only runs when executed directly, not when required.
# Usage: ruby examples/comprehensive_demo.rb [host] [--cleanup]
if __FILE__ == $PROGRAM_NAME
  # Split ARGV into --flags and positional args; the first positional
  # argument (if any) overrides the default local Qdrant host.
  flags, positional = ARGV.partition { |arg| arg.start_with?("--") }
  target_host = positional.first || "http://localhost:6333"

  begin
    demo = ComprehensiveVectraDemo.new(target_host)
    demo.run_demo

    # Optionally delete the demo index afterwards.
    if flags.include?("--cleanup")
      puts "\n🧹 Cleaning up..."
      demo.instance_variable_get(:@client).provider.delete_index(name: ComprehensiveVectraDemo::INDEX_NAME)
      puts " ✅ Index deleted"
    end
  rescue Interrupt
    # Ctrl-C: exit quietly with a non-zero status.
    puts "\n\n⚠️ Demo interrupted by user"
    exit 1
  rescue StandardError => e
    # Most failures here mean Qdrant is not reachable; show a short
    # backtrace plus the command to start it.
    puts "\n\n❌ Error: #{e.class} - #{e.message}"
    puts e.backtrace.first(5).join("\n")
    puts "\n💡 Make sure Qdrant is running:"
    puts " docker run -p 6333:6333 qdrant/qdrant"
    exit 1
  end
end