prescient 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -117,6 +117,52 @@ Prescient.configure do |config|
  end
  ```

+ ### Provider Fallback Configuration
+
+ Prescient supports automatic fallback to backup providers when the primary provider fails. This ensures high availability for your AI applications.
+
+ ```ruby
+ Prescient.configure do |config|
+   # Configure primary provider
+   config.add_provider(:primary, Prescient::Provider::OpenAI,
+     api_key: ENV['OPENAI_API_KEY'],
+     embedding_model: 'text-embedding-3-small',
+     chat_model: 'gpt-3.5-turbo'
+   )
+
+   # Configure backup providers
+   config.add_provider(:backup1, Prescient::Provider::Anthropic,
+     api_key: ENV['ANTHROPIC_API_KEY'],
+     model: 'claude-3-haiku-20240307'
+   )
+
+   config.add_provider(:backup2, Prescient::Provider::Ollama,
+     url: 'http://localhost:11434',
+     embedding_model: 'nomic-embed-text',
+     chat_model: 'llama3.1:8b'
+   )
+
+   # Configure fallback order
+   config.fallback_providers = [:backup1, :backup2]
+ end
+
+ # Client with fallback enabled (default)
+ client = Prescient::Client.new(:primary, enable_fallback: true)
+
+ # Client without fallback
+ client_no_fallback = Prescient::Client.new(:primary, enable_fallback: false)
+
+ # Convenience methods also support fallback
+ response = Prescient.generate_response("Hello", provider: :primary, enable_fallback: true)
+ ```
+
+ **Fallback Behavior:**
+ - When a provider fails with a persistent error, Prescient automatically tries the next available provider
+ - Only available (healthy) providers are tried during fallback
+ - If no fallback providers are configured, all available providers are tried as fallbacks
+ - Transient errors (rate limits, timeouts) still use retry logic before fallback
+ - The fallback process preserves all method arguments and options
+
  ## Usage

  ### Quick Start
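The fallback behavior above can be exercised end to end with just the documented calls; a minimal sketch (the prompt string is illustrative):

```ruby
# Ask the primary provider; on a persistent failure Prescient tries
# :backup1, then :backup2, as configured above.
response = Prescient.generate_response("Hello", provider: :primary, enable_fallback: true)

puts response[:response]
puts "Provider: " + response[:provider] # which provider actually answered
```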
@@ -170,8 +216,8 @@ response = Prescient.generate_response(query, context_items,
  )

  puts response[:response]
- puts "Model: #{response[:model]}"
- puts "Provider: #{response[:provider]}"
+ puts "Model: " + response[:model]
+ puts "Provider: " + response[:provider]
  ```

  ### Error Handling
@@ -214,14 +260,14 @@ Prescient.configure do |config|
    prompt_templates: {
      system_prompt: 'You are a friendly customer service representative.',
      no_context_template: <<~TEMPLATE.strip,
-       %{system_prompt}
+       %{ system_prompt }

        Customer Question: %{query}

        Please provide a helpful response.
      TEMPLATE
      with_context_template: <<~TEMPLATE.strip
-       %{system_prompt} Use the company info below to help answer.
+       %{ system_prompt } Use the company info below to help answer.

        Company Information:
        %{context}
@@ -259,6 +305,7 @@ prompt_templates: {
    system_prompt: 'You are a technical documentation assistant. Provide detailed explanations with code examples.',
    # ... templates
  }
+
  ```

  #### Creative Writing
@@ -283,12 +330,12 @@ Prescient.configure do |config|
    context_configs: {
      'product' => {
        fields: %w[name description price category brand],
-       format: '%{name} by %{brand}: %{description} - $%{price} (%{category})',
+       format: '%{ name } by %{ brand }: %{ description } - $%{ price } (%{ category })',
        embedding_fields: %w[name description category brand]
      },
      'review' => {
        fields: %w[product_name rating review_text reviewer_name],
-       format: '%{product_name} - %{rating}/5 stars: "%{review_text}"',
+       format: '%{ product_name } - %{ rating }/5 stars: "%{ review_text }"',
        embedding_fields: %w[product_name review_text]
      }
    }
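These `format` strings look like Ruby `Kernel#format` named references; a standalone sketch of how one `product` item would render (sample data is illustrative, and the spacing follows the 0.1.0 templates):

```ruby
template = '%{name} by %{brand}: %{description} - $%{price} (%{category})'
item = { name: 'Widget', brand: 'Acme', description: 'A useful widget',
         price: '19.99', category: 'Tools' }

puts format(template, item)
# => "Widget by Acme: A useful widget - $19.99 (Tools)"
```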
@@ -409,10 +456,10 @@ query_embedding = client.generate_embedding(query_text)
  query_vector = "[#{query_embedding.join(',')}]"

  results = db.exec_params(
-   "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
-    FROM documents d
-    JOIN document_embeddings de ON d.id = de.document_id
-    ORDER BY de.embedding <=> $1::vector
+   "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
+    FROM documents d
+    JOIN document_embeddings de ON d.id = de.document_id
+    ORDER BY de.embedding <=> $1::vector
    LIMIT 5",
    [query_vector]
  )
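Rows come back from the `pg` driver as string-keyed hashes, so the cosine distance converts to a similarity score in one line; a short sketch mirroring the search-output example later in these docs:

```ruby
results.each do |row|
  similarity = 1 - row['distance'].to_f # cosine distance -> similarity
  puts "#{row['title']} (#{(similarity * 100).round(1)}% similar)"
end
```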
@@ -423,14 +470,14 @@ results = db.exec_params(
  pgvector supports three distance functions:

  - **Cosine Distance** (`<=>`): Best for normalized embeddings
- - **L2 Distance** (`<->`): Euclidean distance, good general purpose
+ - **L2 Distance** (`<->`): Euclidean distance, good general purpose
  - **Inner Product** (`<#>`): Dot product, useful for specific cases

  ```sql
  -- Cosine similarity (most common)
  ORDER BY embedding <=> query_vector

- -- L2 distance
+ -- L2 distance
  ORDER BY embedding <-> query_vector

  -- Inner product
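If the metric is chosen at runtime, the operator can be looked up from a small table; a sketch (the helper name is illustrative, not part of the gem):

```ruby
# Map a metric name to its pgvector operator.
DISTANCE_OPS = { cosine: '<=>', l2: '<->', inner_product: '<#>' }.freeze

def order_clause(metric)
  "ORDER BY embedding #{DISTANCE_OPS.fetch(metric)} $1::vector"
end

order_clause(:l2) # => "ORDER BY embedding <-> $1::vector"
```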
@@ -443,8 +490,8 @@ The setup automatically creates HNSW indexes for fast similarity search:

  ```sql
  -- Example index for cosine distance
- CREATE INDEX idx_embeddings_cosine
- ON document_embeddings
+ CREATE INDEX idx_embeddings_cosine
+ ON document_embeddings
  USING hnsw (embedding vector_cosine_ops)
  WITH (m = 16, ef_construction = 64);
  ```
@@ -457,22 +504,22 @@ Combine vector similarity with metadata filtering:
  # Search with tag filtering
  results = db.exec_params(
    "SELECT d.title, de.embedding <=> $1::vector as distance
-    FROM documents d
+    FROM documents d
     JOIN document_embeddings de ON d.id = de.document_id
     WHERE d.metadata->'tags' ? 'programming'
-    ORDER BY de.embedding <=> $1::vector
+    ORDER BY de.embedding <=> $1::vector
     LIMIT 5",
    [query_vector]
  )

- # Search with difficulty and tag filters
+ # Search with difficulty and tag filters
  results = db.exec_params(
    "SELECT d.title, de.embedding <=> $1::vector as distance
-    FROM documents d
+    FROM documents d
     JOIN document_embeddings de ON d.id = de.document_id
     WHERE d.metadata->>'difficulty' = 'beginner'
     AND d.metadata->'tags' ?| $2::text[]
-    ORDER BY de.embedding <=> $1::vector
+    ORDER BY de.embedding <=> $1::vector
     LIMIT 5",
    [query_vector, ['ruby', 'programming']]
  )
@@ -488,7 +535,7 @@ For large datasets, tune HNSW parameters:
  -- High accuracy (slower build, more memory)
  WITH (m = 32, ef_construction = 128)

- -- Fast build (lower accuracy, less memory)
+ -- Fast build (lower accuracy, less memory)
  WITH (m = 8, ef_construction = 32)

  -- Balanced (recommended default)
@@ -502,9 +549,9 @@ WITH (m = 16, ef_construction = 64)
  SET hnsw.ef_search = 100; -- Higher = more accurate, slower

  -- Use EXPLAIN ANALYZE to optimize queries
- EXPLAIN ANALYZE
- SELECT * FROM document_embeddings
- ORDER BY embedding <=> '[0.1,0.2,...]'::vector
+ EXPLAIN ANALYZE
+ SELECT * FROM document_embeddings
+ ORDER BY embedding <=> '[0.1,0.2,...]'::vector
  LIMIT 10;
  ```
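The same session setting can be applied from Ruby through the `pg` connection used in the earlier examples (the values here are illustrative):

```ruby
# Trade speed for recall per session: higher ef_search = more accurate, slower.
db.exec("SET hnsw.ef_search = 40")   # favour latency
db.exec("SET hnsw.ef_search = 200")  # favour recall
```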
@@ -516,14 +563,14 @@ For large documents, use chunking for better search granularity:
  def chunk_document(text, chunk_size: 500, overlap: 50)
    chunks = []
    start = 0
-
+
    while start < text.length
      end_pos = [start + chunk_size, text.length].min
      chunk = text[start...end_pos]
      chunks << chunk
      start += chunk_size - overlap
    end
-
+
    chunks
  end
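A usage sketch tying the helper to embedding generation (`long_text` is a placeholder for your document body):

```ruby
chunks = chunk_document(long_text, chunk_size: 500, overlap: 50)

# One embedding per chunk gives finer-grained search results.
chunk_embeddings = chunks.map { |chunk| client.generate_embedding(chunk) }
```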
@@ -548,6 +595,7 @@ DB_HOST=localhost ruby examples/vector_search.rb
  ```

  The example demonstrates:
+
  - Document embedding generation and storage
  - Similarity search with different distance functions
  - Metadata filtering and advanced queries
@@ -596,7 +644,7 @@ info = client.provider_info
  puts info[:name]      # => :ollama
  puts info[:class]     # => "Prescient::Ollama::Provider"
  puts info[:available] # => true
- puts info[:options]   # => {...} (excluding sensitive data)
+ puts info[:options]   # => { ... } (excluding sensitive data)
  ```
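For example, a quick health sweep over the providers configured earlier (the provider names come from the fallback example above; adjust to your configuration):

```ruby
%i[primary backup1 backup2].each do |name|
  info = Prescient::Client.new(name, enable_fallback: false).provider_info
  puts "#{info[:name]} (#{info[:class]}): #{info[:available] ? 'up' : 'down'}"
end
```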
  ## Provider-Specific Features
@@ -633,6 +681,7 @@ The easiest way to get started with Prescient and Ollama is using Docker Compose
  Before starting, ensure your system meets the minimum requirements for running Ollama:

  #### **Minimum Requirements:**
+
  - **CPU**: 4+ cores (x86_64 or ARM64)
  - **RAM**: 8GB+ (16GB recommended)
  - **Storage**: 10GB+ free space for models
@@ -640,20 +689,22 @@ Before starting, ensure your system meets the minimum requirements for running O

  #### **Model-Specific Requirements:**

- | Model | RAM Required | Storage | Notes |
- |-------|-------------|---------|-------|
- | `nomic-embed-text` | 1GB | 274MB | Embedding model |
- | `llama3.1:8b` | 8GB | 4.7GB | Chat model (8B parameters) |
- | `llama3.1:70b` | 64GB+ | 40GB | Large chat model (70B parameters) |
- | `codellama:7b` | 8GB | 3.8GB | Code generation model |
+ | Model              | RAM Required | Storage | Notes                             |
+ | ------------------ | ------------ | ------- | --------------------------------- |
+ | `nomic-embed-text` | 1GB          | 274MB   | Embedding model                   |
+ | `llama3.1:8b`      | 8GB          | 4.7GB   | Chat model (8B parameters)        |
+ | `llama3.1:70b`     | 64GB+        | 40GB    | Large chat model (70B parameters) |
+ | `codellama:7b`     | 8GB          | 3.8GB   | Code generation model             |

  #### **Performance Recommendations:**
+
  - **SSD Storage**: Significantly faster model loading
  - **GPU (Optional)**: NVIDIA GPU with 8GB+ VRAM for acceleration
  - **Network**: Stable internet for initial model downloads
  - **Docker**: 4GB+ memory limit configured

  #### **GPU Acceleration (Optional):**
+
  - **NVIDIA GPU**: RTX 3060+ with 8GB+ VRAM recommended
  - **CUDA**: Version 11.8+ required
  - **Docker**: NVIDIA Container Toolkit installed
@@ -664,24 +715,27 @@ Before starting, ensure your system meets the minimum requirements for running O
  ### Quick Start with Docker

  1. **Start Ollama service:**
+
  ```bash
  docker-compose up -d ollama
  ```

  2. **Pull required models:**
+
  ```bash
  # Automatic setup
  docker-compose up ollama-init
-
+
  # Or manual setup
  ./scripts/setup-ollama-models.sh
  ```

  3. **Run examples:**
+
  ```bash
  # Set environment variable
  export OLLAMA_URL=http://localhost:11434
-
+
  # Run examples
  ruby examples/custom_contexts.rb
  ```
@@ -702,9 +756,9 @@ The included `docker-compose.yml` provides:
  services:
    ollama:
      ports:
-       - "11434:11434" # Ollama API port
+       - "11434:11434" # Ollama API port
      volumes:
-       - ollama_data:/root/.ollama # Persist models
+       - ollama_data:/root/.ollama # Persist models
      environment:
        - OLLAMA_HOST=0.0.0.0
        - OLLAMA_ORIGINS=*
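With the container running, pointing Prescient at it is just the Ollama provider configuration shown earlier:

```ruby
Prescient.configure do |config|
  config.add_provider(:ollama, Prescient::Provider::Ollama,
    url: ENV.fetch('OLLAMA_URL', 'http://localhost:11434'),
    embedding_model: 'nomic-embed-text',
    chat_model: 'llama3.1:8b'
  )
end

client = Prescient::Client.new(:ollama)
puts client.available? # true once the container is healthy
```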
@@ -749,7 +803,7 @@ curl http://localhost:11434/api/tags
  # Pull a specific model
  curl -X POST http://localhost:11434/api/pull \
    -H "Content-Type: application/json" \
-   -d '{"name": "llama3.1:8b"}'
+   -d '{ "name": "llama3.1:8b"}'

  # Health check
  curl http://localhost:11434/api/version
@@ -770,6 +824,7 @@ For production use:
  #### **Common Issues:**

  **Out of Memory Errors:**
+
  ```bash
  # Check available memory
  free -h
@@ -782,6 +837,7 @@ OLLAMA_CHAT_MODEL=llama3.1:7b ruby examples/custom_contexts.rb
  ```

  **Slow Model Loading:**
+
  ```bash
  # Check disk I/O
  iostat -x 1
@@ -791,6 +847,7 @@ iostat -x 1
  ```

  **Model Download Failures:**
+
  ```bash
  # Check disk space
  df -h
@@ -800,6 +857,7 @@ docker exec prescient-ollama ollama pull llama3.1:8b
  ```

  **GPU Not Detected:**
+
  ```bash
  # Check NVIDIA Docker runtime
  docker run --rm --gpus all nvidia/cuda:11.8-base nvidia-smi
@@ -820,7 +878,7 @@ docker logs prescient-ollama
  # Test API response time
  time curl -X POST http://localhost:11434/api/generate \
    -H "Content-Type: application/json" \
-   -d '{"model": "llama3.1:8b", "prompt": "Hello", "stream": false}'
+   -d '{ "model": "llama3.1:8b", "prompt": "Hello", "stream": false}'
  ```

  ## Testing
data/Rakefile CHANGED
@@ -26,5 +26,6 @@ task :console do
  require "bundler/setup"
  require "prescient"
  require "irb"
+ ARGV.clear
  IRB.start
- end
+ end
@@ -130,17 +130,17 @@ query_embedding = client.generate_embedding(query_text)
  query_vector = "[#{query_embedding.join(',')}]"

  results = db.exec_params(
-   "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
-    FROM documents d
-    JOIN document_embeddings de ON d.id = de.document_id
-    ORDER BY de.embedding <=> $1::vector
+   "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
+    FROM documents d
+    JOIN document_embeddings de ON d.id = de.document_id
+    ORDER BY de.embedding <=> $1::vector
    LIMIT 5",
    [query_vector]
  )

  results.each do |row|
    similarity = 1 - row['distance'].to_f
-   puts "#{row['title']} (#{(similarity * 100).round(1)}% similar)"
+   puts "#{ row['title']} (#{ (similarity * 100).round(1)}% similar)"
  end
  ```
@@ -150,11 +150,11 @@ end
  # Search with metadata filtering
  results = db.exec_params(
    "SELECT d.title, de.embedding <=> $1::vector as distance
-    FROM documents d
+    FROM documents d
     JOIN document_embeddings de ON d.id = de.document_id
     WHERE d.metadata->'tags' ? 'programming'
     AND d.metadata->>'difficulty' = 'beginner'
-    ORDER BY de.embedding <=> $1::vector
+    ORDER BY de.embedding <=> $1::vector
    LIMIT 10",
    [query_vector]
  )
@@ -168,17 +168,17 @@ For large documents, split into chunks for better search granularity:
  def chunk_document(text, chunk_size: 500, overlap: 50)
    chunks = []
    start = 0
-
+
    while start < text.length
      end_pos = [start + chunk_size, text.length].min
-
+
      # Find word boundary to avoid cutting words
      if end_pos < text.length
        while end_pos > start && text[end_pos] != ' '
          end_pos -= 1
        end
      end
-
+
      chunk = text[start...end_pos].strip
      chunks << {
        text: chunk,
@@ -186,11 +186,11 @@ def chunk_document(text, chunk_size: 500, overlap: 50)
        end_pos: end_pos,
        index: chunks.length
      }
-
+
      start = end_pos - overlap
      break if start >= text.length
    end
-
+
    chunks
  end

@@ -200,14 +200,14 @@ chunks.each do |chunk|
  # Insert chunk
  chunk_result = db.exec_params(
    "INSERT INTO document_chunks (document_id, chunk_index, chunk_text, chunk_metadata) VALUES ($1, $2, $3, $4) RETURNING id",
-   [document_id, chunk[:index], chunk[:text], {start_pos: chunk[:start_pos], end_pos: chunk[:end_pos]}.to_json]
+   [document_id, chunk[:index], chunk[:text], { start_pos: chunk[:start_pos], end_pos: chunk[:end_pos]}.to_json]
  )
  chunk_id = chunk_result[0]['id']
-
+
  # Generate embedding for chunk
  chunk_embedding = client.generate_embedding(chunk[:text])
  chunk_vector = "[#{chunk_embedding.join(',')}]"
-
+
  # Store chunk embedding
  db.exec_params(
    "INSERT INTO chunk_embeddings (chunk_id, document_id, embedding_provider, embedding_model, embedding_dimensions, embedding) VALUES ($1, $2, $3, $4, $5, $6)",
@@ -224,20 +224,20 @@ For different dataset sizes and performance requirements:

  ```sql
  -- Small datasets (< 100K vectors): Fast build, good accuracy
- CREATE INDEX idx_embeddings_small
- ON document_embeddings
+ CREATE INDEX idx_embeddings_small
+ ON document_embeddings
  USING hnsw (embedding vector_cosine_ops)
  WITH (m = 8, ef_construction = 32);

  -- Medium datasets (100K - 1M vectors): Balanced
- CREATE INDEX idx_embeddings_medium
- ON document_embeddings
+ CREATE INDEX idx_embeddings_medium
+ ON document_embeddings
  USING hnsw (embedding vector_cosine_ops)
  WITH (m = 16, ef_construction = 64);

  -- Large datasets (> 1M vectors): High accuracy
- CREATE INDEX idx_embeddings_large
- ON document_embeddings
+ CREATE INDEX idx_embeddings_large
+ ON document_embeddings
  USING hnsw (embedding vector_cosine_ops)
  WITH (m = 32, ef_construction = 128);
  ```
@@ -251,9 +251,9 @@ SET hnsw.ef_search = 100; -- Balanced (default)
  SET hnsw.ef_search = 200; -- High accuracy, slower

  -- Monitor query performance
- EXPLAIN (ANALYZE, BUFFERS)
- SELECT * FROM document_embeddings
- ORDER BY embedding <=> '[0.1,0.2,...]'::vector
+ EXPLAIN (ANALYZE, BUFFERS)
+ SELECT * FROM document_embeddings
+ ORDER BY embedding <=> '[0.1,0.2,...]'::vector
  LIMIT 10;
  ```

@@ -268,7 +268,7 @@ texts.each_slice(10) do |batch|
  batch.each do |text|
    embedding = client.generate_embedding(text)
    embeddings << embedding
-
+
    # Small delay to avoid rate limiting
    sleep(0.1)
  end
@@ -295,13 +295,13 @@ Combine vector similarity with traditional text search:
  ```sql
  WITH vector_results AS (
    SELECT document_id, embedding <=> $1::vector as distance
-   FROM document_embeddings
-   ORDER BY embedding <=> $1::vector
+   FROM document_embeddings
+   ORDER BY embedding <=> $1::vector
    LIMIT 20
  ),
  text_results AS (
    SELECT id as document_id, ts_rank(to_tsvector(content), plainto_tsquery($2)) as rank
-   FROM documents
+   FROM documents
    WHERE to_tsvector(content) @@ plainto_tsquery($2)
  )
  SELECT d.title, d.content,
@@ -328,10 +328,10 @@ providers = [

  providers.each do |provider|
    next unless provider[:client].available?
-
+
    embedding = provider[:client].generate_embedding(text)
    vector_str = "[#{embedding.join(',')}]"
-
+
    db.exec_params(
      "INSERT INTO document_embeddings (document_id, embedding_provider, embedding_model, embedding_dimensions, embedding, embedding_text) VALUES ($1, $2, $3, $4, $5, $6)",
      [document_id, provider[:name], provider[:model], provider[:dims], vector_str, text]
@@ -348,14 +348,14 @@ end
  def track_search(query_text, results, provider, model)
    query_embedding = client.generate_embedding(query_text)
    query_vector = "[#{query_embedding.join(',')}]"
-
+
    # Insert search query
    query_result = db.exec_params(
      "INSERT INTO search_queries (query_text, embedding_provider, embedding_model, query_embedding, result_count) VALUES ($1, $2, $3, $4, $5) RETURNING id",
      [query_text, provider, model, query_vector, results.length]
    )
    query_id = query_result[0]['id']
-
+
    # Insert query results
    results.each_with_index do |result, index|
      db.exec_params(
@@ -371,14 +371,14 @@ end
  ```sql
  -- Popular search terms
  SELECT query_text, COUNT(*) as search_count
- FROM search_queries
+ FROM search_queries
  WHERE created_at > NOW() - INTERVAL '7 days'
  GROUP BY query_text
  ORDER BY search_count DESC
  LIMIT 10;

  -- Average similarity scores
- SELECT embedding_provider, embedding_model,
+ SELECT embedding_provider, embedding_model,
         AVG(similarity_score) as avg_similarity,
         COUNT(*) as result_count
  FROM query_results qr
@@ -400,11 +400,12 @@ ORDER BY hour;
  ### Common Issues

  **Slow queries:**
+
  ```sql
  -- Check if indexes are being used
- EXPLAIN (ANALYZE, BUFFERS)
- SELECT * FROM document_embeddings
- ORDER BY embedding <=> '[...]'::vector
+ EXPLAIN (ANALYZE, BUFFERS)
+ SELECT * FROM document_embeddings
+ ORDER BY embedding <=> '[...]'::vector
  LIMIT 10;

  -- Rebuild indexes if needed
@@ -412,10 +413,11 @@ REINDEX INDEX idx_document_embeddings_cosine;
  ```

  **Memory issues:**
+
  ```sql
  -- Check index sizes
  SELECT schemaname, tablename, indexname, pg_size_pretty(pg_relation_size(indexrelid)) as size
- FROM pg_stat_user_indexes
+ FROM pg_stat_user_indexes
  WHERE tablename LIKE '%embedding%'
  ORDER BY pg_relation_size(indexrelid) DESC;

@@ -424,6 +426,7 @@ SET work_mem = '256MB';
  ```

  **Dimension mismatches:**
+
  ```ruby
  # Validate embedding dimensions before storing
  expected_dims = 768
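  # Hypothetical continuation of the check (not from the gem's docs):
  # reject vectors whose length differs from the model's dimensions.
  embedding = client.generate_embedding(text)
  unless embedding.length == expected_dims
    raise ArgumentError, "expected #{expected_dims} dimensions, got #{embedding.length}"
  end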
@@ -447,4 +450,4 @@ end
  - [pgvector Documentation](https://github.com/pgvector/pgvector)
  - [HNSW Algorithm](https://arxiv.org/abs/1603.09320)
  - [Vector Database Concepts](https://www.pinecone.io/learn/vector-database/)
- - [Embedding Best Practices](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings)
+ - [Embedding Best Practices](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings)