prescient 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -2
- data/.yardopts +14 -0
- data/CHANGELOG.md +64 -0
- data/CHANGELOG.pdf +0 -0
- data/INTEGRATION_GUIDE.md +363 -0
- data/README.md +96 -38
- data/Rakefile +2 -1
- data/VECTOR_SEARCH_GUIDE.md +42 -39
- data/lib/prescient/base.rb +123 -19
- data/lib/prescient/client.rb +125 -21
- data/lib/prescient/provider/huggingface.rb +1 -3
- data/lib/prescient/version.rb +1 -1
- data/lib/prescient.rb +103 -1
- data/prescient.gemspec +17 -15
- metadata +67 -32
data/README.md
CHANGED
@@ -117,6 +117,52 @@ Prescient.configure do |config|
 end
 ```

+### Provider Fallback Configuration
+
+Prescient supports automatic fallback to backup providers when the primary provider fails. This ensures high availability for your AI applications.
+
+```ruby
+Prescient.configure do |config|
+  # Configure primary provider
+  config.add_provider(:primary, Prescient::Provider::OpenAI,
+    api_key: ENV['OPENAI_API_KEY'],
+    embedding_model: 'text-embedding-3-small',
+    chat_model: 'gpt-3.5-turbo'
+  )
+
+  # Configure backup providers
+  config.add_provider(:backup1, Prescient::Provider::Anthropic,
+    api_key: ENV['ANTHROPIC_API_KEY'],
+    model: 'claude-3-haiku-20240307'
+  )
+
+  config.add_provider(:backup2, Prescient::Provider::Ollama,
+    url: 'http://localhost:11434',
+    embedding_model: 'nomic-embed-text',
+    chat_model: 'llama3.1:8b'
+  )
+
+  # Configure fallback order
+  config.fallback_providers = [:backup1, :backup2]
+end
+
+# Client with fallback enabled (default)
+client = Prescient::Client.new(:primary, enable_fallback: true)
+
+# Client without fallback
+client_no_fallback = Prescient::Client.new(:primary, enable_fallback: false)
+
+# Convenience methods also support fallback
+response = Prescient.generate_response("Hello", provider: :primary, enable_fallback: true)
+```
+
+**Fallback Behavior:**
+- When a provider fails with a persistent error, Prescient automatically tries the next available provider
+- Only available (healthy) providers are tried during fallback
+- If no fallback providers are configured, all available providers are tried as fallbacks
+- Transient errors (rate limits, timeouts) still use retry logic before fallback
+- The fallback process preserves all method arguments and options
+
 ## Usage

 ### Quick Start
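
The fallback rules added above pair per-provider retries for transient failures with provider-level failover for persistent ones. A minimal sketch of that control flow (hypothetical error-class and method names, not the gem's actual source):

```ruby
# Hypothetical sketch mirroring the fallback bullets above. Assumes transient
# failures raise Prescient::RateLimitError or Prescient::TimeoutError and
# persistent ones raise Prescient::Error; these names are illustrative.
def call_with_fallback(providers, max_retries: 3)
  providers.each do |provider|
    next unless provider.available? # only healthy providers are tried

    attempts = 0
    begin
      return yield(provider)
    rescue Prescient::RateLimitError, Prescient::TimeoutError
      attempts += 1
      retry if attempts < max_retries # transient: retry the same provider first
    rescue Prescient::Error
      next # persistent: fall through to the next provider
    end
  end
  raise Prescient::Error, 'all providers failed'
end
```
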
@@ -170,8 +216,8 @@ response = Prescient.generate_response(query, context_items,
 )

 puts response[:response]
-puts "Model: #{response[:model]}"
-puts "Provider: #{response[:provider]}"
+puts "Model: " + response[:model]
+puts "Provider: " + response[:provider]
 ```

 ### Error Handling
@@ -214,14 +260,14 @@ Prescient.configure do |config|
   prompt_templates: {
     system_prompt: 'You are a friendly customer service representative.',
     no_context_template: <<~TEMPLATE.strip,
-      %{system_prompt}
+      %{system_prompt}

       Customer Question: %{query}

       Please provide a helpful response.
     TEMPLATE
     with_context_template: <<~TEMPLATE.strip
-      %{system_prompt} Use the company info below to help answer.
+      %{system_prompt} Use the company info below to help answer.

       Company Information:
       %{context}
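
The `%{...}` placeholders in these templates are Ruby's named format references, resolved with `Kernel#format`. A standalone illustration using the template shown in the hunk above:

```ruby
# Plain Ruby, outside the gem: fill a template's named references from a hash.
template = <<~TEMPLATE.strip
  %{system_prompt}

  Customer Question: %{query}

  Please provide a helpful response.
TEMPLATE

puts format(template,
            system_prompt: 'You are a friendly customer service representative.',
            query: 'How do I reset my password?')
```
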
@@ -259,6 +305,7 @@ prompt_templates: {
   system_prompt: 'You are a technical documentation assistant. Provide detailed explanations with code examples.',
   # ... templates
 }
+
 ```

 #### Creative Writing
@@ -283,12 +330,12 @@ Prescient.configure do |config|
   context_configs: {
     'product' => {
       fields: %w[name description price category brand],
-      format: '%{name} by %{brand}: %{description} - $%{price} (%{category})',
+      format: '%{name} by %{brand}: %{description} - $%{price} (%{category})',
       embedding_fields: %w[name description category brand]
     },
     'review' => {
       fields: %w[product_name rating review_text reviewer_name],
-      format: '%{product_name} - %{rating}/5 stars: "%{review_text}"',
+      format: '%{product_name} - %{rating}/5 stars: "%{review_text}"',
       embedding_fields: %w[product_name review_text]
     }
   }
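
A sketch of how a context config like the one above could be applied to a record: the `format` string renders the display text, while `embedding_fields` selects the values joined into embedding input. Hypothetical glue code under those assumptions, not the gem's internals:

```ruby
product_config = {
  format: '%{name} by %{brand}: %{description} - $%{price} (%{category})',
  embedding_fields: %w[name description category brand]
}

item = { 'name' => 'Widget', 'brand' => 'Acme', 'description' => 'A sturdy widget',
         'price' => '9.99', 'category' => 'tools' }

# Display string via named format references
display = format(product_config[:format], item.transform_keys(&:to_sym))

# Text fed to the embedding model
embedding_text = product_config[:embedding_fields].map { |f| item[f] }.join(' ')
```
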
@@ -409,10 +456,10 @@ query_embedding = client.generate_embedding(query_text)
 query_vector = "[#{query_embedding.join(',')}]"

 results = db.exec_params(
-  "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
-   FROM documents d
-   JOIN document_embeddings de ON d.id = de.document_id
-   ORDER BY de.embedding <=> $1::vector
+  "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
+   FROM documents d
+   JOIN document_embeddings de ON d.id = de.document_id
+   ORDER BY de.embedding <=> $1::vector
    LIMIT 5",
   [query_vector]
 )
@@ -423,14 +470,14 @@ results = db.exec_params(
 pgvector supports three distance functions:

 - **Cosine Distance** (`<=>`): Best for normalized embeddings
-- **L2 Distance** (`<->`): Euclidean distance, good general purpose
+- **L2 Distance** (`<->`): Euclidean distance, good general purpose
 - **Inner Product** (`<#>`): Dot product, useful for specific cases

 ```sql
 -- Cosine similarity (most common)
 ORDER BY embedding <=> query_vector

--- L2 distance
+-- L2 distance
 ORDER BY embedding <-> query_vector

 -- Inner product
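
For intuition, here is what the three operators compute, worked on two small unit vectors in plain Ruby (no pgvector required):

```ruby
a = [1.0, 0.0]
b = [0.6, 0.8]

dot = a.zip(b).sum { |x, y| x * y }                 # 0.6
l2  = Math.sqrt(a.zip(b).sum { |x, y| (x - y)**2 }) # ~0.894
cos = 1 - dot / (Math.sqrt(a.sum { |x| x**2 }) *
                 Math.sqrt(b.sum { |x| x**2 }))     # 0.4

puts "cosine distance (<=>): #{cos.round(3)}"
puts "L2 distance (<->):     #{l2.round(3)}"
# pgvector's <#> returns the negated dot product, so an ascending
# ORDER BY still ranks the best matches first:
puts "inner product (<#>):   #{(-dot).round(3)}"
```
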
@@ -443,8 +490,8 @@ The setup automatically creates HNSW indexes for fast similarity search:

 ```sql
 -- Example index for cosine distance
-CREATE INDEX idx_embeddings_cosine
-ON document_embeddings
+CREATE INDEX idx_embeddings_cosine
+ON document_embeddings
 USING hnsw (embedding vector_cosine_ops)
 WITH (m = 16, ef_construction = 64);
 ```
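
In HNSW, `m` is the number of graph links per node and `ef_construction` is the build-time search breadth; raising either improves recall at the cost of memory and build time. A minimal sketch of issuing the same statement from Ruby, assuming the `db` connection (pg gem) used elsewhere in this README:

```ruby
# Create the HNSW index from Ruby; SQL is taken from the hunk above.
db.exec(<<~SQL)
  CREATE INDEX IF NOT EXISTS idx_embeddings_cosine
  ON document_embeddings
  USING hnsw (embedding vector_cosine_ops)
  WITH (m = 16, ef_construction = 64);
SQL
```
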
@@ -457,22 +504,22 @@ Combine vector similarity with metadata filtering:
 # Search with tag filtering
 results = db.exec_params(
   "SELECT d.title, de.embedding <=> $1::vector as distance
-   FROM documents d
+   FROM documents d
    JOIN document_embeddings de ON d.id = de.document_id
    WHERE d.metadata->'tags' ? 'programming'
-   ORDER BY de.embedding <=> $1::vector
+   ORDER BY de.embedding <=> $1::vector
    LIMIT 5",
   [query_vector]
 )

-# Search with difficulty and tag filters
+# Search with difficulty and tag filters
 results = db.exec_params(
   "SELECT d.title, de.embedding <=> $1::vector as distance
-   FROM documents d
+   FROM documents d
    JOIN document_embeddings de ON d.id = de.document_id
    WHERE d.metadata->>'difficulty' = 'beginner'
    AND d.metadata->'tags' ?| $2::text[]
-   ORDER BY de.embedding <=> $1::vector
+   ORDER BY de.embedding <=> $1::vector
    LIMIT 5",
   [query_vector, ['ruby', 'programming']]
 )
@@ -488,7 +535,7 @@ For large datasets, tune HNSW parameters:
 -- High accuracy (slower build, more memory)
 WITH (m = 32, ef_construction = 128)

--- Fast build (lower accuracy, less memory)
+-- Fast build (lower accuracy, less memory)
 WITH (m = 8, ef_construction = 32)

 -- Balanced (recommended default)
@@ -502,9 +549,9 @@ WITH (m = 16, ef_construction = 64)
 SET hnsw.ef_search = 100; -- Higher = more accurate, slower

 -- Use EXPLAIN ANALYZE to optimize queries
-EXPLAIN ANALYZE
-SELECT * FROM document_embeddings
-ORDER BY embedding <=> '[0.1,0.2,...]'::vector
+EXPLAIN ANALYZE
+SELECT * FROM document_embeddings
+ORDER BY embedding <=> '[0.1,0.2,...]'::vector
 LIMIT 10;
 ```

@@ -516,14 +563,14 @@ For large documents, use chunking for better search granularity:
 def chunk_document(text, chunk_size: 500, overlap: 50)
   chunks = []
   start = 0
-
+
   while start < text.length
     end_pos = [start + chunk_size, text.length].min
     chunk = text[start...end_pos]
     chunks << chunk
     start += chunk_size - overlap
   end
-
+
   chunks
 end

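
A quick usage check of `chunk_document` as defined in the hunk above: with `chunk_size: 500` and `overlap: 50`, chunks start at offsets 0, 450, 900, and so on.

```ruby
text = "lorem " * 200                # 1,200 characters of filler
chunks = chunk_document(text, chunk_size: 500, overlap: 50)
puts chunks.length                   # => 3
chunks.each { |c| puts c.length }    # => 500, 500, 300
```
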
@@ -548,6 +595,7 @@ DB_HOST=localhost ruby examples/vector_search.rb
 ```

 The example demonstrates:
+
 - Document embedding generation and storage
 - Similarity search with different distance functions
 - Metadata filtering and advanced queries
@@ -596,7 +644,7 @@ info = client.provider_info
 puts info[:name] # => :ollama
 puts info[:class] # => "Prescient::Ollama::Provider"
 puts info[:available] # => true
-puts info[:options] # => {...} (excluding sensitive data)
+puts info[:options] # => {...} (excluding sensitive data)
 ```

 ## Provider-Specific Features
@@ -633,6 +681,7 @@ The easiest way to get started with Prescient and Ollama is using Docker Compose
 Before starting, ensure your system meets the minimum requirements for running Ollama:

 #### **Minimum Requirements:**
+
 - **CPU**: 4+ cores (x86_64 or ARM64)
 - **RAM**: 8GB+ (16GB recommended)
 - **Storage**: 10GB+ free space for models
@@ -640,20 +689,22 @@ Before starting, ensure your system meets the minimum requirements for running Ollama:

 #### **Model-Specific Requirements:**

-| Model
-
-| `nomic-embed-text` | 1GB
-| `llama3.1:8b`
-| `llama3.1:70b`
-| `codellama:7b`
+| Model              | RAM Required | Storage | Notes                              |
+| ------------------ | ------------ | ------- | ---------------------------------- |
+| `nomic-embed-text` | 1GB          | 274MB   | Embedding model                    |
+| `llama3.1:8b`      | 8GB          | 4.7GB   | Chat model (8B parameters)         |
+| `llama3.1:70b`     | 64GB+        | 40GB    | Large chat model (70B parameters)  |
+| `codellama:7b`     | 8GB          | 3.8GB   | Code generation model              |

 #### **Performance Recommendations:**
+
 - **SSD Storage**: Significantly faster model loading
 - **GPU (Optional)**: NVIDIA GPU with 8GB+ VRAM for acceleration
 - **Network**: Stable internet for initial model downloads
 - **Docker**: 4GB+ memory limit configured

 #### **GPU Acceleration (Optional):**
+
 - **NVIDIA GPU**: RTX 3060+ with 8GB+ VRAM recommended
 - **CUDA**: Version 11.8+ required
 - **Docker**: NVIDIA Container Toolkit installed
@@ -664,24 +715,27 @@ Before starting, ensure your system meets the minimum requirements for running Ollama:
 ### Quick Start with Docker

 1. **Start Ollama service:**
+
    ```bash
    docker-compose up -d ollama
    ```

 2. **Pull required models:**
+
    ```bash
    # Automatic setup
    docker-compose up ollama-init
-
+
    # Or manual setup
    ./scripts/setup-ollama-models.sh
    ```

 3. **Run examples:**
+
    ```bash
    # Set environment variable
    export OLLAMA_URL=http://localhost:11434
-
+
    # Run examples
    ruby examples/custom_contexts.rb
    ```
@@ -702,9 +756,9 @@ The included `docker-compose.yml` provides:
 services:
   ollama:
     ports:
-      - "11434:11434"
+      - "11434:11434" # Ollama API port
     volumes:
-      - ollama_data:/root/.ollama
+      - ollama_data:/root/.ollama # Persist models
     environment:
       - OLLAMA_HOST=0.0.0.0
       - OLLAMA_ORIGINS=*
@@ -749,7 +803,7 @@ curl http://localhost:11434/api/tags
 # Pull a specific model
 curl -X POST http://localhost:11434/api/pull \
   -H "Content-Type: application/json" \
-  -d '{"name": "llama3.1:8b"}'
+  -d '{"name": "llama3.1:8b"}'

 # Health check
 curl http://localhost:11434/api/version
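
The same pull request can be issued from Ruby with only the standard library; a minimal sketch mirroring the curl call above (endpoint and payload taken from the hunk; timeout value is an assumption):

```ruby
require 'net/http'
require 'json'

uri = URI('http://localhost:11434/api/pull')
req = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json')
req.body = { name: 'llama3.1:8b' }.to_json

Net::HTTP.start(uri.host, uri.port) do |http|
  http.read_timeout = 600       # model pulls can take several minutes
  res = http.request(req)
  puts res.code                 # => "200" on success
end
```
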
@@ -770,6 +824,7 @@ For production use:
 #### **Common Issues:**

 **Out of Memory Errors:**
+
 ```bash
 # Check available memory
 free -h
@@ -782,6 +837,7 @@ OLLAMA_CHAT_MODEL=llama3.1:7b ruby examples/custom_contexts.rb
 ```

 **Slow Model Loading:**
+
 ```bash
 # Check disk I/O
 iostat -x 1
@@ -791,6 +847,7 @@ iostat -x 1
 ```

 **Model Download Failures:**
+
 ```bash
 # Check disk space
 df -h
@@ -800,6 +857,7 @@ docker exec prescient-ollama ollama pull llama3.1:8b
 ```

 **GPU Not Detected:**
+
 ```bash
 # Check NVIDIA Docker runtime
 docker run --rm --gpus all nvidia/cuda:11.8-base nvidia-smi
@@ -820,7 +878,7 @@ docker logs prescient-ollama
 # Test API response time
 time curl -X POST http://localhost:11434/api/generate \
   -H "Content-Type: application/json" \
-  -d '{"model": "llama3.1:8b", "prompt": "Hello", "stream": false}'
+  -d '{"model": "llama3.1:8b", "prompt": "Hello", "stream": false}'
 ```

 ## Testing
data/Rakefile
CHANGED
data/VECTOR_SEARCH_GUIDE.md
CHANGED
@@ -130,17 +130,17 @@ query_embedding = client.generate_embedding(query_text)
 query_vector = "[#{query_embedding.join(',')}]"

 results = db.exec_params(
-  "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
-   FROM documents d
-   JOIN document_embeddings de ON d.id = de.document_id
-   ORDER BY de.embedding <=> $1::vector
+  "SELECT d.title, d.content, de.embedding <=> $1::vector AS distance
+   FROM documents d
+   JOIN document_embeddings de ON d.id = de.document_id
+   ORDER BY de.embedding <=> $1::vector
    LIMIT 5",
   [query_vector]
 )

 results.each do |row|
   similarity = 1 - row['distance'].to_f
-  puts "#{row['title']} (#{(similarity * 100).round(1)}% similar)"
+  puts "#{row['title']} (#{(similarity * 100).round(1)}% similar)"
 end
 ```

@@ -150,11 +150,11 @@ end
 # Search with metadata filtering
 results = db.exec_params(
   "SELECT d.title, de.embedding <=> $1::vector as distance
-   FROM documents d
+   FROM documents d
    JOIN document_embeddings de ON d.id = de.document_id
    WHERE d.metadata->'tags' ? 'programming'
    AND d.metadata->>'difficulty' = 'beginner'
-   ORDER BY de.embedding <=> $1::vector
+   ORDER BY de.embedding <=> $1::vector
    LIMIT 10",
   [query_vector]
 )
@@ -168,17 +168,17 @@ For large documents, split into chunks for better search granularity:
 def chunk_document(text, chunk_size: 500, overlap: 50)
   chunks = []
   start = 0
-
+
   while start < text.length
     end_pos = [start + chunk_size, text.length].min
-
+
     # Find word boundary to avoid cutting words
     if end_pos < text.length
       while end_pos > start && text[end_pos] != ' '
        end_pos -= 1
      end
    end
-
+
     chunk = text[start...end_pos].strip
     chunks << {
       text: chunk,
@@ -186,11 +186,11 @@ def chunk_document(text, chunk_size: 500, overlap: 50)
       end_pos: end_pos,
       index: chunks.length
     }
-
+
     start = end_pos - overlap
     break if start >= text.length
   end
-
+
   chunks
 end

@@ -200,14 +200,14 @@ chunks.each do |chunk|
   # Insert chunk
   chunk_result = db.exec_params(
     "INSERT INTO document_chunks (document_id, chunk_index, chunk_text, chunk_metadata) VALUES ($1, $2, $3, $4) RETURNING id",
-    [document_id, chunk[:index], chunk[:text], {start_pos: chunk[:start_pos], end_pos: chunk[:end_pos]}.to_json]
+    [document_id, chunk[:index], chunk[:text], { start_pos: chunk[:start_pos], end_pos: chunk[:end_pos] }.to_json]
   )
   chunk_id = chunk_result[0]['id']
-
+
   # Generate embedding for chunk
   chunk_embedding = client.generate_embedding(chunk[:text])
   chunk_vector = "[#{chunk_embedding.join(',')}]"
-
+
   # Store chunk embedding
   db.exec_params(
     "INSERT INTO chunk_embeddings (chunk_id, document_id, embedding_provider, embedding_model, embedding_dimensions, embedding) VALUES ($1, $2, $3, $4, $5, $6)",
@@ -224,20 +224,20 @@ For different dataset sizes and performance requirements:

 ```sql
 -- Small datasets (< 100K vectors): Fast build, good accuracy
-CREATE INDEX idx_embeddings_small
-ON document_embeddings
+CREATE INDEX idx_embeddings_small
+ON document_embeddings
 USING hnsw (embedding vector_cosine_ops)
 WITH (m = 8, ef_construction = 32);

 -- Medium datasets (100K - 1M vectors): Balanced
-CREATE INDEX idx_embeddings_medium
-ON document_embeddings
+CREATE INDEX idx_embeddings_medium
+ON document_embeddings
 USING hnsw (embedding vector_cosine_ops)
 WITH (m = 16, ef_construction = 64);

 -- Large datasets (> 1M vectors): High accuracy
-CREATE INDEX idx_embeddings_large
-ON document_embeddings
+CREATE INDEX idx_embeddings_large
+ON document_embeddings
 USING hnsw (embedding vector_cosine_ops)
 WITH (m = 32, ef_construction = 128);
 ```
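
The three tiers above translate naturally into a small Ruby helper; a hypothetical convenience sketch (not part of the gem) that picks build parameters from a vector count and creates a matching index via the `db` connection used throughout this guide:

```ruby
# Map the dataset-size tiers from the hunk above to HNSW parameters.
def hnsw_params(vector_count)
  case vector_count
  when 0...100_000         then { m: 8,  ef_construction: 32 }   # small
  when 100_000...1_000_000 then { m: 16, ef_construction: 64 }   # medium
  else                          { m: 32, ef_construction: 128 }  # large
  end
end

params = hnsw_params(250_000)
db.exec(<<~SQL)
  CREATE INDEX idx_embeddings_tuned
  ON document_embeddings
  USING hnsw (embedding vector_cosine_ops)
  WITH (m = #{params[:m]}, ef_construction = #{params[:ef_construction]});
SQL
```
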
@@ -251,9 +251,9 @@ SET hnsw.ef_search = 100; -- Balanced (default)
 SET hnsw.ef_search = 200; -- High accuracy, slower

 -- Monitor query performance
-EXPLAIN (ANALYZE, BUFFERS)
-SELECT * FROM document_embeddings
-ORDER BY embedding <=> '[0.1,0.2,...]'::vector
+EXPLAIN (ANALYZE, BUFFERS)
+SELECT * FROM document_embeddings
+ORDER BY embedding <=> '[0.1,0.2,...]'::vector
 LIMIT 10;
 ```

@@ -268,7 +268,7 @@ texts.each_slice(10) do |batch|
   batch.each do |text|
     embedding = client.generate_embedding(text)
     embeddings << embedding
-
+
     # Small delay to avoid rate limiting
     sleep(0.1)
   end
@@ -295,13 +295,13 @@ Combine vector similarity with traditional text search:
 ```sql
 WITH vector_results AS (
   SELECT document_id, embedding <=> $1::vector as distance
-  FROM document_embeddings
-  ORDER BY embedding <=> $1::vector
+  FROM document_embeddings
+  ORDER BY embedding <=> $1::vector
   LIMIT 20
 ),
 text_results AS (
   SELECT id as document_id, ts_rank(to_tsvector(content), plainto_tsquery($2)) as rank
-  FROM documents
+  FROM documents
   WHERE to_tsvector(content) @@ plainto_tsquery($2)
 )
 SELECT d.title, d.content,
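
Invoking a hybrid query like this from Ruby follows the same `exec_params` pattern used elsewhere in this guide: `$1` binds the pgvector literal and `$2` the raw query text. A sketch assuming the full CTE is kept in a file (the path is hypothetical):

```ruby
hybrid_sql = File.read('sql/hybrid_search.sql')  # hypothetical location of the CTE above
results = db.exec_params(hybrid_sql, [query_vector, query_text])

results.each do |row|
  puts "#{row['title']} (matched by vector and text search)"
end
```
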
@@ -328,10 +328,10 @@ providers = [

 providers.each do |provider|
   next unless provider[:client].available?
-
+
   embedding = provider[:client].generate_embedding(text)
   vector_str = "[#{embedding.join(',')}]"
-
+
   db.exec_params(
     "INSERT INTO document_embeddings (document_id, embedding_provider, embedding_model, embedding_dimensions, embedding, embedding_text) VALUES ($1, $2, $3, $4, $5, $6)",
     [document_id, provider[:name], provider[:model], provider[:dims], vector_str, text]
@@ -348,14 +348,14 @@ end
 def track_search(query_text, results, provider, model)
   query_embedding = client.generate_embedding(query_text)
   query_vector = "[#{query_embedding.join(',')}]"
-
+
   # Insert search query
   query_result = db.exec_params(
     "INSERT INTO search_queries (query_text, embedding_provider, embedding_model, query_embedding, result_count) VALUES ($1, $2, $3, $4, $5) RETURNING id",
     [query_text, provider, model, query_vector, results.length]
   )
   query_id = query_result[0]['id']
-
+
   # Insert query results
   results.each_with_index do |result, index|
     db.exec_params(
@@ -371,14 +371,14 @@ end
 ```sql
 -- Popular search terms
 SELECT query_text, COUNT(*) as search_count
-FROM search_queries
+FROM search_queries
 WHERE created_at > NOW() - INTERVAL '7 days'
 GROUP BY query_text
 ORDER BY search_count DESC
 LIMIT 10;

 -- Average similarity scores
-SELECT embedding_provider, embedding_model,
+SELECT embedding_provider, embedding_model,
        AVG(similarity_score) as avg_similarity,
        COUNT(*) as result_count
 FROM query_results qr
@@ -400,11 +400,12 @@ ORDER BY hour;
 ### Common Issues

 **Slow queries:**
+
 ```sql
 -- Check if indexes are being used
-EXPLAIN (ANALYZE, BUFFERS)
-SELECT * FROM document_embeddings
-ORDER BY embedding <=> '[...]'::vector
+EXPLAIN (ANALYZE, BUFFERS)
+SELECT * FROM document_embeddings
+ORDER BY embedding <=> '[...]'::vector
 LIMIT 10;

 -- Rebuild indexes if needed
@@ -412,10 +413,11 @@ REINDEX INDEX idx_document_embeddings_cosine;
 ```

 **Memory issues:**
+
 ```sql
 -- Check index sizes
 SELECT schemaname, tablename, indexname, pg_size_pretty(pg_relation_size(indexrelid)) as size
-FROM pg_stat_user_indexes
+FROM pg_stat_user_indexes
 WHERE tablename LIKE '%embedding%'
 ORDER BY pg_relation_size(indexrelid) DESC;

@@ -424,6 +426,7 @@ SET work_mem = '256MB';
 ```

 **Dimension mismatches:**
+
 ```ruby
 # Validate embedding dimensions before storing
 expected_dims = 768
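
The hunk above shows only the start of the validation snippet; a fuller sketch of the same check, raising before an INSERT that pgvector's fixed-dimension column would reject anyway (the error class is an assumption; substitute your own):

```ruby
expected_dims = 768

embedding = client.generate_embedding(text)
unless embedding.length == expected_dims
  # Fail fast instead of letting the database reject the vector.
  raise Prescient::Error,
        "expected #{expected_dims} dimensions, got #{embedding.length}"
end
```
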
@@ -447,4 +450,4 @@ end
 - [pgvector Documentation](https://github.com/pgvector/pgvector)
 - [HNSW Algorithm](https://arxiv.org/abs/1603.09320)
 - [Vector Database Concepts](https://www.pinecone.io/learn/vector-database/)
-- [Embedding Best Practices](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings)
+- [Embedding Best Practices](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings)