prescient 0.0.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.example +37 -0
- data/.rubocop.yml +330 -0
- data/.yardopts +14 -0
- data/CHANGELOG.md +64 -0
- data/CHANGELOG.pdf +0 -0
- data/Dockerfile.example +41 -0
- data/INTEGRATION_GUIDE.md +363 -0
- data/README.md +917 -13
- data/Rakefile +26 -3
- data/VECTOR_SEARCH_GUIDE.md +453 -0
- data/db/init/01_enable_pgvector.sql +30 -0
- data/db/init/02_create_schema.sql +108 -0
- data/db/init/03_create_indexes.sql +96 -0
- data/db/init/04_insert_sample_data.sql +121 -0
- data/db/migrate/001_create_prescient_tables.rb +158 -0
- data/docker-compose.yml +153 -0
- data/examples/basic_usage.rb +123 -0
- data/examples/custom_contexts.rb +355 -0
- data/examples/custom_prompts.rb +212 -0
- data/examples/vector_search.rb +330 -0
- data/lib/prescient/base.rb +374 -0
- data/lib/prescient/client.rb +211 -0
- data/lib/prescient/provider/anthropic.rb +146 -0
- data/lib/prescient/provider/huggingface.rb +200 -0
- data/lib/prescient/provider/ollama.rb +172 -0
- data/lib/prescient/provider/openai.rb +181 -0
- data/lib/prescient/version.rb +1 -1
- data/lib/prescient.rb +186 -2
- data/prescient.gemspec +53 -0
- data/scripts/setup-ollama-models.sh +77 -0
- metadata +252 -14
- data/.vscode/settings.json +0 -1
@@ -0,0 +1,96 @@
|
|
1
|
+
-- Create vector similarity indexes for optimal search performance
-- These indexes are crucial for fast similarity search with large datasets
--
-- All vector indexes use HNSW (Hierarchical Navigable Small World) for
-- approximate nearest neighbor search. One index is created per distance
-- operator a query may use:
--   vector_l2_ops     -> L2 distance (<->)
--   vector_ip_ops     -> inner product (<#>)
--   vector_cosine_ops -> cosine distance (<=>)

-- ---------------------------------------------------------------------------
-- Vector indexes for document embeddings
-- ---------------------------------------------------------------------------

-- Index for L2 distance (Euclidean) - good general purpose
CREATE INDEX IF NOT EXISTS idx_document_embeddings_l2
    ON document_embeddings
    USING hnsw (embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);

-- Index for cosine distance - good for normalized embeddings
CREATE INDEX IF NOT EXISTS idx_document_embeddings_cosine
    ON document_embeddings
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

-- Index for inner product - good for some specific use cases
CREATE INDEX IF NOT EXISTS idx_document_embeddings_ip
    ON document_embeddings
    USING hnsw (embedding vector_ip_ops)
    WITH (m = 16, ef_construction = 64);

-- ---------------------------------------------------------------------------
-- Vector indexes for chunk embeddings (same pattern)
-- ---------------------------------------------------------------------------

CREATE INDEX IF NOT EXISTS idx_chunk_embeddings_l2
    ON chunk_embeddings
    USING hnsw (embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);

CREATE INDEX IF NOT EXISTS idx_chunk_embeddings_cosine
    ON chunk_embeddings
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

CREATE INDEX IF NOT EXISTS idx_chunk_embeddings_ip
    ON chunk_embeddings
    USING hnsw (embedding vector_ip_ops)
    WITH (m = 16, ef_construction = 64);

-- ---------------------------------------------------------------------------
-- Vector indexes for search queries
-- ---------------------------------------------------------------------------

CREATE INDEX IF NOT EXISTS idx_search_queries_l2
    ON search_queries
    USING hnsw (query_embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);

CREATE INDEX IF NOT EXISTS idx_search_queries_cosine
    ON search_queries
    USING hnsw (query_embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

-- ---------------------------------------------------------------------------
-- Metadata indexes for common query patterns
-- ---------------------------------------------------------------------------
-- The hnsw access method does not support INCLUDE columns (PostgreSQL only
-- allows them on B-tree, GiST and SP-GiST indexes), so an
-- "hnsw (...) INCLUDE (...)" statement fails and stops this init script.
-- The filter columns are indexed with plain B-tree indexes instead; the
-- vector columns are already covered by the HNSW indexes above.

CREATE INDEX IF NOT EXISTS idx_document_embeddings_provider_model_dimensions
    ON document_embeddings (embedding_provider, embedding_model, embedding_dimensions);

CREATE INDEX IF NOT EXISTS idx_chunk_embeddings_document_provider_model
    ON chunk_embeddings (document_id, embedding_provider, embedding_model);

-- Partial indexes for specific providers (more efficient if you mainly use one provider)
-- Uncomment and modify these based on your primary use case:

-- CREATE INDEX IF NOT EXISTS idx_document_embeddings_ollama_cosine
--     ON document_embeddings
--     USING hnsw (embedding vector_cosine_ops)
--     WITH (m = 16, ef_construction = 64)
--     WHERE embedding_provider = 'ollama';

-- CREATE INDEX IF NOT EXISTS idx_chunk_embeddings_openai_cosine
--     ON chunk_embeddings
--     USING hnsw (embedding vector_cosine_ops)
--     WITH (m = 16, ef_construction = 64)
--     WHERE embedding_provider = 'openai';

-- HNSW Index parameters explanation:
--   m: maximum number of connections for each node (16 is good default)
--   ef_construction: size of dynamic candidate list (64 is good default)
--   Higher values = better accuracy but slower build time and more memory

-- For very large datasets (millions of vectors), consider:
--   m = 32, ef_construction = 128 for better accuracy
--   m = 8, ef_construction = 32 for faster build/less memory

-- Log successful index creation
DO $$
BEGIN
    RAISE NOTICE 'Vector similarity indexes created successfully';
    RAISE NOTICE 'Index parameters: m=16, ef_construction=64';
    RAISE NOTICE 'Distance functions: L2, Cosine, Inner Product';
END $$;
|
@@ -0,0 +1,121 @@
|
|
1
|
+
-- Seed data for testing and demonstration.
-- Shows how documents, chunks and logged queries are laid out for vector search.

-- ---------------------------------------------------------------------------
-- Sample documents
-- ---------------------------------------------------------------------------
INSERT INTO documents (title, content, source_type, source_url, metadata)
VALUES
    (
        'Ruby Programming Basics',
        'Ruby is a dynamic, open-source programming language with a focus on simplicity and productivity. It has an elegant syntax that is natural to read and easy to write. Ruby supports multiple programming paradigms, including procedural, object-oriented, and functional programming.',
        'article',
        'https://example.com/ruby-basics',
        '{"tags": ["programming", "ruby", "beginner"], "author": "Jane Doe", "difficulty": "beginner"}'::jsonb
    ),
    (
        'Machine Learning with Python',
        'Machine learning is a subset of artificial intelligence that enables computers to learn and make decisions from data without being explicitly programmed. Python has become the go-to language for machine learning due to its rich ecosystem of libraries like scikit-learn, TensorFlow, and PyTorch.',
        'article',
        'https://example.com/ml-python',
        '{"tags": ["machine-learning", "python", "ai"], "author": "John Smith", "difficulty": "intermediate"}'::jsonb
    ),
    (
        'Vector Databases Explained',
        'Vector databases are specialized databases designed to store and query high-dimensional vectors. They are essential for similarity search, recommendation systems, and AI applications. Popular vector databases include Pinecone, Weaviate, and PostgreSQL with pgvector extension.',
        'tutorial',
        'https://example.com/vector-databases',
        '{"tags": ["databases", "vectors", "similarity-search"], "author": "Alice Johnson", "difficulty": "advanced"}'::jsonb
    ),
    (
        'API Design Best Practices',
        'RESTful API design follows specific principles to create maintainable and scalable web services. Key principles include using HTTP methods correctly, designing intuitive URLs, handling errors gracefully, and providing clear documentation. Rate limiting and authentication are also crucial considerations.',
        'guide',
        'https://example.com/api-design',
        '{"tags": ["api", "rest", "web-development"], "author": "Bob Wilson", "difficulty": "intermediate"}'::jsonb
    ),
    (
        'Docker Container Security',
        'Container security involves multiple layers of protection, from the host system to the container runtime and the applications themselves. Key practices include using minimal base images, scanning for vulnerabilities, implementing proper access controls, and monitoring container behavior in production.',
        'security-guide',
        'https://example.com/docker-security',
        '{"tags": ["docker", "security", "containers"], "author": "Carol Brown", "difficulty": "advanced"}'::jsonb
    );

-- ---------------------------------------------------------------------------
-- Sample chunks: the first three documents split sentence by sentence for
-- finer-grained search. The document_id values refer to the rows inserted
-- above, so this assumes those rows received ids 1-3.
-- ---------------------------------------------------------------------------
INSERT INTO document_chunks (document_id, chunk_index, chunk_text, chunk_metadata)
VALUES
    (1, 1,
     'Ruby is a dynamic, open-source programming language with a focus on simplicity and productivity.',
     '{"start_pos": 0, "end_pos": 93, "word_count": 15}'::jsonb),
    (1, 2,
     'It has an elegant syntax that is natural to read and easy to write.',
     '{"start_pos": 94, "end_pos": 161, "word_count": 13}'::jsonb),
    (1, 3,
     'Ruby supports multiple programming paradigms, including procedural, object-oriented, and functional programming.',
     '{"start_pos": 162, "end_pos": 274, "word_count": 14}'::jsonb),

    (2, 1,
     'Machine learning is a subset of artificial intelligence that enables computers to learn and make decisions from data without being explicitly programmed.',
     '{"start_pos": 0, "end_pos": 147, "word_count": 23}'::jsonb),
    (2, 2,
     'Python has become the go-to language for machine learning due to its rich ecosystem of libraries like scikit-learn, TensorFlow, and PyTorch.',
     '{"start_pos": 148, "end_pos": 285, "word_count": 22}'::jsonb),

    (3, 1,
     'Vector databases are specialized databases designed to store and query high-dimensional vectors.',
     '{"start_pos": 0, "end_pos": 95, "word_count": 14}'::jsonb),
    (3, 2,
     'They are essential for similarity search, recommendation systems, and AI applications.',
     '{"start_pos": 96, "end_pos": 179, "word_count": 12}'::jsonb),
    (3, 3,
     'Popular vector databases include Pinecone, Weaviate, and PostgreSQL with pgvector extension.',
     '{"start_pos": 180, "end_pos": 272, "word_count": 13}'::jsonb);

-- No embeddings are seeded here. In a real application they are generated
-- with the Prescient gem and stored in document_embeddings / chunk_embeddings.
--
-- Example workflow:
--   1. Insert document into documents table
--   2. Generate embedding using Prescient gem:
--        embedding = Prescient.generate_embedding(document.content)
--   3. Insert embedding into document_embeddings table
--   4. For large documents, split into chunks and generate embeddings for each chunk

-- ---------------------------------------------------------------------------
-- Sample search queries for demonstration
-- ---------------------------------------------------------------------------
INSERT INTO search_queries (query_text, embedding_provider, embedding_model, result_count, search_metadata)
VALUES
    ('How to learn Ruby programming?', 'ollama', 'nomic-embed-text', 3,
     '{"search_type": "semantic", "filters": {"difficulty": "beginner"}}'::jsonb),
    ('Vector similarity search techniques', 'openai', 'text-embedding-3-small', 2,
     '{"search_type": "semantic", "filters": {"tags": ["vectors", "databases"]}}'::jsonb),
    ('Python machine learning libraries', 'ollama', 'nomic-embed-text', 5,
     '{"search_type": "semantic", "filters": {"tags": ["python", "machine-learning"]}}'::jsonb);

-- ---------------------------------------------------------------------------
-- Convenience views
-- ---------------------------------------------------------------------------

-- Every document together with its embedding rows. The LEFT JOIN keeps
-- documents that have no embedding yet (their embedding columns are NULL).
CREATE OR REPLACE VIEW documents_with_embeddings AS
SELECT
    doc.id,
    doc.title,
    doc.content,
    doc.source_type,
    doc.source_url,
    doc.metadata,
    doc.created_at,
    emb.embedding_provider,
    emb.embedding_model,
    emb.embedding_dimensions,
    emb.embedding,
    emb.embedding_text
FROM documents AS doc
LEFT JOIN document_embeddings AS emb
       ON doc.id = emb.document_id;

-- Every chunk with its parent document's title/source type and, when present,
-- its embedding rows, listed in document and chunk order.
CREATE OR REPLACE VIEW chunks_with_embeddings AS
SELECT
    chunk.id AS chunk_id,
    chunk.document_id,
    chunk.chunk_index,
    chunk.chunk_text,
    chunk.chunk_metadata,
    doc.title AS document_title,
    doc.source_type,
    emb.embedding_provider,
    emb.embedding_model,
    emb.embedding_dimensions,
    emb.embedding
FROM document_chunks AS chunk
INNER JOIN documents AS doc
        ON chunk.document_id = doc.id
LEFT JOIN chunk_embeddings AS emb
       ON chunk.id = emb.chunk_id
ORDER BY chunk.document_id, chunk.chunk_index;

-- Report how many rows each seeded table now holds
DO $$
DECLARE
    document_total bigint;
    chunk_total    bigint;
    query_total    bigint;
BEGIN
    SELECT COUNT(*) INTO document_total FROM documents;
    SELECT COUNT(*) INTO chunk_total FROM document_chunks;
    SELECT COUNT(*) INTO query_total FROM search_queries;

    RAISE NOTICE 'Sample data inserted successfully';
    RAISE NOTICE 'Documents: %', document_total;
    RAISE NOTICE 'Chunks: %', chunk_total;
    RAISE NOTICE 'Sample queries: %', query_total;
END $$;
|
@@ -0,0 +1,158 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Rails migration for Prescient gem vector database tables
|
4
|
+
# Copy this file to your Rails db/migrate directory and adjust the timestamp
|
5
|
+
|
6
|
+
# Creates the tables, indexes and SQL helper functions used for
# pgvector-backed search: documents, document_embeddings, document_chunks,
# chunk_embeddings, search_queries and query_results.
#
# NOTE(review): `t.vector` is not a stock ActiveRecord 7.0 column type;
# presumably it is supplied by a pgvector integration (e.g. the neighbor gem) -
# confirm one is loaded before running this migration.
# NOTE(review): the vector columns declare no dimensions, while pgvector's
# documentation says HNSW indexes need a fixed-dimension column - confirm the
# HNSW index statements succeed against your setup.
class CreatePrescientTables < ActiveRecord::Migration[7.0]
  # pgvector operator class per distance metric; the key is also the suffix of
  # the generated index name (idx_<table>_<suffix>).
  HNSW_OPERATOR_CLASSES = {
    cosine: 'vector_cosine_ops',
    l2: 'vector_l2_ops'
  }.freeze

  # Builds the full schema: extension, tables, secondary indexes, HNSW vector
  # indexes and the two SQL helper functions.
  def up
    # Enable pgvector extension
    enable_extension 'vector'

    # Documents table to store original content
    create_table :documents do |t|
      t.string :title, null: false, limit: 255
      t.text :content, null: false
      t.string :source_type, limit: 50 # e.g., 'pdf', 'webpage', 'text', 'api'
      t.string :source_url, limit: 500
      t.jsonb :metadata # Additional flexible metadata
      t.timestamps
    end

    # Document embeddings table for vector search
    create_table :document_embeddings do |t|
      t.references :document, null: false, foreign_key: { on_delete: :cascade }
      t.string :embedding_provider, null: false, limit: 50 # e.g., 'ollama', 'openai'
      t.string :embedding_model, null: false, limit: 100 # e.g., 'nomic-embed-text'
      t.integer :embedding_dimensions, null: false # e.g., 768, 1536, 384
      t.vector :embedding, null: false # The actual vector embedding
      t.text :embedding_text # The specific text that was embedded
      t.timestamps
    end

    # Document chunks table for large documents split into smaller pieces
    create_table :document_chunks do |t|
      t.references :document, null: false, foreign_key: { on_delete: :cascade }
      t.integer :chunk_index, null: false # Order of chunks within document
      t.text :chunk_text, null: false
      t.jsonb :chunk_metadata # Start/end positions, etc.
      t.timestamps
    end

    # Chunk embeddings table for chunked document search
    create_table :chunk_embeddings do |t|
      t.references :chunk, null: false, foreign_key: { to_table: :document_chunks, on_delete: :cascade }
      t.references :document, null: false, foreign_key: { on_delete: :cascade }
      t.string :embedding_provider, null: false, limit: 50
      t.string :embedding_model, null: false, limit: 100
      t.integer :embedding_dimensions, null: false
      t.vector :embedding, null: false
      t.timestamps
    end

    # Search queries table to store user queries and results
    create_table :search_queries do |t|
      t.text :query_text, null: false
      t.string :embedding_provider, null: false, limit: 50
      t.string :embedding_model, null: false, limit: 100
      t.vector :query_embedding
      t.integer :result_count
      t.jsonb :search_metadata # Search parameters, filters, etc.
      t.timestamps
    end

    # Query results table to store search results for analysis
    create_table :query_results do |t|
      t.references :query, null: false, foreign_key: { to_table: :search_queries, on_delete: :cascade }
      t.references :document, null: true, foreign_key: { on_delete: :cascade }
      t.references :chunk, null: true, foreign_key: { to_table: :document_chunks, on_delete: :cascade }
      t.float :similarity_score, null: false
      t.integer :rank_position, null: false
      t.timestamps
    end

    # Add indexes for better performance.
    #
    # `t.references` already created an index on every foreign-key column
    # (document_id, chunk_id, query_id), so those columns are not indexed again
    # here: a second add_index would reuse the same generated index name and
    # abort the migration.
    add_index :documents, :source_type
    add_index :documents, :created_at
    add_index :documents, :metadata, using: :gin

    add_index :document_embeddings, [:embedding_provider, :embedding_model], name: 'idx_doc_embeddings_provider_model'
    add_index :document_embeddings, :embedding_dimensions

    add_index :document_chunks, [:document_id, :chunk_index], unique: true

    add_index :chunk_embeddings, [:embedding_provider, :embedding_model], name: 'idx_chunk_embeddings_provider_model'

    add_index :search_queries, :created_at
    add_index :query_results, :similarity_score

    # Create vector similarity indexes for fast search
    # Using HNSW (Hierarchical Navigable Small World) for approximate nearest neighbor search
    create_hnsw_indexes(:document_embeddings)
    create_hnsw_indexes(:chunk_embeddings)

    # Add helpful functions
    # cosine_similarity: 1 minus pgvector's cosine distance operator (<=>).
    execute <<~SQL
      CREATE OR REPLACE FUNCTION cosine_similarity(a vector, b vector)
      RETURNS float AS $$
      BEGIN
        RETURN 1 - (a <=> b);
      END;
      $$ LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE;
    SQL

    # euclidean_distance: thin wrapper around pgvector's L2 operator (<->).
    execute <<~SQL
      CREATE OR REPLACE FUNCTION euclidean_distance(a vector, b vector)
      RETURNS float AS $$
      BEGIN
        RETURN a <-> b;
      END;
      $$ LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE;
    SQL
  end

  # Reverses #up. Tables go first, children before parents; then the helper
  # functions; the extension goes last because the functions take `vector`
  # arguments.
  def down
    drop_table :query_results
    drop_table :search_queries
    drop_table :chunk_embeddings
    drop_table :document_chunks
    drop_table :document_embeddings
    drop_table :documents

    execute 'DROP FUNCTION IF EXISTS cosine_similarity(vector, vector);'
    execute 'DROP FUNCTION IF EXISTS euclidean_distance(vector, vector);'

    disable_extension 'vector'
  end

  private

  # Creates one HNSW index per entry in HNSW_OPERATOR_CLASSES on the
  # `embedding` column of +table+, named idx_<table>_<metric>
  # (e.g. idx_document_embeddings_cosine).
  #
  # @param table [Symbol] table that has an `embedding` vector column
  # @return [void]
  def create_hnsw_indexes(table)
    HNSW_OPERATOR_CLASSES.each do |metric, operator_class|
      execute <<~SQL
        CREATE INDEX idx_#{table}_#{metric}
        ON #{table}
        USING hnsw (embedding #{operator_class})
        WITH (m = 16, ef_construction = 64);
      SQL
    end
  end
end
|
data/docker-compose.yml
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
# Docker Compose configuration for running Ollama with Prescient gem
# This provides a local AI environment for development and testing
#
# Services: ollama (model server), ollama-init (one-shot model pull),
# postgres (pgvector storage), redis (optional cache) and prescient-app
# (example Ruby container).

version: '3.8'

services:
  ollama:
    image: ollama/ollama:latest
    container_name: prescient-ollama
    ports:
      - "11434:11434"
    volumes:
      # Persist models and data
      - ollama_data:/root/.ollama
      # Optional: Mount models from host for faster startup
      # - ./models:/root/.ollama/models
    environment:
      # Optional: Set Ollama environment variables
      - OLLAMA_HOST=0.0.0.0
      - OLLAMA_ORIGINS=*
    restart: unless-stopped
    # Resource limits - adjust based on your hardware
    deploy:
      resources:
        limits:
          # Allocate most available memory for model loading
          memory: 12G
          cpus: '4.0'
        reservations:
          memory: 4G
          cpus: '2.0'
          # GPU support (uncomment if you have NVIDIA GPU)
          # devices:
          #   - driver: nvidia
          #     count: 1
          #     capabilities: [gpu]
    # Optional: Shared memory for better performance
    shm_size: 2g
    healthcheck:
      # The ollama/ollama image does not ship curl, so a curl-based probe can
      # never succeed and every service gated on `service_healthy` would stay
      # blocked. Probe with the bundled CLI instead; `ollama list` only
      # succeeds once the API server is answering.
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  # Optional: Model initialization service
  # This pulls required models on startup
  ollama-init:
    image: curlimages/curl:latest
    container_name: prescient-ollama-init
    depends_on:
      ollama:
        condition: service_healthy
    volumes:
      - ./scripts:/scripts:ro
    command: |
      sh -c '
        echo "Pulling required models for Prescient..."

        # Pull embedding model
        curl -X POST http://ollama:11434/api/pull \
          -H "Content-Type: application/json" \
          -d "{\"name\": \"nomic-embed-text\"}"

        # Pull chat model
        curl -X POST http://ollama:11434/api/pull \
          -H "Content-Type: application/json" \
          -d "{\"name\": \"llama3.1:8b\"}"

        echo "Models pulled successfully!"
      '
    # One-shot job: do not restart after the pulls finish
    restart: "no"

  # PostgreSQL with pgvector extension for vector storage
  postgres:
    image: pgvector/pgvector:pg16
    container_name: prescient-postgres
    ports:
      - "5432:5432"
    environment:
      - POSTGRES_DB=prescient_development
      - POSTGRES_USER=prescient
      - POSTGRES_PASSWORD=prescient_password
      - POSTGRES_INITDB_ARGS=--auth-host=scram-sha-256
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # SQL scripts in db/init are mounted into the image's init directory
      - ./db/init:/docker-entrypoint-initdb.d:ro
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U prescient -d prescient_development"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  # Optional: Redis for caching embeddings (useful for development)
  redis:
    image: redis:7-alpine
    container_name: prescient-redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    command: redis-server --appendonly yes
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 5s
      retries: 3

  # Optional: Example Ruby application using Prescient
  prescient-app:
    build:
      context: .
      dockerfile: Dockerfile.example
    container_name: prescient-example-app
    depends_on:
      ollama:
        condition: service_healthy
    environment:
      # Ollama configuration
      - OLLAMA_URL=http://ollama:11434
      - OLLAMA_EMBEDDING_MODEL=nomic-embed-text
      - OLLAMA_CHAT_MODEL=llama3.1:8b

      # Optional: Other AI provider configurations
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - HUGGINGFACE_API_KEY=${HUGGINGFACE_API_KEY:-}

      # Redis configuration (if using caching)
      - REDIS_URL=redis://redis:6379/0
    volumes:
      # Mount your application code
      - .:/app
      - /app/vendor/bundle # Bundle cache volume
    working_dir: /app
    # Keep container running for development
    command: tail -f /dev/null
    restart: unless-stopped

volumes:
  ollama_data:
    driver: local
  postgres_data:
    driver: local
  redis_data:
    driver: local

networks:
  default:
    name: prescient-network
|
@@ -0,0 +1,123 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Basic usage of the Prescient gem
# This example shows how to use different providers for embeddings and text generation
#
# NOTE(review): the emoji prefixes in the messages below were reconstructed
# from mis-encoded bytes (two of them had split a string literal across
# lines); confirm the exact glyphs against the upstream file.

require_relative '../lib/prescient'

# Example 1: Using default Ollama provider
puts "=== Example 1: Default Ollama Provider ==="

begin
  # Create client using default provider (Ollama)
  client = Prescient.client

  # Check if provider is available
  if client.available?
    puts "✅ Ollama is available"

    # Generate embedding
    text = "Ruby is a dynamic programming language"
    embedding = client.generate_embedding(text)
    puts "📊 Generated embedding with #{embedding.length} dimensions"

    # Generate response
    response = client.generate_response("What is Ruby programming language?")
    puts "🤖 AI Response:"
    puts response[:response]
    puts "📝 Model: #{response[:model]}, Provider: #{response[:provider]}"
  else
    puts "❌ Ollama is not available"
  end
rescue Prescient::Error => e
  puts "❌ Error with Ollama: #{e.message}"
end

# Example 2: Context-Aware Generation
puts "\n=== Example 2: Context-Aware Generation ==="

begin
  client = Prescient.client

  # Simulate context items (would come from vector search in real applications)
  context_items = [
    {
      'title' => 'Network Configuration Guide',
      'content' => 'Network propagation typically takes 24-48 hours to complete globally. During this time, changes may not be visible from all locations.'
    },
    {
      'title' => 'Technical FAQ',
      'content' => 'DNS changes can take anywhere from a few minutes to 48 hours to propagate worldwide due to caching mechanisms.'
    }
  ]

  query = "How long does network propagation take?"
  response = client.generate_response(query, context_items)

  puts "🔍 Query: #{query}"
  puts "📚 Context: #{context_items.length} items"
  puts "🤖 AI Response:"
  puts response[:response]
rescue Prescient::Error => e
  puts "❌ Error with context example: #{e.message}"
end

# Example 3: Provider comparison (if multiple providers configured)
puts "\n=== Example 3: Provider Health Check ==="

providers = [:ollama, :anthropic, :openai, :huggingface]

providers.each do |provider_name|
  begin
    health = Prescient.health_check(provider: provider_name)
    status_emoji = health[:status] == 'healthy' ? '✅' : '❌'
    puts "#{status_emoji} #{provider_name.to_s.capitalize}: #{health[:status]}"

    puts "   Ready: #{health[:ready]}" if health[:ready]

    if health[:models_available]
      # List at most three model names, with an ellipsis when more exist
      puts "   Models: #{health[:models_available].first(3).join(', ')}#{'...' if health[:models_available].length > 3}"
    end
  rescue Prescient::Error => e
    puts "❌ #{provider_name.to_s.capitalize}: #{e.message}"
  end
end

# Example 4: Error handling
puts "\n=== Example 4: Error Handling ==="

begin
  # Try to use a provider that might not be configured; only the raised
  # error matters here, so the returned client is not kept.
  Prescient.client(:nonexistent)
rescue Prescient::Error => e
  puts "❌ Expected error: #{e.message}"
end

# Example 5: Custom configuration
puts "\n=== Example 5: Custom Configuration ==="

Prescient.configure do |config|
  config.timeout = 30
  config.retry_attempts = 2
  config.retry_delay = 0.5

  # Add custom Ollama configuration
  config.add_provider(:custom_ollama, Prescient::Provider::Ollama,
                      url: ENV.fetch('OLLAMA_URL', 'http://localhost:11434'),
                      embedding_model: 'nomic-embed-text',
                      chat_model: 'llama3.1:8b',
                      timeout: 60)
end

puts "⚙️ Custom configuration applied"
puts "   Timeout: #{Prescient.configuration.timeout}s"
puts "   Retry attempts: #{Prescient.configuration.retry_attempts}"
puts "   Providers: #{Prescient.configuration.providers.keys.join(', ')}"

puts "\n🎉 Examples completed!"
|