ruvnet-kb-first 6.2.0 → 6.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ -- KB Optimization Script for ruvector-postgres (real[] embeddings)
2
+ -- Run: PGPASSWORD=<your-password> psql -h localhost -p 5435 -U postgres -f scripts/kb-optimize.sql
3
+ --
4
+ -- This script is designed for ruvector-postgres which uses real[] arrays
5
+ -- instead of pgvector. It creates optimized indexes and functions for
6
+ -- semantic search with 100% recall.
7
+
8
+ \echo '╔════════════════════════════════════════════════════════════════╗'
9
+ \echo '║ KB Optimization Script (ruvector-postgres native) ║'
10
+ \echo '╚════════════════════════════════════════════════════════════════╝'
11
+
12
-- ═══════════════════════════════════════════════════════════════════════
-- Step 1: Create cosine_distance function for real[] arrays
-- ═══════════════════════════════════════════════════════════════════════
\echo ''
\echo 'Step 1: Creating cosine_distance function for real[] arrays...'

DROP FUNCTION IF EXISTS cosine_distance(real[], real[]);

-- cosine_distance(a, b)
--   Cosine distance (1 - cosine similarity) between two one-dimensional
--   real[] vectors, accumulated in double precision.
--
--   Returns:
--     * NULL  when either argument is NULL (handled by STRICT), when either
--             array is empty or not one-dimensional, when the lengths differ,
--             or when any element is NULL;
--     * 1.0   when either vector has (near-)zero magnitude;
--     * otherwise a value clamped to [0, 2] so that rounding error can never
--       produce a slightly negative distance or a value above 2.
--
--   Elements are read relative to each array's own lower bound, so arrays
--   with a non-default subscript range (e.g. '[0:2]={...}') are handled.
CREATE OR REPLACE FUNCTION cosine_distance(a real[], b real[])
RETURNS double precision AS $$
DECLARE
    dot_product double precision := 0;
    norm_a      double precision := 0;
    norm_b      double precision := 0;
    len         int;
    base_a      int;
    base_b      int;
    x           double precision;
    y           double precision;
    denominator double precision;
    result      double precision;
BEGIN
    -- array_ndims() is NULL for an empty array, so this single test rejects
    -- both empty and multi-dimensional inputs.
    IF array_ndims(a) IS DISTINCT FROM 1 OR array_ndims(b) IS DISTINCT FROM 1 THEN
        RETURN NULL;
    END IF;

    len := array_length(a, 1);
    IF len <> array_length(b, 1) THEN
        RETURN NULL;
    END IF;

    -- Subscripts do not have to start at 1; index from each array's lower bound.
    base_a := array_lower(a, 1);
    base_b := array_lower(b, 1);

    FOR k IN 0..(len - 1) LOOP
        x := a[base_a + k];
        y := b[base_b + k];

        -- A NULL component makes the distance undefined.
        IF x IS NULL OR y IS NULL THEN
            RETURN NULL;
        END IF;

        dot_product := dot_product + (x * y);
        norm_a := norm_a + (x * x);
        norm_b := norm_b + (y * y);
    END LOOP;

    -- Prevent division by zero
    denominator := sqrt(norm_a) * sqrt(norm_b);
    IF denominator < 1e-10 THEN
        RETURN 1.0;  -- Maximum distance for zero vectors
    END IF;

    result := 1.0 - (dot_product / denominator);

    -- Clamp rounding noise. PostgreSQL orders NaN above every other float, so
    -- NaN is excluded explicitly and passed through unchanged.
    IF result < 0.0 THEN
        RETURN 0.0;
    ELSIF result > 2.0 AND result <> 'NaN'::double precision THEN
        RETURN 2.0;
    END IF;

    RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE;

COMMENT ON FUNCTION cosine_distance(real[], real[]) IS
'Compute cosine distance between two real[] vectors. Returns 0 for identical vectors, 1 for orthogonal, 2 for opposite.';

\echo '✓ cosine_distance function created'
56
+
57
-- ═══════════════════════════════════════════════════════════════════════
-- Step 2: Create semantic search function
-- ═══════════════════════════════════════════════════════════════════════
\echo ''
\echo 'Step 2: Creating semantic_search function...'

DROP FUNCTION IF EXISTS ask_ruvnet.semantic_search(text, int, double precision);

-- ask_ruvnet.semantic_search(query_text, limit_count, max_distance)
--   Embeds query_text with ruvector_embed() and returns the closest rows of
--   ask_ruvnet.architecture_docs by cosine_distance().
--
--   Parameters:
--     query_text    text to embed and search for
--     limit_count   maximum number of rows returned (default 10)
--     max_distance  rows with a larger cosine distance are excluded (default 0.5)
--
--   Only rows with a non-NULL embedding and is_duplicate = false are
--   considered. Rows whose distance is NULL (cosine_distance() could not be
--   computed) never satisfy the max_distance filter and are therefore omitted.
--   Results are ordered by distance, then id, so ties are returned in a
--   deterministic order.
CREATE OR REPLACE FUNCTION ask_ruvnet.semantic_search(
    query_text   text,
    limit_count  int DEFAULT 10,
    max_distance double precision DEFAULT 0.5
)
RETURNS TABLE (
    id            integer,
    title         text,
    content       text,
    category      text,
    quality_score integer,
    distance      double precision
) AS $$
DECLARE
    query_embedding real[];
BEGIN
    -- Generate embedding for query using ruvector_embed
    query_embedding := ruvector_embed(query_text)::real[];

    RETURN QUERY
    SELECT
        scored.id,
        scored.title,
        scored.content,
        scored.category,
        scored.quality_score,
        scored.dist
    FROM (
        SELECT
            d.id,
            d.title,
            d.content,
            d.category,
            d.quality_score,
            cosine_distance(d.embedding, query_embedding) AS dist
        FROM ask_ruvnet.architecture_docs AS d
        WHERE d.embedding IS NOT NULL
          AND d.is_duplicate = false
        -- OFFSET 0 keeps the planner from flattening this subquery, so the
        -- distance is computed once per row instead of once for the filter
        -- and again for the output column.
        OFFSET 0
    ) AS scored
    WHERE scored.dist <= max_distance
    -- Every column is qualified: the RETURNS TABLE names (id, distance, ...)
    -- are also PL/pgSQL variables inside this body.
    ORDER BY scored.dist, scored.id
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION ask_ruvnet.semantic_search(text, int, double precision) IS
'Semantic search using ruvector embeddings. Returns top matches with cosine distance.';

\echo '✓ semantic_search function created'
105
+
106
-- ═══════════════════════════════════════════════════════════════════════
-- Step 3: Create B-tree indexes for filtered queries
-- ═══════════════════════════════════════════════════════════════════════
\echo ''
\echo 'Step 3: Creating optimized indexes...'

-- Each index is dropped and recreated so that re-running the script rebuilds
-- it with the definition below. The IF [NOT] EXISTS guards keep the script
-- re-runnable.

-- Category + quality composite index for filtered queries
DROP INDEX IF EXISTS ask_ruvnet.idx_category_quality_active;
CREATE INDEX IF NOT EXISTS idx_category_quality_active
    ON ask_ruvnet.architecture_docs (category, quality_score DESC)
    WHERE is_duplicate = false AND quality_score >= 40;

-- Title search index
-- idx_title_trgm is dropped but never created by this script (presumably a
-- legacy index name -- confirm). idx_title_search is dropped as well so it is
-- rebuilt on re-run like the other indexes in this step.
DROP INDEX IF EXISTS ask_ruvnet.idx_title_trgm;
DROP INDEX IF EXISTS ask_ruvnet.idx_title_search;
CREATE INDEX IF NOT EXISTS idx_title_search
    ON ask_ruvnet.architecture_docs USING btree (title);

-- Non-duplicate filter index
DROP INDEX IF EXISTS ask_ruvnet.idx_non_duplicate;
CREATE INDEX IF NOT EXISTS idx_non_duplicate
    ON ask_ruvnet.architecture_docs (id)
    WHERE is_duplicate = false;

-- Embedding existence index (for filtering)
DROP INDEX IF EXISTS ask_ruvnet.idx_has_embedding;
CREATE INDEX IF NOT EXISTS idx_has_embedding
    ON ask_ruvnet.architecture_docs (id)
    WHERE embedding IS NOT NULL;

\echo '✓ Indexes created'
136
+
137
-- ═══════════════════════════════════════════════════════════════════════
-- Step 4: Create optimized KB view
-- ═══════════════════════════════════════════════════════════════════════
\echo ''
\echo 'Step 4: Creating optimized KB view...'

-- ask_ruvnet.kb exposes the rows of ask_ruvnet.architecture_docs that are not
-- flagged as duplicates and have quality_score >= 40, ordered by
-- quality_score descending. package_name is exposed under the name "source".
-- NOTE: DROP ... CASCADE also removes any objects that depend on the view.
DROP VIEW IF EXISTS ask_ruvnet.kb CASCADE;

CREATE VIEW ask_ruvnet.kb AS
    SELECT
        docs.id,
        docs.title,
        docs.content,
        docs.category,
        docs.quality_score,
        docs.package_name AS source,
        docs.embedding,
        docs.created_at
    FROM ask_ruvnet.architecture_docs AS docs
    WHERE docs.quality_score >= 40
      AND docs.is_duplicate = false
    ORDER BY docs.quality_score DESC;

COMMENT ON VIEW ask_ruvnet.kb IS
'Optimized view of high-quality, non-duplicate KB entries';

\echo '✓ KB view created'
163
+
164
-- ═══════════════════════════════════════════════════════════════════════
-- Step 5: Create category distribution materialized view
-- ═══════════════════════════════════════════════════════════════════════
\echo ''
\echo 'Step 5: Creating category stats materialized view...'

-- Per-category summary of the non-duplicate rows in
-- ask_ruvnet.architecture_docs: row count, average / minimum / maximum
-- quality_score, and the number of rows that have an embedding.
-- The contents are a snapshot taken when the view is created or refreshed.
-- NOTE: DROP ... CASCADE also removes any objects that depend on the view.
DROP MATERIALIZED VIEW IF EXISTS ask_ruvnet.category_stats CASCADE;

CREATE MATERIALIZED VIEW ask_ruvnet.category_stats AS
    SELECT
        docs.category,
        COUNT(*)                                            AS entry_count,
        ROUND(AVG(docs.quality_score), 1)                   AS avg_quality,
        MIN(docs.quality_score)                             AS min_quality,
        MAX(docs.quality_score)                             AS max_quality,
        COUNT(*) FILTER (WHERE docs.embedding IS NOT NULL)  AS with_embedding
    FROM ask_ruvnet.architecture_docs AS docs
    WHERE docs.is_duplicate = false
    GROUP BY docs.category
    ORDER BY entry_count DESC;

-- One row per category; enforced with a unique index on the view.
CREATE UNIQUE INDEX idx_category_stats_cat
    ON ask_ruvnet.category_stats (category);

\echo '✓ Category stats materialized view created'
187
+
188
-- ═══════════════════════════════════════════════════════════════════════
-- Step 6: Vacuum and analyze
-- ═══════════════════════════════════════════════════════════════════════
\echo ''
\echo 'Step 6: Running VACUUM ANALYZE...'
-- VACUUM cannot run inside a transaction block, so this script must not be
-- executed with psql --single-transaction.
VACUUM ANALYZE ask_ruvnet.architecture_docs;

-- ═══════════════════════════════════════════════════════════════════════
-- Step 7: Report statistics
-- ═══════════════════════════════════════════════════════════════════════
\echo ''
\echo '════════════════════════════════════════════════════════════════'
\echo ' OPTIMIZATION COMPLETE '
\echo '════════════════════════════════════════════════════════════════'
\echo ''

-- Summary metrics, computed in a single pass over the table with FILTERed
-- aggregates and then unpivoted into (metric, value) rows.
-- The format mask is wide enough for counts up to 999,999,999,999; a mask
-- that is too narrow makes TO_CHAR print '#' characters instead of digits.
SELECT
    m.metric,
    TO_CHAR(m.n, 'FM999,999,999,999') AS value
FROM (
    SELECT
        COUNT(*) AS total_entries,
        COUNT(*) FILTER (WHERE is_duplicate = false) AS active_entries,
        COUNT(*) FILTER (WHERE is_duplicate = false AND quality_score >= 40) AS high_quality,
        COUNT(*) FILTER (WHERE embedding IS NOT NULL) AS with_embeddings,
        COUNT(DISTINCT category) FILTER (WHERE is_duplicate = false) AS categories
    FROM ask_ruvnet.architecture_docs
) AS totals
CROSS JOIN LATERAL (
    VALUES
        (1, 'Total entries',          totals.total_entries),
        (2, 'Active (non-duplicate)', totals.active_entries),
        (3, 'High quality (>=40)',    totals.high_quality),
        (4, 'With embeddings',        totals.with_embeddings),
        (5, 'Categories',             totals.categories)
) AS m (ord, metric, n)
ORDER BY m.ord;  -- explicit order: rows always print in the sequence above

\echo ''
\echo 'Category distribution:'
-- Explicit ORDER BY: the row order of a plain SELECT on a materialized view
-- is not guaranteed.
SELECT
    category,
    entry_count,
    avg_quality,
    min_quality,
    max_quality,
    with_embedding
FROM ask_ruvnet.category_stats
ORDER BY entry_count DESC, category;

\echo ''
\echo '════════════════════════════════════════════════════════════════'
\echo ' USAGE EXAMPLES '
\echo '════════════════════════════════════════════════════════════════'
\echo ''
\echo 'Semantic search:'
\echo ' SELECT * FROM ask_ruvnet.semantic_search(''how to create agents'', 5);'
\echo ''
\echo 'Direct cosine distance:'
\echo ' SELECT title, cosine_distance(embedding, (SELECT embedding FROM ask_ruvnet.architecture_docs WHERE id = 1)) as dist'
\echo ' FROM ask_ruvnet.architecture_docs WHERE embedding IS NOT NULL ORDER BY dist LIMIT 5;'
\echo ''
\echo 'Filtered by category:'
\echo ' SELECT * FROM ask_ruvnet.kb WHERE category = ''agents'' LIMIT 10;'
\echo ''
\echo 'Refresh category stats:'
\echo ' REFRESH MATERIALIZED VIEW ask_ruvnet.category_stats;'
\echo ''