ruvnet-kb-first 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +674 -0
- package/SKILL.md +740 -0
- package/bin/kb-first.js +123 -0
- package/install/init-project.sh +435 -0
- package/install/install-global.sh +257 -0
- package/install/kb-first-autodetect.sh +108 -0
- package/install/kb-first-command.md +80 -0
- package/install/kb-first-skill.md +262 -0
- package/package.json +87 -0
- package/phases/00-assessment.md +529 -0
- package/phases/01-storage.md +194 -0
- package/phases/01.5-hooks-setup.md +521 -0
- package/phases/02-kb-creation.md +413 -0
- package/phases/03-persistence.md +125 -0
- package/phases/04-visualization.md +170 -0
- package/phases/05-integration.md +114 -0
- package/phases/06-scaffold.md +130 -0
- package/phases/07-build.md +493 -0
- package/phases/08-verification.md +597 -0
- package/phases/09-security.md +512 -0
- package/phases/10-documentation.md +613 -0
- package/phases/11-deployment.md +670 -0
- package/phases/testing.md +713 -0
- package/scripts/1.5-hooks-verify.sh +252 -0
- package/scripts/8.1-code-scan.sh +58 -0
- package/scripts/8.2-import-check.sh +42 -0
- package/scripts/8.3-source-returns.sh +52 -0
- package/scripts/8.4-startup-verify.sh +65 -0
- package/scripts/8.5-fallback-check.sh +63 -0
- package/scripts/8.6-attribution.sh +56 -0
- package/scripts/8.7-confidence.sh +56 -0
- package/scripts/8.8-gap-logging.sh +70 -0
- package/scripts/9-security-audit.sh +202 -0
- package/scripts/init-project.sh +395 -0
- package/scripts/verify-enforcement.sh +167 -0
- package/src/commands/hooks.js +361 -0
- package/src/commands/init.js +315 -0
- package/src/commands/phase.js +372 -0
- package/src/commands/score.js +380 -0
- package/src/commands/status.js +193 -0
- package/src/commands/verify.js +286 -0
- package/src/index.js +56 -0
- package/src/mcp-server.js +412 -0
- package/templates/attention-router.ts +534 -0
- package/templates/code-analysis.ts +683 -0
- package/templates/federated-kb-learner.ts +649 -0
- package/templates/gnn-engine.ts +1091 -0
- package/templates/intentions.md +277 -0
- package/templates/kb-client.ts +905 -0
- package/templates/schema.sql +303 -0
- package/templates/sona-config.ts +312 -0
|
@@ -0,0 +1,905 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KB-First v3.0 Knowledge Base Client
|
|
3
|
+
*
|
|
4
|
+
* Production-ready client for RuVector PostgreSQL
|
|
5
|
+
* Optimized for enterprise KB applications
|
|
6
|
+
*
|
|
7
|
+
* NEW IN v3.0:
|
|
8
|
+
* - ONNX local embeddings via ruvector (no API calls needed)
|
|
9
|
+
* - Federated learning support
|
|
10
|
+
* - Enhanced hybrid search with caching
|
|
11
|
+
* - Graph-aware clustering
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { Pool, PoolClient } from 'pg';
|
|
15
|
+
|
|
16
|
+
// =============================================================================
|
|
17
|
+
// ONNX LOCAL EMBEDDINGS (NEW in v3.0)
|
|
18
|
+
// =============================================================================
|
|
19
|
+
|
|
20
|
+
// Dynamic import for ruvector embeddings (works offline, no API costs)
let embedText: ((text: string) => Promise<number[]>) | null = null;
let embedBatch: ((texts: string[]) => Promise<number[][]>) | null = null;

// Pending initialization attempt, shared by concurrent callers; reset once it settles.
let localEmbeddingsInFlight: Promise<boolean> | null = null;
// Set after the first "not available" warning so repeated attempts stay quiet.
let localEmbeddingsWarned = false;

/**
 * Try once to bind `embedText` / `embedBatch` to the optional `ruvector` package.
 *
 * Resolution order:
 *   1. `ruvector.embedText` together with `ruvector.embedBatch`, when both are exported.
 *   2. `ruvector.hooks.embed`, wrapped so that a batch is embedded one text at a
 *      time through the same hook.
 *
 * Never throws: a failed import is reported with a single warning for the
 * lifetime of the module and the function resolves to `false`.
 */
async function loadLocalEmbeddings(): Promise<boolean> {
  try {
    // Try to load ruvector's ONNX embeddings (all-MiniLM-L6-v2, 384 dimensions)
    const ruvector = await import('ruvector');

    if (ruvector.embedText && ruvector.embedBatch) {
      embedText = ruvector.embedText;
      embedBatch = ruvector.embedBatch;
      console.log('[KB-FIRST] ONNX local embeddings initialized (384d, offline)');
      return true;
    }

    // Fallback: check for hooks API
    if (ruvector.hooks?.embed) {
      const embedViaHook = async (text: string): Promise<number[]> => {
        const result = await ruvector.hooks.embed(text);
        return result.embedding;
      };
      embedText = embedViaHook;
      embedBatch = (texts: string[]) => Promise.all(texts.map(embedViaHook));
      console.log('[KB-FIRST] RuVector hooks embeddings initialized');
      return true;
    }

    return false;
  } catch (e) {
    if (!localEmbeddingsWarned) {
      localEmbeddingsWarned = true;
      console.warn(
        '[KB-FIRST] Local embeddings not available, will use DB or text search',
        e instanceof Error ? e.message : e
      );
    }
    return false;
  }
}

/**
 * Ensure local embeddings are initialized if they can be.
 *
 * Returns immediately when `embedText` is already bound. Otherwise starts an
 * initialization attempt, or joins the one already running, so concurrent
 * callers do not import and log twice. A failed attempt is not remembered:
 * the next call tries again, but the warning is only printed once.
 *
 * @returns `true` when an embedding function is bound, `false` otherwise.
 */
async function initLocalEmbeddings(): Promise<boolean> {
  if (embedText !== null) return true;

  if (localEmbeddingsInFlight === null) {
    localEmbeddingsInFlight = loadLocalEmbeddings().finally(() => {
      localEmbeddingsInFlight = null;
    });
  }
  return localEmbeddingsInFlight;
}

// Initialize on module load (best effort; the promise never rejects)
void initLocalEmbeddings();
|
|
60
|
+
|
|
61
|
+
// =============================================================================
|
|
62
|
+
// EMBEDDING CACHE (Memory-optimized)
|
|
63
|
+
// =============================================================================
|
|
64
|
+
|
|
65
|
+
/** One cached embedding together with the time (ms since epoch) it was stored. */
interface CachedEmbedding {
  embedding: number[];
  timestamp: number;
}

// Keyed by the exact text that was embedded. Entries are kept in insertion
// order, which setCachedEmbedding keeps equal to "oldest stored first".
const embeddingCache = new Map<string, CachedEmbedding>();
const CACHE_TTL_MS = 3600000; // 1 hour
const MAX_CACHE_SIZE = 10000;

/**
 * Look up a cached embedding.
 *
 * @param text Exact text used as the cache key.
 * @returns The stored embedding when it is younger than `CACHE_TTL_MS`,
 *          otherwise `null`. An expired entry is removed as a side effect.
 */
function getCachedEmbedding(text: string): number[] | null {
  const cached = embeddingCache.get(text);
  if (cached === undefined) return null;

  if (Date.now() - cached.timestamp < CACHE_TTL_MS) {
    return cached.embedding;
  }

  embeddingCache.delete(text);
  return null;
}

/**
 * Store (or refresh) the embedding for `text`.
 *
 * The key is removed before it is re-inserted, so the Map's iteration order
 * always runs from the oldest stored entry to the newest. That makes eviction
 * a constant-time removal of the first key instead of a sort of the whole
 * cache, and means overwriting an existing key never evicts another entry.
 *
 * @param text      Exact text used as the cache key.
 * @param embedding Vector to cache for that text.
 */
function setCachedEmbedding(text: string, embedding: number[]): void {
  embeddingCache.delete(text);

  // Evict oldest if at capacity
  if (embeddingCache.size >= MAX_CACHE_SIZE) {
    const oldestKey = embeddingCache.keys().next().value;
    if (oldestKey !== undefined) embeddingCache.delete(oldestKey);
  }

  embeddingCache.set(text, { embedding, timestamp: Date.now() });
}
|
|
94
|
+
|
|
95
|
+
// =============================================================================
|
|
96
|
+
// CONFIGURATION
|
|
97
|
+
// =============================================================================
|
|
98
|
+
|
|
99
|
+
// Shared PostgreSQL pool used by the functions in this module.
// RUVECTOR_POSTGRES_URL is preferred; DATABASE_URL is used when it is unset or empty.
const pool = new Pool({
  max: 20,
  connectionTimeoutMillis: 2 * 1000,
  idleTimeoutMillis: 30 * 1000,
  connectionString: process.env.RUVECTOR_POSTGRES_URL || process.env.DATABASE_URL,
});
|
|
105
|
+
|
|
106
|
+
// =============================================================================
|
|
107
|
+
// TYPES
|
|
108
|
+
// =============================================================================
|
|
109
|
+
|
|
110
|
+
/**
 * A knowledge-base node in the camelCase shape this module hands to callers.
 */
export interface KBNode {
  /** Node identifier. */
  id: string;
  /** Namespace the node belongs to. */
  namespace: string;
  /** Path of the node within its namespace. */
  path: string;
  /** Node title. */
  title: string;
  /** Node body text; may be `null`. */
  content: string | null;
  /** Attribution: the expert the content is credited to. */
  sourceExpert: string;
  /** Attribution: URL of the source. */
  sourceUrl: string;
  /** Confidence assigned to the node. */
  confidence: number;
  /** Free-form metadata attached to the node. */
  metadata: Record<string, unknown>;
}
|
|
121
|
+
|
|
122
|
+
/**
 * A `KBNode` returned by a search, with the scores computed for the query.
 */
export interface SearchResult extends KBNode {
  /** Distance reported by the search query for this row. */
  distance: number;
  /** Score the results are ranked by. */
  relevanceScore: number;
  /** Full-text rank of the row, when one was computed. */
  bm25Score?: number;
  /** NEW: Graph cluster membership */
  clusterId?: number;
}
|
|
128
|
+
|
|
129
|
+
/**
 * An answer of type `T` together with the KB attribution that backs it.
 */
export interface KBResponse<T> {
  /** The answer payload. */
  data: T;
  /** Where the answer came from. */
  sources: {
    /** Ids of the KB nodes used. */
    nodeIds: string[];
    /** Experts credited by those nodes. */
    experts: string[];
    /** Source URLs of those nodes. */
    urls: string[];
  };
  /** Numeric confidence in the answer. */
  confidence: number;
  /** Bucketed form of `confidence`. */
  confidenceLevel: 'high' | 'medium' | 'low';
  /** `true` when the KB had nothing to support the answer. */
  gap: boolean;
  /** Explanation accompanying a gap. */
  gapReason?: string;
  /** NEW: Track embedding source */
  embeddingSource?: 'local' | 'database' | 'none';
}
|
|
142
|
+
|
|
143
|
+
/**
 * Optional tuning knobs for a KB search. Every field may be omitted.
 */
export interface SearchOptions {
  /** Maximum number of results to return. */
  limit?: number;
  /** Lower bound on node confidence for a node to be considered. */
  minConfidence?: number;
  /** 0-1, weight for semantic vs keyword */
  hybridWeight?: number;
  /** NEW: Force local embeddings */
  useLocalEmbeddings?: boolean;
  /** NEW: Include cluster membership */
  includeClusterInfo?: boolean;
}
|
|
150
|
+
|
|
151
|
+
/**
 * One row of the gap report: a query the KB could not answer.
 */
export interface GapReport {
  /** The query text that was logged. */
  query: string;
  /** Why the query was recorded as a gap. */
  reason: string;
  /** How many times the gap has been recorded. */
  count: number;
  /** When the gap was most recently recorded. */
  lastSeen: Date;
}
|
|
157
|
+
|
|
158
|
+
// =============================================================================
|
|
159
|
+
// CORE EMBEDDING FUNCTION (NEW in v3.0)
|
|
160
|
+
// =============================================================================
|
|
161
|
+
|
|
162
|
+
/**
 * Coerce an embedding value read from PostgreSQL into a `number[]`.
 *
 * Accepts an array of numbers (or numeric strings) as well as the textual
 * forms `[1,2,3]` and `{1,2,3}`.
 * NOTE(review): which form `ruvector_embed` actually produces depends on its
 * SQL return type and on the driver's type parsers — confirm against the
 * deployed extension.
 *
 * @returns The parsed vector, or `null` when the value is empty or contains
 *          anything that is not a finite number.
 */
function parseDbEmbedding(raw: unknown): number[] | null {
  let values: unknown = raw;

  if (typeof raw === 'string') {
    // Strip one pair of surrounding brackets or braces, then split on commas.
    const inner = raw.trim().replace(/^[\[{]|[\]}]$/g, '');
    values = inner.length > 0 ? inner.split(',') : [];
  }

  if (!Array.isArray(values) || values.length === 0) return null;

  const numbers = values.map((v) => Number(v));
  return numbers.every((n) => Number.isFinite(n)) ? numbers : null;
}

/**
 * Get embedding for text using best available method:
 * 1. Memory cache
 * 2. Local embeddings via ruvector (result is cached)
 * 3. Database function `ruvector_embed`, if a client is supplied and the
 *    function exists (result is cached)
 * 4. null (fallback to text search)
 *
 * Failures in steps 2 and 3 are logged and the next step is tried; the
 * function itself does not throw for them.
 *
 * @param text   Text to embed; also the cache key.
 * @param client Optional checked-out client used for the database step.
 *               Without it, step 3 is skipped.
 * @returns The embedding (or `null`) and which step produced it.
 */
export async function getEmbedding(
  text: string,
  client?: PoolClient
): Promise<{ embedding: number[] | null; source: 'cache' | 'local' | 'database' | 'none' }> {
  // 1. Check cache first
  const cached = getCachedEmbedding(text);
  if (cached) {
    return { embedding: cached, source: 'cache' };
  }

  // 2. Try local ONNX embeddings (no API, works offline)
  await initLocalEmbeddings();
  if (embedText) {
    try {
      const embedding = await embedText(text);
      setCachedEmbedding(text, embedding);
      return { embedding, source: 'local' };
    } catch (e) {
      console.warn('[KB-FIRST] Local embedding failed:', e);
    }
  }

  // 3. Try database function
  if (client) {
    try {
      const hasDbEmbed = await checkFunction(client, 'ruvector_embed');
      if (hasDbEmbed) {
        const result = await client.query(
          `SELECT ruvector_embed('all-MiniLM-L6-v2', $1) AS embedding`,
          [text]
        );
        // Callers join the embedding with ',' to build a vector literal, so it
        // must be a real array of numbers before it is cached or returned.
        const embedding = parseDbEmbedding(result.rows[0]?.embedding);
        if (embedding) {
          setCachedEmbedding(text, embedding);
          return { embedding, source: 'database' };
        }
      }
    } catch (e) {
      console.warn('[KB-FIRST] Database embedding failed:', e);
    }
  }

  // 4. No embeddings available
  return { embedding: null, source: 'none' };
}
|
|
214
|
+
|
|
215
|
+
/**
 * Embed several texts with the local batch embedder, reusing cached vectors.
 *
 * Texts already in the cache are served from it; the remainder are sent to
 * `embedBatch` in one call and cached afterwards. When no local embedder is
 * available, or the batch call throws, every position is `null` and the
 * source is `'none'`.
 *
 * @param texts Texts to embed; results are positionally aligned with it.
 * @returns The embeddings and whether they came from the local embedder.
 */
export async function getEmbeddingsBatch(
  texts: string[]
): Promise<{ embeddings: (number[] | null)[]; source: 'local' | 'none' }> {
  await initLocalEmbeddings();

  if (!embedBatch) {
    return { embeddings: texts.map(() => null), source: 'none' };
  }

  try {
    const output: (number[] | null)[] = [];
    const pendingTexts: string[] = [];
    const pendingSlots: number[] = [];

    // Serve what we can from the cache and remember where the misses go.
    texts.forEach((candidate, slot) => {
      const hit = getCachedEmbedding(candidate);
      if (!hit) {
        pendingTexts.push(candidate);
        pendingSlots.push(slot);
        return;
      }
      output[slot] = hit;
    });

    // One batch call for all cache misses.
    if (pendingTexts.length > 0) {
      const fresh = await embedBatch(pendingTexts);
      for (let k = 0; k < pendingSlots.length; k++) {
        output[pendingSlots[k]] = fresh[k];
        setCachedEmbedding(pendingTexts[k], fresh[k]);
      }
    }

    return { embeddings: output, source: 'local' };
  } catch (e) {
    console.warn('[KB-FIRST] Batch embedding failed:', e);
  }

  return { embeddings: texts.map(() => null), source: 'none' };
}
|
|
257
|
+
|
|
258
|
+
// =============================================================================
|
|
259
|
+
// CORE SEARCH FUNCTIONS
|
|
260
|
+
// =============================================================================
|
|
261
|
+
|
|
262
|
+
/**
 * Primary KB search - uses hybrid (semantic + keyword) by default.
 *
 * When an embedding for the query can be obtained, nodes are ranked by
 * `hybridWeight * semantic_score + (1 - hybridWeight) * text_score`.
 * Otherwise the search falls back to full-text matching only.
 *
 * Access counters of the returned nodes are bumped in the background; a
 * failure there never affects the search result.
 *
 * NOTE(review): `options.useLocalEmbeddings` and `options.includeClusterInfo`
 * are accepted but not consulted here — the embedding is always resolved
 * through `getEmbedding`.
 *
 * @param query     Free-text query.
 * @param namespace Namespace to search in.
 * @param options   `limit` (default 10), `minConfidence` (default 0),
 *                  `hybridWeight` (default 0.7, i.e. 70% semantic, 30% keyword).
 * @returns Matching nodes, best first.
 * @throws Whatever the pool or the search query throws.
 */
export async function searchKB(
  query: string,
  namespace: string,
  options: SearchOptions = {}
): Promise<SearchResult[]> {
  const {
    limit = 10,
    minConfidence = 0,
    hybridWeight = 0.7 // 70% semantic, 30% keyword
  } = options;

  const client = await pool.connect();

  try {
    // Get query embedding using best available method
    const { embedding: queryEmbedding, source: embeddingSource } =
      await getEmbedding(query, client);

    let result;

    if (queryEmbedding) {
      // Use vector similarity search with the query embedding
      const embeddingStr = `[${queryEmbedding.join(',')}]`;

      result = await client.query(`
        WITH scored AS (
          SELECT
            n.id, n.namespace, n.path, n.title, n.content,
            n.source_expert, n.source_url, n.confidence, n.metadata,
            CASE
              WHEN n.embedding IS NOT NULL THEN
                1.0 / (1.0 + (n.embedding::vector <=> $1::vector))
              ELSE 0.3
            END AS semantic_score,
            ts_rank(to_tsvector('english', COALESCE(n.title || ' ' || n.content, '')),
                    plainto_tsquery('english', $2)) AS text_score
          FROM kb_nodes n
          WHERE n.namespace = $3 AND n.confidence >= $4
        )
        SELECT
          *,
          ($5 * semantic_score + $6 * text_score) AS combined_score,
          1 - semantic_score AS distance
        FROM scored
        WHERE semantic_score > 0.01 OR text_score > 0
        ORDER BY combined_score DESC
        LIMIT $7
      `, [embeddingStr, query, namespace, minConfidence, hybridWeight, 1 - hybridWeight, limit]);
    } else {
      // Fallback to text search only (no embeddings available)
      result = await client.query(`
        SELECT
          id, namespace, path, title, content,
          source_expert, source_url, confidence, metadata,
          ts_rank(to_tsvector('english', COALESCE(title || ' ' || content, '')),
                  plainto_tsquery('english', $1)) AS text_score,
          0.5 AS distance
        FROM kb_nodes
        WHERE namespace = $2
          AND confidence >= $3
          AND to_tsvector('english', COALESCE(title || ' ' || content, ''))
              @@ plainto_tsquery('english', $1)
        ORDER BY text_score DESC
        LIMIT $4
      `, [query, namespace, minConfidence, limit]);
    }

    // Update access tracking (fire and forget). This goes through the pool,
    // not `client`: `client` is released as soon as this function returns and
    // must not have a query of ours still pending on it.
    const nodeIds = result.rows.map(r => r.id);
    if (nodeIds.length > 0) {
      void pool.query(`
        UPDATE kb_nodes
        SET access_count = access_count + 1, last_accessed = NOW()
        WHERE id = ANY($1)
      `, [nodeIds]).catch(() => {
        // Best effort only: access statistics must never fail a search.
      });
    }

    console.log(`[KB-FIRST] Search completed: ${result.rows.length} results (embeddings: ${embeddingSource})`);

    return result.rows.map(row => ({
      id: row.id,
      namespace: row.namespace,
      path: row.path,
      title: row.title,
      content: row.content,
      sourceExpert: row.source_expert,
      sourceUrl: row.source_url,
      confidence: row.confidence,
      metadata: row.metadata || {},
      distance: row.distance,
      // The text-only query has no combined_score column; fall back to the
      // text rank only when the column is absent, not when the score is 0.
      relevanceScore: row.combined_score ?? row.text_score,
      bm25Score: row.text_score
    }));
  } finally {
    client.release();
  }
}
|
|
366
|
+
|
|
367
|
+
/**
 * Wrap an answer with attribution and a confidence estimate derived from the
 * search results that back it.
 *
 * With no results the response is flagged as a gap with confidence 0.
 * Otherwise the confidence is the smaller of
 *   - the plain average of the results' confidence, and
 *   - the average of `confidence * relevanceScore`, weighted by `1 / rank`
 *     so that earlier results count more.
 * It is labelled `high` from 0.8, `medium` from 0.5 and `low` below that.
 *
 * @param results         Search results the answer was built from, best first.
 * @param answer          The answer payload, passed through unchanged.
 * @param embeddingSource Passed through to the response as-is.
 */
export async function buildResponse<T>(
  results: SearchResult[],
  answer: T,
  embeddingSource?: 'local' | 'database' | 'none'
): Promise<KBResponse<T>> {
  if (results.length === 0) {
    return {
      data: answer,
      sources: { nodeIds: [], experts: [], urls: [] },
      confidence: 0,
      confidenceLevel: 'low',
      gap: true,
      gapReason: 'No matching KB content found',
      embeddingSource
    };
  }

  // Single pass: accumulate sums and collect attribution in rank order.
  let confidenceSum = 0;
  let weightedSum = 0;
  let weightTotal = 0;
  const nodeIds: string[] = [];
  const expertSet = new Set<string>();
  const urlSet = new Set<string>();

  for (let rank = 0; rank < results.length; rank++) {
    const hit = results[rank];
    const rankWeight = 1 / (rank + 1); // first result weighted more

    confidenceSum = confidenceSum + hit.confidence;
    weightedSum = weightedSum + (hit.confidence * hit.relevanceScore * rankWeight);
    weightTotal = weightTotal + rankWeight;

    nodeIds.push(hit.id);
    if (hit.sourceExpert) expertSet.add(hit.sourceExpert);
    if (hit.sourceUrl) urlSet.add(hit.sourceUrl);
  }

  const confidence = Math.min(confidenceSum / results.length, weightedSum / weightTotal);

  let confidenceLevel: 'high' | 'medium' | 'low' = 'low';
  if (confidence >= 0.8) {
    confidenceLevel = 'high';
  } else if (confidence >= 0.5) {
    confidenceLevel = 'medium';
  }

  return {
    data: answer,
    sources: {
      nodeIds,
      experts: Array.from(expertSet),
      urls: Array.from(urlSet)
    },
    confidence,
    confidenceLevel,
    gap: false,
    embeddingSource
  };
}
|
|
409
|
+
|
|
410
|
+
// =============================================================================
|
|
411
|
+
// INGEST WITH LOCAL EMBEDDINGS (NEW in v3.0)
|
|
412
|
+
// =============================================================================
|
|
413
|
+
|
|
414
|
+
/**
 * Ingest a document into the KB with automatic embedding.
 *
 * The embedding is computed from the first 8000 characters of `content` via
 * `getEmbedding`; when none can be produced the node is stored with a NULL
 * embedding. An existing node with the same `(namespace, path)` is overwritten.
 *
 * @param namespace Namespace to store the node in.
 * @param path      Path of the node; unique together with `namespace`.
 * @param title     Node title.
 * @param content   Full node content (stored untruncated).
 * @param options   Attribution (`sourceExpert`, `sourceUrl`), optional
 *                  `confidence` (defaults to 0.8 only when omitted, so an
 *                  explicit 0 is stored as 0) and optional `metadata`
 *                  (defaults to `{}`).
 * @returns The node id and where the embedding came from; a cache hit is
 *          reported as `'local'`.
 * @throws Whatever the pool or the INSERT throws.
 */
export async function ingestDocument(
  namespace: string,
  path: string,
  title: string,
  content: string,
  options: {
    sourceExpert: string;
    sourceUrl: string;
    confidence?: number;
    metadata?: Record<string, unknown>;
  }
): Promise<{ id: string; embeddingSource: 'local' | 'database' | 'none' }> {
  const client = await pool.connect();

  try {
    // Generate the embedding (cache, local, or database - see getEmbedding)
    const { embedding, source } = await getEmbedding(content.substring(0, 8000), client);

    const embeddingStr = embedding ? `[${embedding.join(',')}]` : null;

    const result = await client.query(`
      INSERT INTO kb_nodes (
        namespace, path, title, content,
        source_expert, source_url, confidence, metadata, embedding
      )
      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::vector)
      ON CONFLICT (namespace, path) DO UPDATE SET
        title = EXCLUDED.title,
        content = EXCLUDED.content,
        source_expert = EXCLUDED.source_expert,
        source_url = EXCLUDED.source_url,
        confidence = EXCLUDED.confidence,
        metadata = EXCLUDED.metadata,
        embedding = EXCLUDED.embedding,
        updated_at = NOW()
      RETURNING id
    `, [
      namespace,
      path,
      title,
      content,
      options.sourceExpert,
      options.sourceUrl,
      // `??` rather than `||`: a deliberate confidence of 0 must not become 0.8.
      options.confidence ?? 0.8,
      JSON.stringify(options.metadata ?? {}),
      embeddingStr
    ]);

    console.log(`[KB-FIRST] Ingested: ${title} (embedding: ${source})`);

    return {
      id: result.rows[0].id,
      embeddingSource: source === 'cache' ? 'local' : source
    };
  } finally {
    client.release();
  }
}
|
|
476
|
+
|
|
477
|
+
/**
 * Batch ingest multiple documents in one transaction.
 *
 * Embeddings for all documents are computed up front with
 * `getEmbeddingsBatch` (first 8000 characters of each content). The upserts
 * then run inside a single BEGIN/COMMIT, so either every document is stored
 * or none is. Documents without an embedding are stored with a NULL embedding.
 *
 * @param namespace Namespace all documents are stored in.
 * @param documents Documents to upsert, keyed by `(namespace, path)`.
 *                  `confidence` defaults to 0.8 only when omitted (an explicit
 *                  0 is kept); `metadata` defaults to `{}`.
 * @returns Number of documents written and the embedding source reported by
 *          the batch embedder.
 * @throws The error that aborted the transaction, after a rollback attempt.
 */
export async function ingestDocumentsBatch(
  namespace: string,
  documents: Array<{
    path: string;
    title: string;
    content: string;
    sourceExpert: string;
    sourceUrl: string;
    confidence?: number;
    metadata?: Record<string, unknown>;
  }>
): Promise<{ ingested: number; embeddingSource: 'local' | 'none' }> {
  // Generate embeddings in batch
  const contents = documents.map(d => d.content.substring(0, 8000));
  const { embeddings, source } = await getEmbeddingsBatch(contents);

  // Built once; the statement is identical for every document.
  const upsertSql = `
    INSERT INTO kb_nodes (
      namespace, path, title, content,
      source_expert, source_url, confidence, metadata, embedding
    )
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::vector)
    ON CONFLICT (namespace, path) DO UPDATE SET
      title = EXCLUDED.title,
      content = EXCLUDED.content,
      source_expert = EXCLUDED.source_expert,
      source_url = EXCLUDED.source_url,
      confidence = EXCLUDED.confidence,
      metadata = EXCLUDED.metadata,
      embedding = EXCLUDED.embedding,
      updated_at = NOW()
  `;

  const client = await pool.connect();

  try {
    await client.query('BEGIN');

    for (let i = 0; i < documents.length; i++) {
      const doc = documents[i];
      const embedding = embeddings[i];
      const embeddingStr = embedding ? `[${embedding.join(',')}]` : null;

      await client.query(upsertSql, [
        namespace,
        doc.path,
        doc.title,
        doc.content,
        doc.sourceExpert,
        doc.sourceUrl,
        // `??` rather than `||`: a deliberate confidence of 0 must not become 0.8.
        doc.confidence ?? 0.8,
        JSON.stringify(doc.metadata ?? {}),
        embeddingStr
      ]);
    }

    await client.query('COMMIT');
    console.log(`[KB-FIRST] Batch ingested: ${documents.length} documents (embedding: ${source})`);

    return { ingested: documents.length, embeddingSource: source };
  } catch (e) {
    // A failing ROLLBACK (e.g. the connection is gone) must not replace the
    // error that actually aborted the batch.
    try {
      await client.query('ROLLBACK');
    } catch (rollbackError) {
      console.warn('[KB-FIRST] Rollback failed:', rollbackError);
    }
    throw e;
  } finally {
    client.release();
  }
}
|
|
545
|
+
|
|
546
|
+
// =============================================================================
|
|
547
|
+
// GAP DETECTION
|
|
548
|
+
// =============================================================================
|
|
549
|
+
|
|
550
|
+
/**
 * Record a query the KB could not answer.
 *
 * The first occurrence of a `(query, namespace)` pair inserts a row with an
 * occurrence count of 1; later occurrences bump the count and refresh
 * `last_seen`. The stored query is cut to 500 characters, and a missing or
 * empty namespace is recorded as `'default'`.
 *
 * @param query     The unanswered query.
 * @param reason    Why it counts as a gap.
 * @param namespace Namespace the query was run against.
 */
export async function logGap(
  query: string,
  reason: string,
  namespace?: string
): Promise<void> {
  const upsertGapSql = `
    INSERT INTO kb_gaps (query, reason, namespace, first_seen, last_seen, occurrence_count)
    VALUES ($1, $2, $3, NOW(), NOW(), 1)
    ON CONFLICT (query, namespace) DO UPDATE SET
      last_seen = NOW(),
      occurrence_count = kb_gaps.occurrence_count + 1
  `;

  const connection = await pool.connect();

  try {
    const storedQuery = query.substring(0, 500);
    const storedNamespace = namespace || 'default';
    await connection.query(upsertGapSql, [storedQuery, reason, storedNamespace]);
  } finally {
    connection.release();
  }
}
|
|
572
|
+
|
|
573
|
+
/**
 * List recorded gaps, most frequent first (ties broken by most recent).
 *
 * @param namespace Restrict the report to this namespace. The query treats a
 *                  NULL parameter as "all namespaces"; omitting the argument
 *                  relies on the driver sending `undefined` as NULL.
 * @param limit     Maximum number of rows to return (default 50).
 * @returns One entry per recorded gap.
 */
export async function getGapReport(
  namespace?: string,
  limit: number = 50
): Promise<GapReport[]> {
  const connection = await pool.connect();

  try {
    const { rows } = await connection.query(`
      SELECT query, reason, occurrence_count as count, last_seen
      FROM kb_gaps
      WHERE ($1::text IS NULL OR namespace = $1)
      ORDER BY occurrence_count DESC, last_seen DESC
      LIMIT $2
    `, [namespace, limit]);

    const report: GapReport[] = [];
    for (const entry of rows) {
      report.push({
        query: entry.query,
        reason: entry.reason,
        count: parseInt(entry.count),
        lastSeen: entry.last_seen
      });
    }
    return report;
  } finally {
    connection.release();
  }
}
|
|
601
|
+
|
|
602
|
+
// =============================================================================
|
|
603
|
+
// REASONING BANK (Pattern Storage)
|
|
604
|
+
// =============================================================================
|
|
605
|
+
|
|
606
|
+
/**
 * Remember a query/response pair that received good feedback.
 *
 * Pairs scoring below 0.7 are ignored. A new query pattern is inserted with
 * its embedding (NULL when none is available); for an existing pattern only
 * the running average of the feedback score, the use count and `updated_at`
 * are updated. The query is stored cut to 500 characters and the response to
 * 2000.
 *
 * @param query         The query that was answered.
 * @param response      The response that was given.
 * @param nodesUsed     Ids of the KB nodes the response drew on.
 * @param feedbackScore Feedback for the response.
 */
export async function storePattern(
  query: string,
  response: string,
  nodesUsed: string[],
  feedbackScore: number
): Promise<void> {
  // Only store good patterns
  if (feedbackScore < 0.7) {
    return;
  }

  const connection = await pool.connect();

  try {
    // Embed the query pattern so it can later be found by similarity
    const embedded = await getEmbedding(query, connection);
    const vectorLiteral = embedded.embedding ? `[${embedded.embedding.join(',')}]` : null;

    const params = [
      query.substring(0, 500),
      response.substring(0, 2000),
      nodesUsed,
      feedbackScore,
      vectorLiteral
    ];

    await connection.query(`
      INSERT INTO reasoning_bank (query_pattern, successful_response, kb_nodes_used, feedback_score, use_count, embedding)
      VALUES ($1, $2, $3, $4, 1, $5::vector)
      ON CONFLICT (query_pattern) DO UPDATE SET
        feedback_score = (reasoning_bank.feedback_score * reasoning_bank.use_count + $4)
                         / (reasoning_bank.use_count + 1),
        use_count = reasoning_bank.use_count + 1,
        updated_at = NOW()
    `, params);
  } finally {
    connection.release();
  }
}
|
|
637
|
+
|
|
638
|
+
/**
 * Find stored patterns similar to `query`.
 *
 * With an embedding available, patterns are ordered by vector distance to the
 * query; without one, by the SQL `similarity()` of the pattern text. Only
 * patterns with a feedback score of at least 0.7 are considered.
 *
 * Lookup is best effort: any error while embedding or querying is logged and
 * an empty list is returned, so callers can treat "no patterns" and "lookup
 * failed" alike without crashing. Failing to obtain a connection still throws.
 *
 * @param query Query to match against stored patterns.
 * @param limit Maximum number of patterns to return (default 3).
 * @returns Matching rows (`pattern`, `response`, `score`, plus the similarity
 *          column computed by the query), best match first.
 */
export async function findSimilarPatterns(
  query: string,
  limit: number = 3
): Promise<{ pattern: string; response: string; score: number }[]> {
  const client = await pool.connect();

  try {
    // Get embedding for query
    const { embedding } = await getEmbedding(query, client);

    if (embedding) {
      const embeddingStr = `[${embedding.join(',')}]`;
      const result = await client.query(`
        SELECT
          query_pattern as pattern,
          successful_response as response,
          feedback_score as score,
          1.0 / (1.0 + (embedding <=> $1::vector)) as similarity
        FROM reasoning_bank
        WHERE feedback_score >= 0.7
          AND embedding IS NOT NULL
        ORDER BY embedding <=> $1::vector
        LIMIT $2
      `, [embeddingStr, limit]);

      return result.rows;
    }

    // Fallback to text similarity
    const result = await client.query(`
      SELECT
        query_pattern as pattern,
        successful_response as response,
        feedback_score as score,
        similarity(query_pattern, $1) as sim
      FROM reasoning_bank
      WHERE feedback_score >= 0.7
      ORDER BY similarity(query_pattern, $1) DESC
      LIMIT $2
    `, [query, limit]);

    return result.rows;
  } catch (e) {
    // Still best effort, but no longer silent: a missing table or a broken
    // connection should be distinguishable from "nothing matched" in the logs.
    console.warn('[KB-FIRST] Pattern lookup failed:', e);
    return [];
  } finally {
    client.release();
  }
}
|
|
689
|
+
|
|
690
|
+
// =============================================================================
|
|
691
|
+
// HEALTH & DIAGNOSTICS
|
|
692
|
+
// =============================================================================
|
|
693
|
+
|
|
694
|
+
/**
 * Check KB health and capabilities.
 *
 * Never throws: any failure, including not being able to obtain a database
 * connection, is reported as `healthy: false` with the error message.
 *
 * NOTE(review): `gapCount` counts gaps across all namespaces even when
 * `namespace` is given — confirm whether it should be filtered like
 * `nodeCount`.
 *
 * @param namespace When given, `nodeCount` is limited to this namespace.
 * @returns Counts of nodes, gaps and stored patterns plus detected features.
 *          `patternCount` is 0 when the `reasoning_bank` query fails.
 */
export async function checkHealth(namespace?: string): Promise<{
  healthy: boolean;
  nodeCount: number;
  gapCount: number;
  patternCount: number;
  features: {
    vectorSearch: boolean;
    localEmbeddings: boolean;
    hybridSearch: boolean;
    cacheSize: number;
  };
  error?: string;
}> {
  // Acquired inside the try block so that an unreachable database yields an
  // unhealthy report instead of a rejected promise.
  let client: PoolClient | undefined;

  try {
    client = await pool.connect();

    // Test connection
    await client.query('SELECT 1');

    // Count nodes
    const nodeResult = namespace
      ? await client.query('SELECT COUNT(*) FROM kb_nodes WHERE namespace = $1', [namespace])
      : await client.query('SELECT COUNT(*) FROM kb_nodes');

    // Count gaps
    const gapResult = await client.query('SELECT COUNT(*) FROM kb_gaps');

    // Count patterns
    let patternCount = 0;
    try {
      const patternResult = await client.query('SELECT COUNT(*) FROM reasoning_bank');
      patternCount = parseInt(patternResult.rows[0].count, 10);
    } catch { /* table may not exist */ }

    // Check features. Vector search in this module relies on the `<=>`
    // operator, so probe for that operator directly. (The previous probe,
    // 'vector_cosine_ops', names an operator class, which checkFunction's
    // pg_proc / pg_operator lookup can never find.)
    const hasVector = await checkFunction(client, '<=>');
    await initLocalEmbeddings();
    const hasLocalEmbed = embedText !== null;

    return {
      healthy: true,
      nodeCount: parseInt(nodeResult.rows[0].count, 10),
      gapCount: parseInt(gapResult.rows[0].count, 10),
      patternCount,
      features: {
        vectorSearch: hasVector,
        localEmbeddings: hasLocalEmbed,
        hybridSearch: true, // Always available via ts_rank
        cacheSize: embeddingCache.size
      }
    };
  } catch (e) {
    return {
      healthy: false,
      nodeCount: 0,
      gapCount: 0,
      patternCount: 0,
      features: { vectorSearch: false, localEmbeddings: false, hybridSearch: false, cacheSize: 0 },
      error: e instanceof Error ? e.message : 'Unknown error'
    };
  } finally {
    client?.release();
  }
}
|
|
761
|
+
|
|
762
|
+
// =============================================================================
|
|
763
|
+
// UTILITIES
|
|
764
|
+
// =============================================================================
|
|
765
|
+
|
|
766
|
+
/**
 * Report whether the database knows a function or an operator by this name.
 *
 * Looks the name up in `pg_proc.proname` and `pg_operator.oprname`. Any query
 * error is treated as "not present".
 *
 * @param client   Client to run the lookup on.
 * @param funcName Function or operator name to look for.
 * @returns `true` when a match exists, otherwise `false`.
 */
async function checkFunction(client: PoolClient, funcName: string): Promise<boolean> {
  const lookupSql = `
    SELECT EXISTS(
      SELECT 1 FROM pg_proc WHERE proname = $1
    ) OR EXISTS(
      SELECT 1 FROM pg_operator WHERE oprname = $1
    ) AS exists
  `;

  try {
    const { rows } = await client.query(lookupSql, [funcName]);
    const firstRow = rows[0];
    return firstRow?.exists || false;
  } catch {
    return false;
  }
}
|
|
780
|
+
|
|
781
|
+
/**
 * Empty the embedding cache (useful for memory management).
 *
 * @returns An object whose `cleared` field is the number of entries the
 *   cache held immediately before it was emptied.
 */
export function clearEmbeddingCache(): { cleared: number } {
  const cleared = embeddingCache.size;
  embeddingCache.clear();
  return { cleared };
}
|
|
789
|
+
|
|
790
|
+
/**
 * Create the KB extensions, tables and indexes (intended to be run once).
 *
 * The whole DDL script is sent as a single query on one pooled connection.
 * Every CREATE statement uses IF NOT EXISTS. The two DO blocks are guarded:
 * the first tries the `ruvector` extension and falls back to `vector` on any
 * error; the second ignores any error raised while creating the HNSW
 * embedding index.
 *
 * @returns Resolves once the script has run and the success line was logged.
 * @throws Whatever `pool.connect()` or the query rejects with; nothing is
 *   caught here. The connection is released in either case.
 */
export async function initializeSchema(): Promise<void> {
  const schemaSql = `
    -- Enable extensions
    CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
    CREATE EXTENSION IF NOT EXISTS pg_trgm; -- For text similarity

    -- Try to enable ruvector (may not be available)
    DO $$ BEGIN
      CREATE EXTENSION IF NOT EXISTS ruvector;
    EXCEPTION WHEN OTHERS THEN
      -- Fall back to pgvector
      CREATE EXTENSION IF NOT EXISTS vector;
    END $$;

    -- KB Nodes table
    CREATE TABLE IF NOT EXISTS kb_nodes (
      id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
      namespace TEXT NOT NULL DEFAULT 'default',
      path TEXT NOT NULL,
      title TEXT NOT NULL,
      content TEXT,
      source_expert TEXT NOT NULL,
      source_url TEXT NOT NULL,
      confidence REAL DEFAULT 1.0 CHECK (confidence >= 0 AND confidence <= 1),
      metadata JSONB DEFAULT '{}',
      embedding vector(384),
      access_count INTEGER DEFAULT 0,
      last_accessed TIMESTAMPTZ,
      created_at TIMESTAMPTZ DEFAULT NOW(),
      updated_at TIMESTAMPTZ DEFAULT NOW(),
      UNIQUE(namespace, path)
    );

    -- Gaps table
    CREATE TABLE IF NOT EXISTS kb_gaps (
      id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
      query TEXT NOT NULL,
      reason TEXT,
      namespace TEXT DEFAULT 'default',
      first_seen TIMESTAMPTZ DEFAULT NOW(),
      last_seen TIMESTAMPTZ DEFAULT NOW(),
      occurrence_count INTEGER DEFAULT 1,
      resolved BOOLEAN DEFAULT false,
      UNIQUE(query, namespace)
    );

    -- Reasoning bank
    CREATE TABLE IF NOT EXISTS reasoning_bank (
      id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
      query_pattern TEXT UNIQUE NOT NULL,
      successful_response TEXT NOT NULL,
      kb_nodes_used UUID[] DEFAULT '{}',
      feedback_score REAL DEFAULT 0.5,
      use_count INTEGER DEFAULT 1,
      embedding vector(384),
      created_at TIMESTAMPTZ DEFAULT NOW(),
      updated_at TIMESTAMPTZ DEFAULT NOW()
    );

    -- Indexes
    CREATE INDEX IF NOT EXISTS kb_nodes_namespace_idx ON kb_nodes(namespace);
    CREATE INDEX IF NOT EXISTS kb_nodes_content_trgm_idx ON kb_nodes USING gin(content gin_trgm_ops);
    CREATE INDEX IF NOT EXISTS kb_gaps_namespace_idx ON kb_gaps(namespace);
    CREATE INDEX IF NOT EXISTS kb_gaps_count_idx ON kb_gaps(occurrence_count DESC);

    -- Vector index (if vector type exists)
    DO $$ BEGIN
      CREATE INDEX IF NOT EXISTS kb_nodes_embedding_idx ON kb_nodes
        USING hnsw (embedding vector_cosine_ops);
    EXCEPTION WHEN OTHERS THEN
      NULL; -- Vector extension not available
    END $$;
  `;

  const connection = await pool.connect();
  try {
    await connection.query(schemaSql);
    console.log('[KB-FIRST] Schema initialized successfully');
  } finally {
    // Hand the connection back to the pool whether or not the DDL succeeded.
    connection.release();
  }
}
|
|
875
|
+
|
|
876
|
+
// =============================================================================
|
|
877
|
+
// EXPORTS
|
|
878
|
+
// =============================================================================
|
|
879
|
+
|
|
880
|
+
// Default export: every public entry point of this module gathered on one
// object, grouped by concern.
export default {
  // Search and response assembly
  searchKB,
  buildResponse,

  // Embedding helpers (NEW in v3.0)
  getEmbedding,
  getEmbeddingsBatch,
  clearEmbeddingCache,

  // Document ingestion (NEW in v3.0)
  ingestDocument,
  ingestDocumentsBatch,

  // Gap logging and reporting
  logGap,
  getGapReport,

  // Reasoning-pattern storage and lookup
  storePattern,
  findSimilarPatterns,

  // Health check and schema setup
  checkHealth,
  initializeSchema,
};
|