ruvnet-kb-first 6.2.0 → 6.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +318 -533
- package/SKILL.md +139 -6
- package/bin/kb-first.js +9 -6
- package/package.json +5 -5
- package/scripts/kb-ingest-template.js +548 -0
- package/scripts/kb-optimize.sql +250 -0
- package/scripts/kb-quality-audit.js +956 -0
- package/src/commands/dashboard.js +359 -0
- package/src/commands/init.js +138 -9
- package/src/commands/status.js +59 -5
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* KB Ingest Template - Optimized Knowledge Base Ingestion
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* node scripts/kb-ingest-template.js --source ./docs --schema my_project
|
|
8
|
+
* node scripts/kb-ingest-template.js --source ./content --schema ask_ruvnet --update
|
|
9
|
+
*
|
|
10
|
+
* What it does:
|
|
11
|
+
* 1. Hash - SHA-256 of normalized content
|
|
12
|
+
* 2. Dedupe - Skip if hash exists (or update if higher quality)
|
|
13
|
+
* 3. Categorize - Auto-detect from 15 category rules
|
|
14
|
+
* 4. Score - Calculate quality 0-100
|
|
15
|
+
* 5. Embed - PostgreSQL ruvector_embed() (Rust, not WASM)
|
|
16
|
+
* 6. View - Auto-creates {schema}.kb optimized view
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { createHash } from 'crypto';
|
|
20
|
+
import { readFileSync, readdirSync, statSync, existsSync } from 'fs';
|
|
21
|
+
import { join, extname, basename, relative } from 'path';
|
|
22
|
+
import pg from 'pg';
|
|
23
|
+
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// Configuration
|
|
26
|
+
// ============================================================================
|
|
27
|
+
|
|
28
|
+
/**
 * Runtime configuration for the ingest script.
 *
 * `db` is handed to `new pg.Client(...)`. Host, port and password may be
 * overridden with the KB_HOST, KB_PORT and KB_PASSWORD environment variables;
 * the database name and user are fixed to `postgres`.
 */
const CONFIG = {
  db: {
    host: process.env.KB_HOST || 'localhost',
    // Explicit radix: the port is always parsed as a decimal number.
    port: Number.parseInt(process.env.KB_PORT || '5435', 10),
    database: 'postgres',
    user: 'postgres',
    // NOTE(review): a hard-coded fallback password in a published script is a
    // security smell. Kept for backward compatibility; prefer setting
    // KB_PASSWORD explicitly.
    password: process.env.KB_PASSWORD || 'guruKB2025',
  },
  // File extensions (lower-case, with leading dot) that are eligible for ingestion.
  supportedExtensions: ['.md', '.txt', '.json', '.yaml', '.yml', '.ts', '.js', '.py', '.sql'],
  // Files with fewer characters than this are skipped.
  minContentLength: 100,
  // File content is truncated to this many characters before processing.
  maxContentLength: 50000,
};
|
|
40
|
+
|
|
41
|
+
// ============================================================================
|
|
42
|
+
// 15 Category Rules
|
|
43
|
+
// ============================================================================
|
|
44
|
+
|
|
45
|
+
/**
 * Ordered categorization rules as `{ category, patterns }` objects.
 *
 * Order matters: the list is scanned top to bottom and the first rule with a
 * matching pattern decides the category. The final `general` rule matches
 * any text and therefore acts as the catch-all.
 */
const CATEGORY_RULES = [
  // Agents & Orchestration
  ['agents', [/agent/i, /swarm/i, /orchestrat/i, /coordinator/i, /spawn/i]],
  ['workflows', [/workflow/i, /pipeline/i, /dag\b/i, /task.?flow/i]],

  // AI/ML
  ['embeddings', [/embed/i, /vector/i, /semantic/i, /onnx/i, /transformer/i]],
  ['neural', [/neural/i, /gnn\b/i, /attention/i, /sona\b/i, /lora\b/i]],
  ['inference', [/inference/i, /predict/i, /model\b/i, /llm\b/i, /prompt/i]],

  // Data & Storage
  ['database', [/database/i, /postgres/i, /sql\b/i, /schema/i, /migration/i]],
  ['storage', [/storage/i, /persist/i, /cache/i, /memory/i, /store/i]],

  // Security & Auth
  ['security', [/security/i, /auth/i, /encrypt/i, /token/i, /credential/i]],
  ['compliance', [/compliance/i, /audit/i, /gdpr/i, /pci/i, /hipaa/i]],

  // Infrastructure
  ['deployment', [/deploy/i, /docker/i, /kubernetes/i, /k8s/i, /ci.?cd/i]],
  ['config', [/config/i, /setting/i, /environment/i, /\.env/i]],

  // Development
  ['api', [/api\b/i, /endpoint/i, /rest\b/i, /graphql/i, /grpc/i]],
  ['testing', [/test/i, /spec\b/i, /mock/i, /fixture/i, /assert/i]],
  ['patterns', [/pattern/i, /best.?practice/i, /architecture/i, /design/i]],

  // Fallback
  ['general', [/.*/]],
].map(([category, patterns]) => ({ category, patterns }));
|
|
75
|
+
|
|
76
|
+
// ============================================================================
|
|
77
|
+
// Content Hashing
|
|
78
|
+
// ============================================================================
|
|
79
|
+
|
|
80
|
+
/**
 * Normalize text before hashing so that case, whitespace runs and
 * punctuation do not affect the resulting content hash.
 *
 * The character filter is Unicode-aware: letters, combining marks and digits
 * of any script are kept. The previous ASCII-only `\w` filter removed every
 * non-ASCII character, so distinct non-Latin documents normalized to the same
 * (often empty) string and were wrongly treated as duplicates. Output for
 * pure-ASCII input is unchanged.
 *
 * @param {string} content - Raw text.
 * @returns {string} Lower-cased text with whitespace runs collapsed to one
 *   space, punctuation/symbols removed, and surrounding whitespace trimmed.
 */
function normalizeContent(content) {
  return content
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
    .trim();
}
|
|
87
|
+
|
|
88
|
+
/**
 * Compute the deduplication hash of a piece of content.
 *
 * @param {string} content - Raw text; it is passed through normalizeContent first.
 * @returns {string} Hex-encoded SHA-256 digest of the normalized text.
 */
function hashContent(content) {
  const hasher = createHash('sha256');
  hasher.update(normalizeContent(content));
  return hasher.digest('hex');
}
|
|
92
|
+
|
|
93
|
+
// ============================================================================
|
|
94
|
+
// Auto-Categorization
|
|
95
|
+
// ============================================================================
|
|
96
|
+
|
|
97
|
+
/**
 * Pick a category for an entry by scanning CATEGORY_RULES in order.
 *
 * The title, content and file path are concatenated (space separated) and the
 * first rule with any matching pattern wins.
 *
 * @param {string} title - Entry title.
 * @param {string} content - Entry body.
 * @param {string} filePath - Path of the source file.
 * @returns {string} The matching rule's category, or 'general' if none matches.
 */
function categorize(title, content, filePath) {
  const haystack = `${title} ${content} ${filePath}`;
  const winner = CATEGORY_RULES.find((rule) =>
    rule.patterns.some((pattern) => pattern.test(haystack)),
  );
  return winner ? winner.category : 'general';
}
|
|
110
|
+
|
|
111
|
+
// ============================================================================
|
|
112
|
+
// Quality Scoring (0-100)
|
|
113
|
+
// ============================================================================
|
|
114
|
+
|
|
115
|
+
/**
 * Heuristic quality score for a knowledge-base entry.
 *
 * Starts at 50, adds bonuses for length, structure, attribution, title and
 * helpful wording, subtracts penalties, then clamps to 0..100.
 *
 * @param {{content?: string, title?: string, source_expert?: *, source_url?: *}} entry
 * @returns {number} Integer score in the range 0-100.
 */
function scoreQuality(entry) {
  let score = 50; // Base score

  const content = entry.content || '';
  const title = entry.title || '';

  // Length scoring: +15 for 500-5000 chars, +10 for 200-10000, -20 below 100.
  const length = content.length;
  if (length >= 500 && length <= 5000) score += 15;
  else if (length >= 200 && length <= 10000) score += 10;
  else if (length < 100) score -= 20;

  // Structure scoring (up to 15 points)
  if (content.includes('##')) score += 5; // Headings ('###' also contains '##')
  if (content.includes('```')) score += 5; // Code blocks
  if (content.includes('|') && content.includes('-')) score += 5; // Tables

  // Source attribution (up to 10 points)
  if (entry.source_expert) score += 5;
  if (entry.source_url) score += 5;

  // Title quality (up to 10 points)
  if (title.length >= 10 && title.length <= 100) score += 5;
  // Case-insensitive so "Untitled" / "UNDEFINED" do not earn the bonus.
  if (!/untitled|undefined/i.test(title)) score += 5;

  // Content quality indicators (up to 15 points)
  if (/example|implementation|usage/i.test(content)) score += 5;
  if (/best practice|recommended|should/i.test(content)) score += 5;
  if (/warning|caution|avoid|don't/i.test(content)) score += 5;

  // Penalties
  if (/todo|fixme|hack|temporary/i.test(content)) score -= 10;
  if (content.split('\n').length < 3) score -= 10;

  return Math.max(0, Math.min(100, score));
}
|
|
151
|
+
|
|
152
|
+
// ============================================================================
|
|
153
|
+
// File Processing
|
|
154
|
+
// ============================================================================
|
|
155
|
+
|
|
156
|
+
/**
 * Derive a human-readable title for a file.
 *
 * Tried in order:
 *   1. the first markdown H1 (`# Title`) found in the content,
 *   2. the first non-blank line, if shorter than 100 characters (leading
 *      `#` markers stripped),
 *   3. the file name without extension, with `-`/`_` turned into spaces and
 *      each word capitalized.
 *
 * A candidate that is empty after trimming is skipped, so the function does
 * not return an empty title for inputs such as a heading marker with no text.
 *
 * @param {string} content - File content.
 * @param {string} filePath - Path of the file, used for the filename fallback.
 * @returns {string} The derived title.
 */
function extractTitleFromContent(content, filePath) {
  // Try to find a markdown title
  const h1Match = content.match(/^#\s+(.+)$/m);
  const h1Title = h1Match ? h1Match[1].trim() : '';
  if (h1Title) return h1Title;

  // Try first non-empty line
  const firstLine = content.split('\n').find((line) => line.trim().length > 0);
  if (firstLine && firstLine.length < 100) {
    const candidate = firstLine.replace(/^#+\s*/, '').trim();
    if (candidate) return candidate;
  }

  // Fall back to filename
  return basename(filePath, extname(filePath))
    .replace(/[-_]/g, ' ')
    .replace(/\b\w/g, (c) => c.toUpperCase());
}
|
|
172
|
+
|
|
173
|
+
/**
 * Turn a single file into a knowledge-base entry.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {string} baseDir - Root of the ingestion run; `source_path` is stored
 *   relative to it.
 * @returns {object|null} The entry (title, content, hash, category, quality
 *   score, source fields), or null when the extension is not in
 *   CONFIG.supportedExtensions or the file is shorter than
 *   CONFIG.minContentLength.
 */
function processFile(filePath, baseDir) {
  const extension = extname(filePath).toLowerCase();
  if (!CONFIG.supportedExtensions.includes(extension)) return null;

  const raw = readFileSync(filePath, 'utf-8');
  if (raw.length < CONFIG.minContentLength) return null;

  // Everything downstream (title, hash, category, score) sees the truncated text.
  const body = raw.slice(0, CONFIG.maxContentLength);
  const title = extractTitleFromContent(body, filePath);
  const sourcePath = relative(baseDir, filePath);

  const entry = {
    title,
    content: body,
    source_path: sourcePath,
    source_file: basename(filePath),
    content_hash: hashContent(body),
    category: categorize(title, body, sourcePath),
    quality_score: null,
    source_expert: null,
    source_url: null,
  };
  entry.quality_score = scoreQuality(entry);

  return entry;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Recursively collect knowledge-base entries from a directory tree.
 *
 * Hidden items (names starting with '.') and `node_modules` are skipped
 * before they are stat'ed, so an unreadable or dangling hidden item (for
 * example an editor lock-file symlink) no longer aborts the whole walk.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [baseDir=dir] - Root passed through to processFile for
 *   computing relative paths.
 * @returns {object[]} Entries returned by processFile, in traversal order.
 */
function walkDirectory(dir, baseDir = dir) {
  const entries = [];

  for (const item of readdirSync(dir)) {
    // Skip hidden files and node_modules
    if (item.startsWith('.') || item === 'node_modules') continue;

    const fullPath = join(dir, item);

    if (statSync(fullPath).isDirectory()) {
      // Push one by one: spreading a very large array into push() can exceed
      // the engine's argument limit.
      for (const nested of walkDirectory(fullPath, baseDir)) {
        entries.push(nested);
      }
    } else {
      const entry = processFile(fullPath, baseDir);
      if (entry) {
        entries.push(entry);
      }
    }
  }

  return entries;
}
|
|
230
|
+
|
|
231
|
+
// ============================================================================
|
|
232
|
+
// Database Operations
|
|
233
|
+
// ============================================================================
|
|
234
|
+
|
|
235
|
+
/**
 * Create the schema, the `kb_entries` table and its indexes if missing.
 *
 * The schema name is interpolated into DDL (identifiers cannot be bound as
 * query parameters), so it is validated first: only a letter or underscore
 * followed by letters, digits, underscores or `$` is accepted. Anything else
 * is rejected before any SQL is sent.
 *
 * @param {{query: Function}} client - Connected database client.
 * @param {string} schema - Schema name.
 * @returns {Promise<void>}
 * @throws {Error} If the schema name is not a plain identifier.
 */
async function ensureSchema(client, schema) {
  if (typeof schema !== 'string' || !/^[\p{L}_][\p{L}\p{N}_$]*$/u.test(schema)) {
    throw new Error(`Invalid schema name: ${JSON.stringify(schema)}`);
  }

  await client.query(`CREATE SCHEMA IF NOT EXISTS ${schema}`);

  // Create main table
  await client.query(`
    CREATE TABLE IF NOT EXISTS ${schema}.kb_entries (
      id SERIAL PRIMARY KEY,
      title TEXT NOT NULL,
      content TEXT NOT NULL,
      content_hash VARCHAR(64) UNIQUE NOT NULL,
      category VARCHAR(50) DEFAULT 'general',
      quality_score INTEGER DEFAULT 50,
      source_path TEXT,
      source_file TEXT,
      source_expert TEXT,
      source_url TEXT,
      embedding real[],
      created_at TIMESTAMP DEFAULT NOW(),
      updated_at TIMESTAMP DEFAULT NOW()
    )
  `);

  // Create indexes
  await client.query(`
    CREATE INDEX IF NOT EXISTS idx_${schema}_kb_category
    ON ${schema}.kb_entries(category)
  `);

  await client.query(`
    CREATE INDEX IF NOT EXISTS idx_${schema}_kb_quality
    ON ${schema}.kb_entries(quality_score DESC)
  `);

  // NOTE(review): content_hash is already declared UNIQUE, which is normally
  // backed by its own index; this extra index looks redundant. Confirm before
  // removing it.
  await client.query(`
    CREATE INDEX IF NOT EXISTS idx_${schema}_kb_hash
    ON ${schema}.kb_entries(content_hash)
  `);
}
|
|
273
|
+
|
|
274
|
+
/**
 * (Re)build the `{schema}.kb` view over `kb_entries`.
 *
 * The view is dropped if present and created again. It exposes a subset of
 * columns for rows with quality_score >= 40, ordered by quality and recency.
 *
 * @param {{query: Function}} client - Connected database client.
 * @param {string} schema - Schema that owns the table and the view.
 * @returns {Promise<void>}
 */
async function createOptimizedView(client, schema) {
  const viewName = `${schema}.kb`;
  const exposedColumns = [
    'id',
    'title',
    'content',
    'category',
    'quality_score',
    'source_expert',
    'source_url',
    'embedding',
    'created_at',
  ];

  // Drop and recreate the optimized view
  await client.query(`DROP VIEW IF EXISTS ${viewName}`);

  const createSql = `
    CREATE VIEW ${viewName} AS
    SELECT
      ${exposedColumns.join(',\n      ')}
    FROM ${schema}.kb_entries
    WHERE quality_score >= 40
    ORDER BY quality_score DESC, created_at DESC
  `;
  await client.query(createSql);

  console.log(` ✓ Created optimized view: ${schema}.kb`);
}
|
|
297
|
+
|
|
298
|
+
/**
 * Look up an entry by its content hash.
 *
 * @param {{query: Function}} client - Connected database client.
 * @param {string} schema - Schema that owns `kb_entries`.
 * @param {string} contentHash - Hash to search for (bound as a query parameter).
 * @returns {Promise<{id: *, quality_score: *}|null>} The first matching row, or
 *   null when there is none.
 */
async function checkExistingHash(client, schema, contentHash) {
  const lookupSql = `SELECT id, quality_score FROM ${schema}.kb_entries WHERE content_hash = $1`;
  const { rows } = await client.query(lookupSql, [contentHash]);
  const [firstRow] = rows;
  return firstRow || null;
}
|
|
305
|
+
|
|
306
|
+
/**
 * Insert a new row into `{schema}.kb_entries`.
 *
 * All entry fields are bound as parameters; the embedding is computed inside
 * the database by calling `ruvector_embed` on the content parameter.
 *
 * @param {{query: Function}} client - Connected database client.
 * @param {string} schema - Schema that owns `kb_entries`.
 * @param {object} entry - Entry with the fields listed in `boundColumns`.
 * @returns {Promise<*>} The `id` of the inserted row.
 */
async function insertEntry(client, schema, entry) {
  // Column order defines the $1..$9 placeholder order; content is $2.
  const boundColumns = [
    'title',
    'content',
    'content_hash',
    'category',
    'quality_score',
    'source_path',
    'source_file',
    'source_expert',
    'source_url',
  ];
  const placeholders = boundColumns.map((_, index) => `$${index + 1}`).join(', ');
  const values = boundColumns.map((column) => entry[column]);

  const insertSql = `
    INSERT INTO ${schema}.kb_entries
    (${boundColumns.join(', ')}, embedding)
    VALUES (${placeholders}, ruvector_embed($2))
    RETURNING id
  `;

  const { rows } = await client.query(insertSql, values);
  return rows[0].id;
}
|
|
325
|
+
|
|
326
|
+
/**
 * Overwrite an existing row of `{schema}.kb_entries` with a new version.
 *
 * Updates title, content, category, quality score and source path/file,
 * recomputes the embedding from the new content and bumps `updated_at`.
 * The content hash and the source_expert/source_url columns are left as-is.
 *
 * @param {{query: Function}} client - Connected database client.
 * @param {string} schema - Schema that owns `kb_entries`.
 * @param {*} id - Primary key of the row to update.
 * @param {object} entry - New field values.
 * @returns {Promise<void>}
 */
async function updateEntry(client, schema, id, entry) {
  const {
    title,
    content,
    category,
    quality_score,
    source_path,
    source_file,
  } = entry;

  const updateSql = `
    UPDATE ${schema}.kb_entries
    SET title = $1, content = $2, category = $3, quality_score = $4,
    source_path = $5, source_file = $6, embedding = ruvector_embed($2),
    updated_at = NOW()
    WHERE id = $7
  `;
  const params = [title, content, category, quality_score, source_path, source_file, id];

  await client.query(updateSql, params);
}
|
|
343
|
+
|
|
344
|
+
// ============================================================================
|
|
345
|
+
// Main Ingestion Process
|
|
346
|
+
// ============================================================================
|
|
347
|
+
|
|
348
|
+
/**
 * Run a full ingestion: walk `sourceDir`, insert or update entries in
 * `{schema}.kb_entries`, rebuild the `{schema}.kb` view and print a summary.
 *
 * Entries scoring below `minQuality` are skipped. An entry whose content hash
 * already exists is counted as a duplicate, unless `update` is set and the
 * new score is strictly higher, in which case the stored row is replaced.
 *
 * Failure handling: a missing source directory or an invalid schema name
 * terminates the process with exit code 1 before any connection is made.
 * Errors raised later are logged and reported through `process.exitCode`,
 * so the `finally` block still closes the database client.
 *
 * @param {string} sourceDir - Directory to ingest.
 * @param {string} schema - Target schema; must be a plain identifier because
 *   it is interpolated into SQL.
 * @param {{update?: boolean, minQuality?: number}} [options]
 * @returns {Promise<void>}
 */
async function ingest(sourceDir, schema, options = {}) {
  const { update = false, minQuality = 30 } = options;

  console.log('\n╔════════════════════════════════════════════════════════════════╗');
  console.log('║ RuvNet KB Ingest Template v1.0.0 ║');
  console.log('╚════════════════════════════════════════════════════════════════╝\n');

  console.log(` Source: ${sourceDir}`);
  console.log(` Schema: ${schema}`);
  console.log(` Mode: ${update ? 'Update (replace higher quality)' : 'Insert (skip duplicates)'}`);
  console.log(` Min Score: ${minQuality}\n`);

  // Validate source directory
  if (!existsSync(sourceDir)) {
    console.error(` ✗ Source directory not found: ${sourceDir}`);
    process.exit(1);
  }

  // The schema name ends up inside SQL text (identifiers cannot be bound as
  // parameters), so refuse anything that is not a plain identifier.
  if (typeof schema !== 'string' || !/^[\p{L}_][\p{L}\p{N}_$]*$/u.test(schema)) {
    console.error(` ✗ Invalid schema name: ${schema}`);
    process.exit(1);
  }

  // Connect to database
  const client = new pg.Client(CONFIG.db);

  try {
    await client.connect();
    console.log(' ✓ Connected to ruvector-postgres');

    // Ensure schema and table exist
    await ensureSchema(client, schema);
    console.log(` ✓ Schema ${schema} ready`);

    // Process files
    console.log('\n Processing files...\n');
    const entries = walkDirectory(sourceDir);

    const stats = {
      total: entries.length,
      inserted: 0,
      updated: 0,
      lowQuality: 0,
      duplicates: 0,
    };

    // Sequential on purpose: a single client runs one query at a time.
    for (const entry of entries) {
      // Skip low quality
      if (entry.quality_score < minQuality) {
        stats.lowQuality++;
        continue;
      }

      // Check for existing hash
      const existing = await checkExistingHash(client, schema, entry.content_hash);

      if (!existing) {
        // Insert new entry
        await insertEntry(client, schema, entry);
        stats.inserted++;
        console.log(` + Inserted: ${entry.title} [${entry.category}] (${entry.quality_score}/100)`);
      } else if (update && entry.quality_score > existing.quality_score) {
        // Update with higher quality version
        await updateEntry(client, schema, existing.id, entry);
        stats.updated++;
        console.log(` ↻ Updated: ${entry.title} (${entry.quality_score} > ${existing.quality_score})`);
      } else {
        stats.duplicates++;
      }
    }

    // Create optimized view
    await createOptimizedView(client, schema);

    // Get final count
    const countResult = await client.query(`SELECT COUNT(*) FROM ${schema}.kb_entries`);
    const totalEntries = Number.parseInt(countResult.rows[0].count, 10);

    // Print summary
    console.log('\n ══════════════════════════════════════════════════════════════');
    console.log(' INGESTION COMPLETE');
    console.log(' ══════════════════════════════════════════════════════════════');
    console.log(` Files Processed: ${stats.total}`);
    console.log(` Inserted: ${stats.inserted}`);
    console.log(` Updated: ${stats.updated}`);
    console.log(` Skipped (dup): ${stats.duplicates}`);
    console.log(` Skipped (quality): ${stats.lowQuality}`);
    console.log(` Total in KB: ${totalEntries}`);
    console.log(' ══════════════════════════════════════════════════════════════\n');

    // Category breakdown
    const categoryResult = await client.query(`
      SELECT category, COUNT(*) as count, AVG(quality_score)::int as avg_score
      FROM ${schema}.kb_entries
      GROUP BY category
      ORDER BY count DESC
    `);

    console.log(' Category Breakdown:');
    for (const row of categoryResult.rows) {
      console.log(` ${row.category.padEnd(15)} ${row.count.toString().padStart(5)} entries (avg score: ${row.avg_score})`);
    }

    console.log('\n Query examples:');
    console.log(` SELECT * FROM ${schema}.kb WHERE category = 'agents' LIMIT 5;`);
    console.log(` SELECT * FROM ${schema}.kb WHERE quality_score >= 80;`);
    console.log(` SELECT title, embedding <=> ruvector_embed('your query') AS distance`);
    console.log(` FROM ${schema}.kb_entries ORDER BY distance LIMIT 10;\n`);
  } catch (error) {
    console.error(` ✗ Error: ${error.message}`);
    // Do not call process.exit() here: it would terminate the process
    // immediately and skip the finally block that closes the connection.
    process.exitCode = 1;
  } finally {
    await client.end();
  }
}
|
|
462
|
+
|
|
463
|
+
// ============================================================================
|
|
464
|
+
// CLI
|
|
465
|
+
// ============================================================================
|
|
466
|
+
|
|
467
|
+
/**
 * Parse command-line arguments from `process.argv`.
 *
 * Recognized flags: `--source`/`-s <dir>`, `--schema <name>`,
 * `--update`/`-u`, `--min-quality <n>` and `--help`/`-h` (prints help and
 * exits with code 0). Unrecognized arguments are ignored.
 *
 * `--min-quality` must be an integer. A non-numeric value is reported and the
 * process exits with code 1, instead of silently becoming NaN (which would
 * disable the quality filter, since no score compares below NaN).
 *
 * @returns {{source: ?string, schema: ?string, update: boolean, minQuality: number}}
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const options = {
    source: null,
    schema: null,
    update: false,
    minQuality: 30,
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--source':
      case '-s':
        options.source = args[++i];
        break;
      case '--schema':
        options.schema = args[++i];
        break;
      case '--update':
      case '-u':
        options.update = true;
        break;
      case '--min-quality': {
        const rawValue = args[++i];
        const parsed = Number.parseInt(rawValue, 10);
        if (Number.isNaN(parsed)) {
          console.error(`Error: --min-quality expects an integer, got: ${rawValue}`);
          process.exit(1);
        }
        options.minQuality = parsed;
        break;
      }
      case '--help':
      case '-h':
        printHelp();
        process.exit(0);
    }
  }

  return options;
}
|
|
501
|
+
|
|
502
|
+
/**
 * Print the command-line usage text to stdout.
 *
 * The text covers usage, options, examples and the environment variables
 * read by the script. It is emitted as a single console.log call with a
 * leading and a trailing newline.
 */
function printHelp() {
  const helpLines = [
    'KB Ingest Template - Optimized Knowledge Base Ingestion',
    '',
    'Usage:',
    'node scripts/kb-ingest-template.js --source <dir> --schema <name> [options]',
    '',
    'Options:',
    '--source, -s <dir> Source directory to ingest',
    '--schema <name> PostgreSQL schema name',
    '--update, -u Update existing entries if quality is higher',
    '--min-quality <n> Minimum quality score (default: 30)',
    '--help, -h Show this help',
    '',
    'Examples:',
    '# Ingest documentation',
    'node scripts/kb-ingest-template.js --source ./docs --schema my_project',
    '',
    '# Update mode (replace if better quality)',
    'node scripts/kb-ingest-template.js --source ./content --schema ask_ruvnet --update',
    '',
    '# High quality only',
    'node scripts/kb-ingest-template.js --source ./docs --schema my_kb --min-quality 60',
    '',
    'Environment Variables:',
    'KB_HOST Database host (default: localhost)',
    'KB_PORT Database port (default: 5435)',
    'KB_PASSWORD Database password (default: guruKB2025)',
  ];

  console.log(`\n${helpLines.join('\n')}\n`);
}
|
|
532
|
+
|
|
533
|
+
// ============================================================================
|
|
534
|
+
// Entry Point
|
|
535
|
+
// ============================================================================
|
|
536
|
+
|
|
537
|
+
// Parse CLI flags; both --source and --schema are mandatory.
const options = parseArgs();

if (!options.source || !options.schema) {
  console.error('Error: --source and --schema are required');
  printHelp();
  process.exit(1);
}

ingest(options.source, options.schema, {
  update: options.update,
  minQuality: options.minQuality,
}).catch((error) => {
  // Report unexpected failures and make sure the process exits non-zero,
  // so callers (shells, CI) can detect that ingestion did not succeed.
  console.error(error);
  process.exitCode = 1;
});
|