cozo-memory 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adaptive-retrieval.js +10 -0
- package/dist/dynamic-fusion.js +15 -2
- package/dist/hybrid-search.js +51 -18
- package/dist/logger.js +56 -0
- package/dist/migrate-logging.js +113 -0
- package/dist/performance-monitor.js +108 -0
- package/dist/test-large-dataset.js +502 -0
- package/package.json +1 -1
|
@@ -322,6 +322,11 @@ class AdaptiveGraphRetrieval {
|
|
|
322
322
|
* Main adaptive retrieval method
|
|
323
323
|
*/
|
|
324
324
|
async retrieve(query, limit = 10) {
|
|
325
|
+
// Validate limit to prevent errors
|
|
326
|
+
if (limit <= 0) {
|
|
327
|
+
console.error('[AdaptiveRetrieval] Invalid limit value:', limit, '- must be positive. Defaulting to 10.');
|
|
328
|
+
limit = 10;
|
|
329
|
+
}
|
|
325
330
|
// 1. Classify query complexity
|
|
326
331
|
const complexity = this.classifyQueryComplexity(query);
|
|
327
332
|
console.error(`[AdaptiveRetrieval] Query complexity: ${complexity}`);
|
|
@@ -338,6 +343,11 @@ class AdaptiveGraphRetrieval {
|
|
|
338
343
|
}
|
|
339
344
|
// ==================== Strategy Implementations ====================
|
|
340
345
|
async vectorSearch(embedding, limit) {
|
|
346
|
+
// Validate limit
|
|
347
|
+
if (limit <= 0) {
|
|
348
|
+
console.error('[AdaptiveRetrieval] Invalid limit in vectorSearch:', limit);
|
|
349
|
+
return [];
|
|
350
|
+
}
|
|
341
351
|
const result = await this.db.run(`
|
|
342
352
|
?[id, name, type, score] :=
|
|
343
353
|
~entity:semantic{id | query: vec($embedding), k: $limit, ef: 100, bind_distance: dist},
|
package/dist/dynamic-fusion.js
CHANGED
|
@@ -74,10 +74,23 @@ class DynamicFusionSearch {
|
|
|
74
74
|
*/
|
|
75
75
|
async search(query, config = {}) {
|
|
76
76
|
const startTime = Date.now();
|
|
77
|
+
// Merge config with defaults first
|
|
78
|
+
const fullConfig = this.mergeConfig(config);
|
|
79
|
+
// Validate topK values to prevent errors
|
|
80
|
+
if (fullConfig.vector && fullConfig.vector.topK <= 0) {
|
|
81
|
+
console.error('[DynamicFusion] Invalid vector.topK:', fullConfig.vector.topK, '- must be positive. Defaulting to 20.');
|
|
82
|
+
fullConfig.vector.topK = 20;
|
|
83
|
+
}
|
|
84
|
+
if (fullConfig.sparse && fullConfig.sparse.topK <= 0) {
|
|
85
|
+
console.error('[DynamicFusion] Invalid sparse.topK:', fullConfig.sparse.topK, '- must be positive. Defaulting to 20.');
|
|
86
|
+
fullConfig.sparse.topK = 20;
|
|
87
|
+
}
|
|
88
|
+
if (fullConfig.fts && fullConfig.fts.topK <= 0) {
|
|
89
|
+
console.error('[DynamicFusion] Invalid fts.topK:', fullConfig.fts.topK, '- must be positive. Defaulting to 20.');
|
|
90
|
+
fullConfig.fts.topK = 20;
|
|
91
|
+
}
|
|
77
92
|
// Get adaptive weights based on query classification
|
|
78
93
|
const adaptiveWeights = await this.adaptiveQueryFusion.getAdaptiveWeights(query);
|
|
79
|
-
// Merge config with defaults first, then apply adaptive weights
|
|
80
|
-
const fullConfig = this.mergeConfig(config);
|
|
81
94
|
// Override weights with adaptive values
|
|
82
95
|
fullConfig.vector.weight = adaptiveWeights.vector;
|
|
83
96
|
fullConfig.sparse.weight = adaptiveWeights.sparse;
|
package/dist/hybrid-search.js
CHANGED
|
@@ -6,6 +6,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
6
6
|
exports.HybridSearch = void 0;
|
|
7
7
|
const crypto_1 = __importDefault(require("crypto"));
|
|
8
8
|
const reranker_service_1 = require("./reranker-service");
|
|
9
|
+
const logger_1 = require("./logger");
|
|
10
|
+
const performance_monitor_1 = require("./performance-monitor");
|
|
9
11
|
const SEMANTIC_CACHE_THRESHOLD = 0.95;
|
|
10
12
|
class HybridSearch {
|
|
11
13
|
db;
|
|
@@ -138,22 +140,30 @@ class HybridSearch {
|
|
|
138
140
|
}
|
|
139
141
|
}
|
|
140
142
|
async advancedSearch(options) {
|
|
141
|
-
|
|
143
|
+
logger_1.logger.debug('HybridSearch', 'Starting advancedSearch', { query: options.query, limit: options.limit });
|
|
142
144
|
const { query, limit = 10, filters, graphConstraints, vectorParams } = options;
|
|
145
|
+
// Validate limit to prevent infinite loops
|
|
146
|
+
if (limit <= 0) {
|
|
147
|
+
logger_1.logger.warn('HybridSearch', `Invalid limit value: ${limit} - must be positive. Defaulting to 10.`);
|
|
148
|
+
options.limit = 10;
|
|
149
|
+
}
|
|
150
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('advancedSearch');
|
|
143
151
|
let queryEmbedding;
|
|
144
152
|
try {
|
|
145
153
|
queryEmbedding = await this.embeddingService.embed(query);
|
|
146
154
|
}
|
|
147
155
|
catch (e) {
|
|
148
|
-
|
|
156
|
+
logger_1.logger.error('HybridSearch', 'Embedding failed', e);
|
|
157
|
+
endTimer();
|
|
149
158
|
throw e;
|
|
150
159
|
}
|
|
151
160
|
const cachedResults = await this.tryCacheLookup(options, queryEmbedding);
|
|
152
161
|
if (cachedResults !== null) {
|
|
153
|
-
|
|
162
|
+
logger_1.logger.debug('HybridSearch', 'Cache hit for advancedSearch');
|
|
163
|
+
endTimer();
|
|
154
164
|
return cachedResults;
|
|
155
165
|
}
|
|
156
|
-
|
|
166
|
+
logger_1.logger.trace('HybridSearch', 'Cache miss, executing Datalog query...');
|
|
157
167
|
let topk = limit * 2;
|
|
158
168
|
const hasFilters = (filters?.metadata && Object.keys(filters.metadata).length > 0) ||
|
|
159
169
|
(filters?.entityTypes && filters.entityTypes.length > 0);
|
|
@@ -204,7 +214,7 @@ class HybridSearch {
|
|
|
204
214
|
semanticCall += `, filter: ${hnswFilters.join(" && ")}`;
|
|
205
215
|
}
|
|
206
216
|
semanticCall += `}`;
|
|
207
|
-
let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at
|
|
217
|
+
let bodyConstraints = [semanticCall, `*entity{id, name, type, metadata, created_at}`];
|
|
208
218
|
if (metaJoins.length > 0) {
|
|
209
219
|
bodyConstraints.push(...metaJoins);
|
|
210
220
|
}
|
|
@@ -229,13 +239,13 @@ class HybridSearch {
|
|
|
229
239
|
}
|
|
230
240
|
const helperRules = [
|
|
231
241
|
`rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}`,
|
|
232
|
-
`rank_val[id, r] := *entity{id
|
|
242
|
+
`rank_val[id, r] := *entity{id}, not *entity_rank{entity_id: id}, r = 0.0`
|
|
233
243
|
];
|
|
234
244
|
if (graphConstraints?.requiredRelations && graphConstraints.requiredRelations.length > 0) {
|
|
235
|
-
helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type
|
|
245
|
+
helperRules.push(`rel_match[id, rel_type] := *relationship{from_id: id, relation_type: rel_type}`, `rel_match[id, rel_type] := *relationship{to_id: id, relation_type: rel_type}`);
|
|
236
246
|
}
|
|
237
247
|
if (graphConstraints?.targetEntityIds && graphConstraints.targetEntityIds.length > 0) {
|
|
238
|
-
helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id
|
|
248
|
+
helperRules.push(`target_match[id, target_id] := *relationship{from_id: id, to_id: target_id}`, `target_match[id, target_id] := *relationship{to_id: id, from_id: target_id}`);
|
|
239
249
|
}
|
|
240
250
|
const datalogQuery = [
|
|
241
251
|
...helperRules,
|
|
@@ -278,11 +288,16 @@ class HybridSearch {
|
|
|
278
288
|
return rerankedResults;
|
|
279
289
|
}
|
|
280
290
|
await this.updateCache(options, queryEmbedding, finalResults);
|
|
291
|
+
endTimer();
|
|
281
292
|
return finalResults;
|
|
282
293
|
}
|
|
283
294
|
catch (e) {
|
|
284
|
-
|
|
285
|
-
|
|
295
|
+
logger_1.logger.error('HybridSearch', 'Error in advancedSearch', e.message);
|
|
296
|
+
performance_monitor_1.perfMonitor.recordMetric('advancedSearch', 0, true);
|
|
297
|
+
endTimer();
|
|
298
|
+
// Prevent infinite recursion by returning empty results instead of calling search()
|
|
299
|
+
logger_1.logger.warn('HybridSearch', 'Returning empty results to prevent infinite loop');
|
|
300
|
+
return [];
|
|
286
301
|
}
|
|
287
302
|
}
|
|
288
303
|
async search(options) {
|
|
@@ -308,8 +323,14 @@ class HybridSearch {
|
|
|
308
323
|
});
|
|
309
324
|
}
|
|
310
325
|
async graphRag(options) {
|
|
311
|
-
|
|
326
|
+
logger_1.logger.debug('HybridSearch', 'Starting graphRag', { query: options.query, limit: options.limit });
|
|
312
327
|
const { query, limit = 5, filters, graphConstraints } = options;
|
|
328
|
+
// Validate limit to prevent infinite loops
|
|
329
|
+
if (limit <= 0) {
|
|
330
|
+
logger_1.logger.warn('HybridSearch', `Invalid limit value: ${limit} - must be positive. Defaulting to 5.`);
|
|
331
|
+
options.limit = 5;
|
|
332
|
+
}
|
|
333
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('graphRag');
|
|
313
334
|
const maxDepth = graphConstraints?.maxDepth || 2;
|
|
314
335
|
const queryEmbedding = await this.embeddingService.embed(query);
|
|
315
336
|
const topk = limit * 2;
|
|
@@ -350,7 +371,7 @@ class HybridSearch {
|
|
|
350
371
|
// 4. Calculate a combined score based on vector distance, graph distance, and PageRank
|
|
351
372
|
const datalogQuery = `
|
|
352
373
|
rank_val[id, r] := *entity_rank{entity_id: id, pagerank: r}
|
|
353
|
-
rank_val[id, r] := *entity{id
|
|
374
|
+
rank_val[id, r] := *entity{id}, not *entity_rank{entity_id: id}, r = 0.0
|
|
354
375
|
|
|
355
376
|
seeds[id, score] := ${seedConstraints.join(", ")}, score = 1.0 - dist
|
|
356
377
|
|
|
@@ -360,7 +381,7 @@ class HybridSearch {
|
|
|
360
381
|
|
|
361
382
|
result_entities[id, final_score, depth] := path[seed_id, id, depth], seeds[seed_id, seed_score], rank_val[id, pr], final_score = seed_score * (1.0 - 0.2 * depth)
|
|
362
383
|
|
|
363
|
-
?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at
|
|
384
|
+
?[id, name, type, metadata, created_at, score, source, text] := result_entities[id, score, depth], *entity{id, name, type, metadata, created_at}, source = 'graph_rag_entity', text = ''
|
|
364
385
|
|
|
365
386
|
:sort -score
|
|
366
387
|
:limit $limit
|
|
@@ -396,19 +417,31 @@ class HybridSearch {
|
|
|
396
417
|
}
|
|
397
418
|
const decayedResults = this.applyTimeDecay(searchResults);
|
|
398
419
|
if (options.rerank) {
|
|
399
|
-
|
|
420
|
+
const reranked = await this.applyReranking(options.query, decayedResults);
|
|
421
|
+
endTimer();
|
|
422
|
+
return reranked;
|
|
400
423
|
}
|
|
424
|
+
endTimer();
|
|
401
425
|
return decayedResults;
|
|
402
426
|
}
|
|
403
427
|
catch (e) {
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
428
|
+
logger_1.logger.error('HybridSearch', 'Error in graphRag', e.message);
|
|
429
|
+
performance_monitor_1.perfMonitor.recordMetric('graphRag', 0, true);
|
|
430
|
+
endTimer();
|
|
431
|
+
// Prevent infinite recursion by returning empty results
|
|
432
|
+
logger_1.logger.warn('HybridSearch', 'Returning empty results to prevent infinite loop');
|
|
433
|
+
return [];
|
|
407
434
|
}
|
|
408
435
|
}
|
|
409
436
|
async agenticRetrieve(options) {
|
|
410
|
-
|
|
437
|
+
logger_1.logger.debug('HybridSearch', 'Starting agenticRetrieve', { query: options.query });
|
|
411
438
|
const { query, routingModel = "demyagent-4b-i1:Q6_K" } = options;
|
|
439
|
+
// Validate limit to prevent infinite loops
|
|
440
|
+
if (options.limit !== undefined && options.limit <= 0) {
|
|
441
|
+
logger_1.logger.warn('HybridSearch', `Invalid limit value: ${options.limit} - must be positive. Defaulting to 10.`);
|
|
442
|
+
options.limit = 10;
|
|
443
|
+
}
|
|
444
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('agenticRetrieve');
|
|
412
445
|
const systemPrompt = `You are a Routing Agent for an advanced Memory/RAG system.
|
|
413
446
|
Your job is to analyze the user's query and decide which search strategy is the most appropriate.
|
|
414
447
|
Available strategies:
|
package/dist/logger.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Centralized Logging System for CozoDB Memory
|
|
4
|
+
*
|
|
5
|
+
* Supports different log levels and can be configured via environment variables
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.logger = exports.LogLevel = void 0;
|
|
9
|
+
var LogLevel;
|
|
10
|
+
(function (LogLevel) {
|
|
11
|
+
LogLevel[LogLevel["ERROR"] = 0] = "ERROR";
|
|
12
|
+
LogLevel[LogLevel["WARN"] = 1] = "WARN";
|
|
13
|
+
LogLevel[LogLevel["INFO"] = 2] = "INFO";
|
|
14
|
+
LogLevel[LogLevel["DEBUG"] = 3] = "DEBUG";
|
|
15
|
+
LogLevel[LogLevel["TRACE"] = 4] = "TRACE";
|
|
16
|
+
})(LogLevel || (exports.LogLevel = LogLevel = {}));
|
|
17
|
+
class Logger {
|
|
18
|
+
level;
|
|
19
|
+
prefix;
|
|
20
|
+
constructor(prefix = '[CozoDB]') {
|
|
21
|
+
this.prefix = prefix;
|
|
22
|
+
// Read from environment variable, default to INFO
|
|
23
|
+
const envLevel = process.env.LOG_LEVEL?.toUpperCase();
|
|
24
|
+
this.level = LogLevel[envLevel] ?? LogLevel.INFO;
|
|
25
|
+
}
|
|
26
|
+
setLevel(level) {
|
|
27
|
+
this.level = level;
|
|
28
|
+
}
|
|
29
|
+
error(component, message, ...args) {
|
|
30
|
+
if (this.level >= LogLevel.ERROR) {
|
|
31
|
+
console.error(`${this.prefix}[${component}] ERROR:`, message, ...args);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
warn(component, message, ...args) {
|
|
35
|
+
if (this.level >= LogLevel.WARN) {
|
|
36
|
+
console.warn(`${this.prefix}[${component}] WARN:`, message, ...args);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
info(component, message, ...args) {
|
|
40
|
+
if (this.level >= LogLevel.INFO) {
|
|
41
|
+
console.error(`${this.prefix}[${component}] INFO:`, message, ...args);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
debug(component, message, ...args) {
|
|
45
|
+
if (this.level >= LogLevel.DEBUG) {
|
|
46
|
+
console.error(`${this.prefix}[${component}] DEBUG:`, message, ...args);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
trace(component, message, ...args) {
|
|
50
|
+
if (this.level >= LogLevel.TRACE) {
|
|
51
|
+
console.error(`${this.prefix}[${component}] TRACE:`, message, ...args);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
// Singleton instance
|
|
56
|
+
exports.logger = new Logger();
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Migration Script: Replace console.error with logger
|
|
4
|
+
*
|
|
5
|
+
* This script helps migrate from console.error to the centralized logger
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const filesToMigrate = [
|
|
43
|
+
'src/memory-service.ts',
|
|
44
|
+
'src/db-service.ts',
|
|
45
|
+
'src/embedding-service.ts',
|
|
46
|
+
'src/inference-engine.ts',
|
|
47
|
+
'src/dynamic-fusion.ts',
|
|
48
|
+
'src/adaptive-retrieval.ts',
|
|
49
|
+
'src/adaptive-query-fusion.ts',
|
|
50
|
+
'src/reranker-service.ts',
|
|
51
|
+
'src/export-import-service.ts',
|
|
52
|
+
'src/janitor-service.ts'
|
|
53
|
+
];
|
|
54
|
+
// Mapping of console.error patterns to logger calls
|
|
55
|
+
const migrations = [
|
|
56
|
+
{
|
|
57
|
+
pattern: /console\.error\(\s*\[([^\]]+)\]\s+([^,]+),/g,
|
|
58
|
+
replacement: "logger.error('$1', $2,"
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
pattern: /console\.error\(\s*\[([^\]]+)\]\s+([^)]+)\)/g,
|
|
62
|
+
replacement: "logger.error('$1', $2)"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
pattern: /console\.warn\(\s*\[([^\]]+)\]\s+([^)]+)\)/g,
|
|
66
|
+
replacement: "logger.warn('$1', $2)"
|
|
67
|
+
}
|
|
68
|
+
];
|
|
69
|
+
function migrateFile(filePath) {
|
|
70
|
+
if (!fs.existsSync(filePath)) {
|
|
71
|
+
console.log(`Skipping ${filePath} - file not found`);
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
let content = fs.readFileSync(filePath, 'utf-8');
|
|
75
|
+
let modified = false;
|
|
76
|
+
// Check if logger is already imported
|
|
77
|
+
if (!content.includes("import { logger }")) {
|
|
78
|
+
// Find the last import statement
|
|
79
|
+
const importRegex = /^import .+ from .+;$/gm;
|
|
80
|
+
const imports = content.match(importRegex);
|
|
81
|
+
if (imports && imports.length > 0) {
|
|
82
|
+
const lastImport = imports[imports.length - 1];
|
|
83
|
+
const lastImportIndex = content.lastIndexOf(lastImport);
|
|
84
|
+
content = content.slice(0, lastImportIndex + lastImport.length) +
|
|
85
|
+
"\nimport { logger } from './logger';" +
|
|
86
|
+
content.slice(lastImportIndex + lastImport.length);
|
|
87
|
+
modified = true;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
// Apply migrations
|
|
91
|
+
for (const migration of migrations) {
|
|
92
|
+
if (migration.pattern.test(content)) {
|
|
93
|
+
content = content.replace(migration.pattern, migration.replacement);
|
|
94
|
+
modified = true;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (modified) {
|
|
98
|
+
fs.writeFileSync(filePath, content, 'utf-8');
|
|
99
|
+
console.log(`✓ Migrated ${filePath}`);
|
|
100
|
+
}
|
|
101
|
+
else {
|
|
102
|
+
console.log(`- No changes needed for ${filePath}`);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
console.log('Starting logging migration...\n');
|
|
106
|
+
for (const file of filesToMigrate) {
|
|
107
|
+
migrateFile(file);
|
|
108
|
+
}
|
|
109
|
+
console.log('\nMigration complete!');
|
|
110
|
+
console.log('\nNext steps:');
|
|
111
|
+
console.log('1. Review the changes');
|
|
112
|
+
console.log('2. Run: npm run build');
|
|
113
|
+
console.log('3. Test with: LOG_LEVEL=DEBUG npm start');
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Performance Monitoring System
|
|
4
|
+
*
|
|
5
|
+
* Tracks operation latencies, throughput, and resource usage
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.perfMonitor = exports.PerformanceMonitor = void 0;
|
|
9
|
+
const logger_1 = require("./logger");
|
|
10
|
+
class PerformanceMonitor {
|
|
11
|
+
metrics;
|
|
12
|
+
maxSamples;
|
|
13
|
+
constructor(maxSamples = 1000) {
|
|
14
|
+
this.metrics = new Map();
|
|
15
|
+
this.maxSamples = maxSamples;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Start timing an operation
|
|
19
|
+
*/
|
|
20
|
+
startTimer(operation) {
|
|
21
|
+
const startTime = Date.now();
|
|
22
|
+
return () => {
|
|
23
|
+
const duration = Date.now() - startTime;
|
|
24
|
+
this.recordMetric(operation, duration);
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Record a metric manually
|
|
29
|
+
*/
|
|
30
|
+
recordMetric(operation, duration, isError = false) {
|
|
31
|
+
let metric = this.metrics.get(operation);
|
|
32
|
+
if (!metric) {
|
|
33
|
+
metric = {
|
|
34
|
+
times: [],
|
|
35
|
+
errors: 0,
|
|
36
|
+
lastExecuted: Date.now()
|
|
37
|
+
};
|
|
38
|
+
this.metrics.set(operation, metric);
|
|
39
|
+
}
|
|
40
|
+
metric.times.push(duration);
|
|
41
|
+
metric.lastExecuted = Date.now();
|
|
42
|
+
if (isError) {
|
|
43
|
+
metric.errors++;
|
|
44
|
+
}
|
|
45
|
+
// Keep only last N samples
|
|
46
|
+
if (metric.times.length > this.maxSamples) {
|
|
47
|
+
metric.times.shift();
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Get metrics for a specific operation
|
|
52
|
+
*/
|
|
53
|
+
getMetrics(operation) {
|
|
54
|
+
const metric = this.metrics.get(operation);
|
|
55
|
+
if (!metric || metric.times.length === 0) {
|
|
56
|
+
return null;
|
|
57
|
+
}
|
|
58
|
+
const sorted = [...metric.times].sort((a, b) => a - b);
|
|
59
|
+
const count = sorted.length;
|
|
60
|
+
const totalTime = sorted.reduce((sum, t) => sum + t, 0);
|
|
61
|
+
return {
|
|
62
|
+
operation,
|
|
63
|
+
count,
|
|
64
|
+
totalTime,
|
|
65
|
+
avgTime: totalTime / count,
|
|
66
|
+
minTime: sorted[0],
|
|
67
|
+
maxTime: sorted[count - 1],
|
|
68
|
+
p50: sorted[Math.floor(count * 0.5)],
|
|
69
|
+
p95: sorted[Math.floor(count * 0.95)],
|
|
70
|
+
p99: sorted[Math.floor(count * 0.99)],
|
|
71
|
+
errors: metric.errors,
|
|
72
|
+
lastExecuted: metric.lastExecuted
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Get all metrics
|
|
77
|
+
*/
|
|
78
|
+
getAllMetrics() {
|
|
79
|
+
const results = [];
|
|
80
|
+
for (const operation of this.metrics.keys()) {
|
|
81
|
+
const metric = this.getMetrics(operation);
|
|
82
|
+
if (metric) {
|
|
83
|
+
results.push(metric);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
return results.sort((a, b) => b.count - a.count);
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Log performance summary
|
|
90
|
+
*/
|
|
91
|
+
logSummary() {
|
|
92
|
+
const metrics = this.getAllMetrics();
|
|
93
|
+
logger_1.logger.info('PerformanceMonitor', '=== Performance Summary ===');
|
|
94
|
+
for (const m of metrics) {
|
|
95
|
+
logger_1.logger.info('PerformanceMonitor', `${m.operation}: ${m.count} calls, avg=${m.avgTime.toFixed(2)}ms, ` +
|
|
96
|
+
`p95=${m.p95.toFixed(2)}ms, errors=${m.errors}`);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Reset all metrics
|
|
101
|
+
*/
|
|
102
|
+
reset() {
|
|
103
|
+
this.metrics.clear();
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
exports.PerformanceMonitor = PerformanceMonitor;
|
|
107
|
+
// Singleton instance
|
|
108
|
+
exports.perfMonitor = new PerformanceMonitor();
|
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Large Dataset Performance Test
|
|
4
|
+
*
|
|
5
|
+
* Tests system performance with realistic data volumes
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
const embedding_service_1 = require("./embedding-service");
|
|
9
|
+
const hybrid_search_1 = require("./hybrid-search");
|
|
10
|
+
const performance_monitor_1 = require("./performance-monitor");
|
|
11
|
+
const logger_1 = require("./logger");
|
|
12
|
+
const uuid_1 = require("uuid");
|
|
13
|
+
// Set log level to INFO for cleaner output
|
|
14
|
+
logger_1.logger.setLevel(logger_1.LogLevel.INFO);
|
|
15
|
+
const CONFIGS = {
|
|
16
|
+
small: {
|
|
17
|
+
numEntities: 50, // Reduced to keep total observations under 100
|
|
18
|
+
numObservationsPerEntity: 1,
|
|
19
|
+
numRelationships: 75,
|
|
20
|
+
searchQueries: 20
|
|
21
|
+
},
|
|
22
|
+
medium: {
|
|
23
|
+
numEntities: 200,
|
|
24
|
+
numObservationsPerEntity: 2,
|
|
25
|
+
numRelationships: 400,
|
|
26
|
+
searchQueries: 50
|
|
27
|
+
},
|
|
28
|
+
large: {
|
|
29
|
+
numEntities: 500,
|
|
30
|
+
numObservationsPerEntity: 3,
|
|
31
|
+
numRelationships: 1000,
|
|
32
|
+
searchQueries: 100
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
// Sample data generators
|
|
36
|
+
const ENTITY_TYPES = ['Person', 'Project', 'Technology', 'Document', 'Task'];
|
|
37
|
+
const RELATION_TYPES = ['works_on', 'uses', 'depends_on', 'created_by', 'related_to'];
|
|
38
|
+
function generateEntityName(type, index) {
|
|
39
|
+
const names = {
|
|
40
|
+
Person: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank', 'Grace', 'Henry'],
|
|
41
|
+
Project: ['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta'],
|
|
42
|
+
Technology: ['TypeScript', 'React', 'Node.js', 'Python', 'Go', 'Rust', 'Java', 'C++'],
|
|
43
|
+
Document: ['Spec', 'Guide', 'Manual', 'Report', 'Analysis', 'Design', 'Plan', 'Review'],
|
|
44
|
+
Task: ['Implement', 'Test', 'Deploy', 'Review', 'Refactor', 'Document', 'Optimize', 'Debug']
|
|
45
|
+
};
|
|
46
|
+
const nameList = names[type] || ['Item'];
|
|
47
|
+
const baseName = nameList[index % nameList.length];
|
|
48
|
+
return `${baseName} ${Math.floor(index / nameList.length) + 1}`;
|
|
49
|
+
}
|
|
50
|
+
function generateObservation(entityName, type, index) {
|
|
51
|
+
const templates = [
|
|
52
|
+
`${entityName} is a ${type.toLowerCase()} that focuses on innovation and quality.`,
|
|
53
|
+
`Key characteristics of ${entityName} include reliability and performance.`,
|
|
54
|
+
`${entityName} has been actively developed and maintained since 2020.`,
|
|
55
|
+
`The primary goal of ${entityName} is to deliver exceptional results.`,
|
|
56
|
+
`${entityName} integrates seamlessly with modern development workflows.`
|
|
57
|
+
];
|
|
58
|
+
return templates[index % templates.length];
|
|
59
|
+
}
|
|
60
|
+
async function createTestData(db, embeddingService, config) {
|
|
61
|
+
const startTime = Date.now();
|
|
62
|
+
const entityIds = [];
|
|
63
|
+
logger_1.logger.info('TestLargeDataset', `Creating ${config.numEntities} entities...`);
|
|
64
|
+
// Pre-generate embeddings for entity types to speed up creation
|
|
65
|
+
const typeEmbeddings = new Map();
|
|
66
|
+
for (const type of ENTITY_TYPES) {
|
|
67
|
+
const contentEmbed = await embeddingService.embed(`${type} entity`);
|
|
68
|
+
const nameEmbed = await embeddingService.embed(type);
|
|
69
|
+
typeEmbeddings.set(type, { content: contentEmbed, name: nameEmbed });
|
|
70
|
+
}
|
|
71
|
+
// Create entities directly in CozoDB
|
|
72
|
+
for (let i = 0; i < config.numEntities; i++) {
|
|
73
|
+
const type = ENTITY_TYPES[i % ENTITY_TYPES.length];
|
|
74
|
+
const name = generateEntityName(type, i);
|
|
75
|
+
const id = (0, uuid_1.v4)();
|
|
76
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('create_entity');
|
|
77
|
+
try {
|
|
78
|
+
// Reuse type embeddings for speed
|
|
79
|
+
const embeddings = typeEmbeddings.get(type);
|
|
80
|
+
// Insert into CozoDB
|
|
81
|
+
await db.run(`
|
|
82
|
+
?[id, name, type, content_embedding, name_embedding, metadata, created_at] <- [
|
|
83
|
+
[$id, $name, $type, $content_embedding, $name_embedding, $metadata, $created_at]
|
|
84
|
+
]
|
|
85
|
+
:put entity { id => name, type, content_embedding, name_embedding, metadata, created_at }
|
|
86
|
+
`, {
|
|
87
|
+
id,
|
|
88
|
+
name,
|
|
89
|
+
type,
|
|
90
|
+
content_embedding: embeddings.content,
|
|
91
|
+
name_embedding: embeddings.name,
|
|
92
|
+
metadata: { index: i, category: type.toLowerCase(), created_at: Date.now() },
|
|
93
|
+
created_at: [Date.now() * 1000, true]
|
|
94
|
+
});
|
|
95
|
+
entityIds.push(id);
|
|
96
|
+
endTimer();
|
|
97
|
+
}
|
|
98
|
+
catch (error) {
|
|
99
|
+
performance_monitor_1.perfMonitor.recordMetric('create_entity', 0, true);
|
|
100
|
+
logger_1.logger.error('TestLargeDataset', `Failed to create entity ${name}:`, error);
|
|
101
|
+
}
|
|
102
|
+
if ((i + 1) % 50 === 0) {
|
|
103
|
+
logger_1.logger.info('TestLargeDataset', `Created ${i + 1}/${config.numEntities} entities`);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
logger_1.logger.info('TestLargeDataset', `Creating observations...`);
|
|
107
|
+
// Pre-generate a few observation embeddings to reuse (for speed)
|
|
108
|
+
const observationTemplates = [
|
|
109
|
+
'This entity focuses on innovation and quality.',
|
|
110
|
+
'Key characteristics include reliability and performance.',
|
|
111
|
+
'Has been actively developed since 2020.',
|
|
112
|
+
'Primary goal is to deliver exceptional results.',
|
|
113
|
+
'Integrates seamlessly with modern workflows.'
|
|
114
|
+
];
|
|
115
|
+
logger_1.logger.info('TestLargeDataset', `Pre-generating ${observationTemplates.length} observation embeddings...`);
|
|
116
|
+
const templateEmbeddings = await Promise.all(observationTemplates.map(t => embeddingService.embed(t)));
|
|
117
|
+
// Create observations in batches for better performance
|
|
118
|
+
let obsCount = 0;
|
|
119
|
+
const totalObservations = entityIds.length * config.numObservationsPerEntity;
|
|
120
|
+
let lastLogTime = Date.now();
|
|
121
|
+
const BATCH_SIZE = 50; // Insert 50 observations at once
|
|
122
|
+
const observationBatch = [];
|
|
123
|
+
for (const entityId of entityIds) {
|
|
124
|
+
for (let j = 0; j < config.numObservationsPerEntity; j++) {
|
|
125
|
+
// Reuse pre-generated embeddings for speed
|
|
126
|
+
const templateIdx = j % templateEmbeddings.length;
|
|
127
|
+
const text = observationTemplates[templateIdx];
|
|
128
|
+
const embedding = templateEmbeddings[templateIdx];
|
|
129
|
+
observationBatch.push({
|
|
130
|
+
id: (0, uuid_1.v4)(),
|
|
131
|
+
entity_id: entityId,
|
|
132
|
+
text,
|
|
133
|
+
embedding,
|
|
134
|
+
metadata: { confidence: 0.8 + Math.random() * 0.2 },
|
|
135
|
+
session_id: '',
|
|
136
|
+
task_id: '',
|
|
137
|
+
created_at: [Date.now() * 1000, true]
|
|
138
|
+
});
|
|
139
|
+
// Insert batch when it reaches BATCH_SIZE
|
|
140
|
+
if (observationBatch.length >= BATCH_SIZE) {
|
|
141
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('add_observation');
|
|
142
|
+
try {
|
|
143
|
+
// Build batch insert query - remove hyphens from UUIDs for variable names
|
|
144
|
+
const rows = observationBatch.map(obs => {
|
|
145
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
146
|
+
return `[$id_${cleanId}, $entity_id_${cleanId}, $text_${cleanId}, $embedding_${cleanId}, $metadata_${cleanId}, $session_id_${cleanId}, $task_id_${cleanId}, $created_at_${cleanId}]`;
|
|
147
|
+
}).join(',\n ');
|
|
148
|
+
const params = {};
|
|
149
|
+
for (const obs of observationBatch) {
|
|
150
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
151
|
+
params[`id_${cleanId}`] = obs.id;
|
|
152
|
+
params[`entity_id_${cleanId}`] = obs.entity_id;
|
|
153
|
+
params[`text_${cleanId}`] = obs.text;
|
|
154
|
+
params[`embedding_${cleanId}`] = obs.embedding;
|
|
155
|
+
params[`metadata_${cleanId}`] = obs.metadata;
|
|
156
|
+
params[`session_id_${cleanId}`] = obs.session_id;
|
|
157
|
+
params[`task_id_${cleanId}`] = obs.task_id;
|
|
158
|
+
params[`created_at_${cleanId}`] = obs.created_at;
|
|
159
|
+
}
|
|
160
|
+
await db.run(`
|
|
161
|
+
?[id, entity_id, text, embedding, metadata, session_id, task_id, created_at] <- [
|
|
162
|
+
${rows}
|
|
163
|
+
]
|
|
164
|
+
:put observation { id => entity_id, text, embedding, metadata, session_id, task_id, created_at }
|
|
165
|
+
`, params);
|
|
166
|
+
obsCount += observationBatch.length;
|
|
167
|
+
endTimer();
|
|
168
|
+
observationBatch.length = 0; // Clear batch
|
|
169
|
+
}
|
|
170
|
+
catch (error) {
|
|
171
|
+
performance_monitor_1.perfMonitor.recordMetric('add_observation', 0, true);
|
|
172
|
+
logger_1.logger.error('TestLargeDataset', `Failed to create observation batch at ${obsCount}:`, error);
|
|
173
|
+
observationBatch.length = 0; // Clear batch on error
|
|
174
|
+
}
|
|
175
|
+
// Log progress
|
|
176
|
+
const now = Date.now();
|
|
177
|
+
if (obsCount % 50 === 0 || (now - lastLogTime) > 10000) {
|
|
178
|
+
logger_1.logger.info('TestLargeDataset', `Created ${obsCount}/${totalObservations} observations (${((obsCount / totalObservations) * 100).toFixed(1)}%)`);
|
|
179
|
+
lastLogTime = now;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
// Insert remaining observations
|
|
185
|
+
if (observationBatch.length > 0) {
|
|
186
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('add_observation');
|
|
187
|
+
try {
|
|
188
|
+
const rows = observationBatch.map(obs => {
|
|
189
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
190
|
+
return `[$id_${cleanId}, $entity_id_${cleanId}, $text_${cleanId}, $embedding_${cleanId}, $metadata_${cleanId}, $session_id_${cleanId}, $task_id_${cleanId}, $created_at_${cleanId}]`;
|
|
191
|
+
}).join(',\n ');
|
|
192
|
+
const params = {};
|
|
193
|
+
for (const obs of observationBatch) {
|
|
194
|
+
const cleanId = obs.id.replace(/-/g, '_');
|
|
195
|
+
params[`id_${cleanId}`] = obs.id;
|
|
196
|
+
params[`entity_id_${cleanId}`] = obs.entity_id;
|
|
197
|
+
params[`text_${cleanId}`] = obs.text;
|
|
198
|
+
params[`embedding_${cleanId}`] = obs.embedding;
|
|
199
|
+
params[`metadata_${cleanId}`] = obs.metadata;
|
|
200
|
+
params[`session_id_${cleanId}`] = obs.session_id;
|
|
201
|
+
params[`task_id_${cleanId}`] = obs.task_id;
|
|
202
|
+
params[`created_at_${cleanId}`] = obs.created_at;
|
|
203
|
+
}
|
|
204
|
+
await db.run(`
|
|
205
|
+
?[id, entity_id, text, embedding, metadata, session_id, task_id, created_at] <- [
|
|
206
|
+
${rows}
|
|
207
|
+
]
|
|
208
|
+
:put observation { id => entity_id, text, embedding, metadata, session_id, task_id, created_at }
|
|
209
|
+
`, params);
|
|
210
|
+
obsCount += observationBatch.length;
|
|
211
|
+
endTimer();
|
|
212
|
+
}
|
|
213
|
+
catch (error) {
|
|
214
|
+
performance_monitor_1.perfMonitor.recordMetric('add_observation', 0, true);
|
|
215
|
+
logger_1.logger.error('TestLargeDataset', `Failed to create final observation batch:`, error);
|
|
216
|
+
}
|
|
217
|
+
logger_1.logger.info('TestLargeDataset', `Created ${obsCount}/${totalObservations} observations (100.0%)`);
|
|
218
|
+
}
|
|
219
|
+
logger_1.logger.info('TestLargeDataset', `Creating ${config.numRelationships} relationships...`);
|
|
220
|
+
// Create relationships
|
|
221
|
+
for (let i = 0; i < config.numRelationships; i++) {
|
|
222
|
+
const fromId = entityIds[Math.floor(Math.random() * entityIds.length)];
|
|
223
|
+
let toId = entityIds[Math.floor(Math.random() * entityIds.length)];
|
|
224
|
+
// Avoid self-references
|
|
225
|
+
while (toId === fromId) {
|
|
226
|
+
toId = entityIds[Math.floor(Math.random() * entityIds.length)];
|
|
227
|
+
}
|
|
228
|
+
const relationType = RELATION_TYPES[i % RELATION_TYPES.length];
|
|
229
|
+
const endTimer = performance_monitor_1.perfMonitor.startTimer('create_relation');
|
|
230
|
+
try {
|
|
231
|
+
await db.run(`
|
|
232
|
+
?[from_id, to_id, relation_type, strength, metadata, created_at] <- [
|
|
233
|
+
[$from_id, $to_id, $relation_type, $strength, $metadata, $created_at]
|
|
234
|
+
]
|
|
235
|
+
:put relationship { from_id, to_id, relation_type => strength, metadata, created_at }
|
|
236
|
+
`, {
|
|
237
|
+
from_id: fromId,
|
|
238
|
+
to_id: toId,
|
|
239
|
+
relation_type: relationType,
|
|
240
|
+
strength: 0.5 + Math.random() * 0.5,
|
|
241
|
+
metadata: {},
|
|
242
|
+
created_at: [Date.now() * 1000, true]
|
|
243
|
+
});
|
|
244
|
+
endTimer();
|
|
245
|
+
}
|
|
246
|
+
catch (error) {
|
|
247
|
+
performance_monitor_1.perfMonitor.recordMetric('create_relation', 0, true);
|
|
248
|
+
}
|
|
249
|
+
if ((i + 1) % 500 === 0) {
|
|
250
|
+
logger_1.logger.info('TestLargeDataset', `Created ${i + 1}/${config.numRelationships} relationships`);
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
const duration = Date.now() - startTime;
|
|
254
|
+
logger_1.logger.info('TestLargeDataset', `Data creation completed in ${(duration / 1000).toFixed(2)}s`);
|
|
255
|
+
return { entityIds, duration };
|
|
256
|
+
}
|
|
257
|
+
/**
 * Drives `config.searchQueries` hybrid-search calls against a fixed pool of
 * realistic queries, cycling through the pool round-robin. Each call is timed
 * via the performance monitor; failures are recorded as error metrics and
 * logged, and a progress line is emitted every 20 completed iterations.
 *
 * @param {object} hybridSearch - Search facade exposing `search({ query, limit })`.
 * @param {object} config - Dataset config; only `searchQueries` is read here.
 */
async function runSearchTests(hybridSearch, config) {
    logger_1.logger.info('TestLargeDataset', `Running ${config.searchQueries} search queries...`);
    // Fixed query pool; iteration N uses pool[N % pool.length].
    const queryPool = [
        'project management',
        'software development',
        'team collaboration',
        'technical documentation',
        'code review process',
        'deployment pipeline',
        'testing strategy',
        'performance optimization'
    ];
    for (let attempt = 1; attempt <= config.searchQueries; attempt++) {
        const currentQuery = queryPool[(attempt - 1) % queryPool.length];
        const stopTimer = performance_monitor_1.perfMonitor.startTimer('hybrid_search');
        try {
            await hybridSearch.search({ query: currentQuery, limit: 10 });
            stopTimer();
        }
        catch (error) {
            // On failure, record an error metric instead of stopping the timer.
            performance_monitor_1.perfMonitor.recordMetric('hybrid_search', 0, true);
            logger_1.logger.error('TestLargeDataset', `Search failed for query "${currentQuery}":`, error);
        }
        // Progress heartbeat every 20 searches.
        if (attempt % 20 === 0) {
            logger_1.logger.info('TestLargeDataset', `Completed ${attempt}/${config.searchQueries} searches`);
        }
    }
}
|
|
285
|
+
/**
 * Runs one complete large-dataset benchmark for the named configuration:
 * opens (or recreates) a SQLite-backed CozoDB, installs the full schema and
 * indexes, populates test data unless the database already has entities,
 * executes the search workload, and prints a performance summary.
 *
 * @param {string} configName - Key into CONFIGS (the CLI usage text suggests
 *   'small' | 'medium' | 'large' — TODO confirm against CONFIGS, which is
 *   defined elsewhere in this file).
 * @param {boolean} [cleanStart=false] - When true, delete any existing test
 *   database file before running so data is recreated from scratch.
 */
async function runTest(configName, cleanStart = false) {
    const config = CONFIGS[configName];
    if (!config) {
        logger_1.logger.error('TestLargeDataset', `Unknown config: ${configName}`);
        return;
    }
    logger_1.logger.info('TestLargeDataset', `\n=== Starting ${configName.toUpperCase()} dataset test ===`);
    logger_1.logger.info('TestLargeDataset', `Config: ${JSON.stringify(config, null, 2)}`);
    // Use the real CozoDB setup like in index.ts
    const { CozoDb } = await import('cozo-node');
    // One database file per config so different sizes don't clobber each other.
    const dbPath = `test_large_${configName}.cozo.db`;
    // Delete old database only if cleanStart flag is set
    const fs = await import('fs');
    if (cleanStart && fs.existsSync(dbPath)) {
        logger_1.logger.info('TestLargeDataset', `Removing old database: ${dbPath}`);
        fs.unlinkSync(dbPath);
    }
    else if (fs.existsSync(dbPath)) {
        logger_1.logger.info('TestLargeDataset', `Using existing database: ${dbPath}`);
    }
    const db = new CozoDb('sqlite', dbPath);
    const embeddingService = new embedding_service_1.EmbeddingService();
    const hybridSearch = new hybrid_search_1.HybridSearch(db, embeddingService);
    // Initialize schema like the real server does.
    // NOTE(review): all :create/::hnsw/::fts statements share one try block, so
    // the first "already exists" error skips the remaining statements — this
    // assumes schema creation is all-or-nothing from a prior run.
    try {
        // Create entity table
        await db.run(`
            :create entity {
                id: String,
                =>
                name: String,
                type: String,
                content_embedding: <F32; 1024>,
                name_embedding: <F32; 1024>,
                metadata: Json,
                created_at: Validity
            }
        `);
        // Create content HNSW index (cosine distance over 1024-dim embeddings)
        await db.run(`
            ::hnsw create entity:semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [content_embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create name HNSW index
        await db.run(`
            ::hnsw create entity:name_semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [name_embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create FTS index for entity names
        await db.run(`
            ::fts create entity:fts {
                extractor: name,
                tokenizer: Simple,
                filters: [Lowercase, Stemmer('english'), Stopwords('en')]
            }
        `);
        // Create observation table
        await db.run(`
            :create observation {
                id: String,
                =>
                entity_id: String,
                text: String,
                embedding: <F32; 1024>,
                metadata: Json,
                session_id: String,
                task_id: String,
                created_at: Validity
            }
        `);
        // Create observation HNSW index
        await db.run(`
            ::hnsw create observation:semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create FTS index for observation text
        await db.run(`
            ::fts create observation:fts {
                extractor: text,
                tokenizer: Simple,
                filters: [Lowercase, Stemmer('english'), Stopwords('en')]
            }
        `);
        // Create relationship table (edge keyed by endpoints + type)
        await db.run(`
            :create relationship {
                from_id: String,
                to_id: String,
                relation_type: String,
                =>
                strength: Float,
                metadata: Json,
                created_at: Validity
            }
        `);
        // Create search cache table
        await db.run(`
            :create search_cache {
                query_hash: String,
                =>
                query_text: String,
                results: Json,
                options: Json,
                embedding: <F32; 1024>,
                created_at: Int
            }
        `);
        // Create search cache HNSW index (smaller m than the data indexes)
        await db.run(`
            ::hnsw create search_cache:semantic {
                dim: 1024,
                m: 16,
                dtype: F32,
                ef_construction: 200,
                fields: [embedding],
                distance: Cosine
            }
        `);
        // Create entity_rank table (for PageRank scores)
        await db.run(`
            :create entity_rank {
                entity_id: String
                =>
                pagerank: Float
            }
        `);
        logger_1.logger.info('TestLargeDataset', 'Database schema initialized with all indexes');
    }
    catch (error) {
        // Only a pre-existing schema is tolerated; anything else is fatal.
        if (!error.message?.includes('already exists')) {
            logger_1.logger.error('TestLargeDataset', 'Schema initialization failed:', error);
            throw error;
        }
        logger_1.logger.info('TestLargeDataset', 'Schema already exists, continuing...');
    }
    try {
        // Check if database already has data - simple approach
        let existingEntityCount = 0;
        try {
            // Cheap existence probe first; only run the full count when non-empty.
            const statsQuery = await db.run(`?[id] := *entity{id} :limit 1`);
            existingEntityCount = statsQuery.rows.length > 0 ? 1 : 0;
            if (existingEntityCount > 0) {
                // Get actual count
                const countQuery = await db.run(`?[count(id)] := *entity{id}`);
                existingEntityCount = countQuery.rows[0]?.[0] || 0;
            }
        }
        catch (e) {
            // Table doesn't exist yet, that's fine
            existingEntityCount = 0;
        }
        if (existingEntityCount > 0 && !cleanStart) {
            logger_1.logger.info('TestLargeDataset', `Database already contains ${existingEntityCount} entities, skipping data creation`);
            logger_1.logger.info('TestLargeDataset', `Use --clean flag to recreate database from scratch`);
        }
        else {
            // Create test data
            const { entityIds, duration: createDuration } = await createTestData(db, embeddingService, config);
            // Total write operations = entities + observations + relationships.
            const totalOps = config.numEntities +
                (config.numEntities * config.numObservationsPerEntity) +
                config.numRelationships;
            const totalTime = createDuration / 1000;
            const throughput = totalOps / totalTime;
            logger_1.logger.info('TestLargeDataset', `\nData creation stats:`);
            logger_1.logger.info('TestLargeDataset', `Total operations: ${totalOps}`);
            logger_1.logger.info('TestLargeDataset', `Total time: ${totalTime.toFixed(2)}s`);
            logger_1.logger.info('TestLargeDataset', `Throughput: ${throughput.toFixed(2)} ops/sec`);
        }
        // Run search tests
        await runSearchTests(hybridSearch, config);
        // Print performance summary
        logger_1.logger.info('TestLargeDataset', '\n=== Performance Summary ===');
        performance_monitor_1.perfMonitor.logSummary();
    }
    catch (error) {
        logger_1.logger.error('TestLargeDataset', 'Test failed:', error);
    }
    finally {
        // Always release the database handle, even on failure.
        db.close();
    }
}
|
|
490
|
+
/**
 * CLI entry point: picks a dataset config from argv[2] (defaulting to
 * 'small'), honors a `--clean` flag to rebuild the database from scratch,
 * runs the test, then prints a completion banner and usage help.
 */
async function main() {
    const sizeArg = process.argv[2];
    const selectedConfig = sizeArg || 'small';
    const wantClean = process.argv.includes('--clean');
    if (wantClean) {
        logger_1.logger.info('TestLargeDataset', 'Clean start mode: will delete existing database');
    }
    await runTest(selectedConfig, wantClean);
    logger_1.logger.info('TestLargeDataset', '\n=== Test completed ===');
    logger_1.logger.info('TestLargeDataset', `\nUsage: npx ts-node src/test-large-dataset.ts [small|medium|large] [--clean]`);
    logger_1.logger.info('TestLargeDataset', ` --clean: Delete existing database before test`);
}
main().catch(console.error);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cozo-memory",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"mcpName": "io.github.tobs-code/cozo-memory",
|
|
5
5
|
"description": "Local-first persistent memory system for AI agents with hybrid search, graph reasoning, and MCP integration",
|
|
6
6
|
"main": "dist/index.js",
|