cozo-memory 1.1.8 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adaptive-retrieval.js +10 -0
- package/dist/dynamic-fusion.js +15 -2
- package/dist/emotional-salience.js +295 -0
- package/dist/hybrid-search.js +51 -18
- package/dist/index.js +705 -9
- package/dist/logger.js +56 -0
- package/dist/memory-activation.js +64 -30
- package/dist/memory-service.js +68 -0
- package/dist/migrate-logging.js +113 -0
- package/dist/performance-monitor.js +108 -0
- package/dist/pre-storage-reasoning.js +351 -0
- package/dist/temporal-conflict-resolution.js +10 -6
- package/dist/test-activation-mcp.js +118 -0
- package/dist/test-advanced-search-mcp.js +204 -0
- package/dist/test-conflicts-mcp.js +173 -0
- package/dist/test-emotional-salience.js +177 -0
- package/dist/test-hierarchical-mcp.js +135 -0
- package/dist/test-large-dataset.js +502 -0
- package/dist/test-logical-edges-mcp.js +215 -0
- package/dist/test-metadata-check.js +69 -0
- package/dist/test-metadata-update.js +92 -0
- package/dist/test-pre-storage-reasoning.js +149 -0
- package/dist/test-salience-mcp.js +94 -0
- package/dist/test-spreading-mcp.js +155 -0
- package/dist/test-suggest-connections-mcp.js +172 -0
- package/dist/test-zettelkasten-evolution.js +255 -0
- package/dist/test-zettelkasten-fixed.js +74 -0
- package/dist/test-zettelkasten-live.js +117 -0
- package/dist/test-zettelkasten-mcp.js +96 -0
- package/dist/zettelkasten-evolution.js +342 -0
- package/package.json +1 -1
package/dist/test-hierarchical-mcp.js
@@ -0,0 +1,135 @@
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
const index_1 = require("./index");
const fs = __importStar(require("fs"));
const TEST_DB_PATH = 'test_hierarchical_mcp';
async function runTest() {
    console.error('=== Testing compress_memory_levels and analyze_memory_distribution MCP Tools ===\n');
    // Clean up old test database
    const dbFile = `${TEST_DB_PATH}.db`;
    if (fs.existsSync(dbFile)) {
        try {
            fs.unlinkSync(dbFile);
            console.error('[Cleanup] Removed old test database');
        }
        catch (e) {
            console.error('[Cleanup] Warning: Could not remove old database:', e);
        }
    }
    const server = new index_1.MemoryServer(TEST_DB_PATH);
    await server.initPromise;
    try {
        console.error('1. Creating test entity...');
        const entity = await server.createEntity({
            name: 'Test Project',
            type: 'Project',
            metadata: {}
        });
        const entityId = entity.id;
        console.error(`✓ Created entity: ${entityId.substring(0, 8)}...\n`);
        console.error('2. Adding observations at L0 (raw) level...');
        // Add multiple observations with L0 level and varied content to avoid deduplication
        for (let i = 0; i < 15; i++) {
            await server.addObservation({
                entity_id: entityId,
                text: `Observation ${i + 1}: This is a test observation about topic ${i * 7} with unique content ${Math.random().toString(36).substring(7)}`,
                metadata: { memory_level: 0 },
                deduplicate: false // Disable deduplication for this test
            });
        }
        console.error(`✓ Added 15 observations at L0 level\n`);
        console.error('3. Testing analyze_memory_distribution...');
        const stats = await server.getHierarchicalMemoryService().getMemoryStats(entityId);
        console.error(`✓ Memory distribution analysis completed:`);
        console.error(` - Total observations: ${stats.total_observations}`);
        console.error(` - L0 (Raw): ${stats.by_level[0] || 0}`);
        console.error(` - L1 (Session): ${stats.by_level[1] || 0}`);
        console.error(` - L2 (Weekly): ${stats.by_level[2] || 0}`);
        console.error(` - L3 (Monthly): ${stats.by_level[3] || 0}\n`);
        if (stats.total_observations !== 15) {
            console.error(`⚠ Warning: Expected 15 observations, got ${stats.total_observations}`);
        }
        console.error('4. Testing compress_memory_levels (will likely not compress due to recency)...');
        // Note: Compression requires observations older than retention period
        // For L0, default is 24 hours, so fresh observations won't be compressed
        const compressionResult = await server.getHierarchicalMemoryService().compressMemoryLevel(entityId, 0);
        if (compressionResult) {
            console.error(`✓ Compression completed:`);
            console.error(` - Level: ${compressionResult.level}`);
            console.error(` - Compressed observations: ${compressionResult.compressed_observations}`);
            console.error(` - Summary ID: ${compressionResult.summary_id.substring(0, 8)}...`);
            console.error(` - Preserved: ${compressionResult.preserved_observations.length}`);
            console.error(` - Deleted: ${compressionResult.deleted_observations.length}`);
        }
        else {
            console.error(`✓ No compression performed (observations too recent or insufficient count)`);
            console.error(` This is expected behavior - observations must be older than retention period`);
        }
        console.error('\n5. Re-checking memory distribution after compression attempt...');
        const statsAfter = await server.getHierarchicalMemoryService().getMemoryStats(entityId);
        console.error(`✓ Updated memory distribution:`);
        console.error(` - Total observations: ${statsAfter.total_observations}`);
        console.error(` - L0 (Raw): ${statsAfter.by_level[0] || 0}`);
        console.error(` - L1 (Session): ${statsAfter.by_level[1] || 0}`);
        console.error(` - L2 (Weekly): ${statsAfter.by_level[2] || 0}`);
        console.error(` - L3 (Monthly): ${statsAfter.by_level[3] || 0}`);
        console.error('\n6. Testing with manually aged observations...');
        // Create observations with old timestamps by manipulating metadata
        const oldTimestamp = Date.now() - (30 * 24 * 60 * 60 * 1000); // 30 days ago
        console.error(` Note: CozoDB Validity uses system time, so we cannot easily simulate old observations`);
        console.error(` In production, compression would work on observations older than retention periods`);
        console.error('\n=== ✓ Hierarchical Memory MCP Tools Test Passed ===\n');
    }
    catch (error) {
        console.error('\n=== ✗ Test Failed ===');
        console.error('Error:', error);
        throw error;
    }
    finally {
        // Cleanup
        server.db.close();
        if (fs.existsSync(dbFile)) {
            try {
                fs.unlinkSync(dbFile);
                console.error('[Cleanup] Test database removed');
            }
            catch (e) {
                console.error('[Cleanup] Warning: Could not remove test database');
            }
        }
    }
}
runTest().catch(console.error);
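Step 4 of the test above relies on compression being retention-gated: `compressMemoryLevel(entityId, 0)` is expected to return nothing because the freshly added L0 observations are younger than the 24-hour L0 retention window the test's comments mention. The actual retention windows for L1-L3 live inside `HierarchicalMemoryService` and are not part of this diff; the snippet below is only a sketch of that eligibility check, with placeholder values for everything except the 24-hour L0 window the test itself cites.

// Sketch only: illustrates the retention gating described in the test's comments.
// The retention values for levels 1-3 are placeholders, not the package's defaults.
const RETENTION_MS = {
    0: 24 * 60 * 60 * 1000,       // L0 (Raw): 24 hours, per the test comment
    1: 7 * 24 * 60 * 60 * 1000,   // L1 (Session): placeholder
    2: 30 * 24 * 60 * 60 * 1000,  // L2 (Weekly): placeholder
    3: 90 * 24 * 60 * 60 * 1000   // L3 (Monthly): placeholder
};
function isOldEnoughToCompress(level, createdAtMs, nowMs = Date.now()) {
    // An observation becomes a compression candidate only after it has aged past
    // its level's retention window, which is why step 4 logs "No compression
    // performed" for observations created moments earlier.
    return nowMs - createdAtMs > (RETENTION_MS[level] ?? Infinity);
}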
package/dist/test-large-dataset.js
@@ -0,0 +1,502 @@
"use strict";
/**
 * Large Dataset Performance Test
 *
 * Tests system performance with realistic data volumes
 */
Object.defineProperty(exports, "__esModule", { value: true });
const embedding_service_1 = require("./embedding-service");
const hybrid_search_1 = require("./hybrid-search");
const performance_monitor_1 = require("./performance-monitor");
const logger_1 = require("./logger");
const uuid_1 = require("uuid");
// Set log level to INFO for cleaner output
logger_1.logger.setLevel(logger_1.LogLevel.INFO);
const CONFIGS = {
    small: {
        numEntities: 50, // Reduced to keep total observations under 100
        numObservationsPerEntity: 1,
        numRelationships: 75,
        searchQueries: 20
    },
    medium: {
        numEntities: 200,
        numObservationsPerEntity: 2,
        numRelationships: 400,
        searchQueries: 50
    },
    large: {
        numEntities: 500,
        numObservationsPerEntity: 3,
        numRelationships: 1000,
        searchQueries: 100
    }
};
// Sample data generators
const ENTITY_TYPES = ['Person', 'Project', 'Technology', 'Document', 'Task'];
const RELATION_TYPES = ['works_on', 'uses', 'depends_on', 'created_by', 'related_to'];
function generateEntityName(type, index) {
    const names = {
        Person: ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank', 'Grace', 'Henry'],
        Project: ['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta'],
        Technology: ['TypeScript', 'React', 'Node.js', 'Python', 'Go', 'Rust', 'Java', 'C++'],
        Document: ['Spec', 'Guide', 'Manual', 'Report', 'Analysis', 'Design', 'Plan', 'Review'],
        Task: ['Implement', 'Test', 'Deploy', 'Review', 'Refactor', 'Document', 'Optimize', 'Debug']
    };
    const nameList = names[type] || ['Item'];
    const baseName = nameList[index % nameList.length];
    return `${baseName} ${Math.floor(index / nameList.length) + 1}`;
}
function generateObservation(entityName, type, index) {
    const templates = [
        `${entityName} is a ${type.toLowerCase()} that focuses on innovation and quality.`,
        `Key characteristics of ${entityName} include reliability and performance.`,
        `${entityName} has been actively developed and maintained since 2020.`,
        `The primary goal of ${entityName} is to deliver exceptional results.`,
        `${entityName} integrates seamlessly with modern development workflows.`
    ];
    return templates[index % templates.length];
}
async function createTestData(db, embeddingService, config) {
    const startTime = Date.now();
    const entityIds = [];
    logger_1.logger.info('TestLargeDataset', `Creating ${config.numEntities} entities...`);
    // Pre-generate embeddings for entity types to speed up creation
    const typeEmbeddings = new Map();
    for (const type of ENTITY_TYPES) {
        const contentEmbed = await embeddingService.embed(`${type} entity`);
        const nameEmbed = await embeddingService.embed(type);
        typeEmbeddings.set(type, { content: contentEmbed, name: nameEmbed });
    }
    // Create entities directly in CozoDB
    for (let i = 0; i < config.numEntities; i++) {
        const type = ENTITY_TYPES[i % ENTITY_TYPES.length];
        const name = generateEntityName(type, i);
        const id = (0, uuid_1.v4)();
        const endTimer = performance_monitor_1.perfMonitor.startTimer('create_entity');
        try {
            // Reuse type embeddings for speed
            const embeddings = typeEmbeddings.get(type);
            // Insert into CozoDB
            await db.run(`
                ?[id, name, type, content_embedding, name_embedding, metadata, created_at] <- [
                    [$id, $name, $type, $content_embedding, $name_embedding, $metadata, $created_at]
                ]
                :put entity { id => name, type, content_embedding, name_embedding, metadata, created_at }
            `, {
                id,
                name,
                type,
                content_embedding: embeddings.content,
                name_embedding: embeddings.name,
                metadata: { index: i, category: type.toLowerCase(), created_at: Date.now() },
                created_at: [Date.now() * 1000, true]
            });
            entityIds.push(id);
            endTimer();
        }
        catch (error) {
            performance_monitor_1.perfMonitor.recordMetric('create_entity', 0, true);
            logger_1.logger.error('TestLargeDataset', `Failed to create entity ${name}:`, error);
        }
        if ((i + 1) % 50 === 0) {
            logger_1.logger.info('TestLargeDataset', `Created ${i + 1}/${config.numEntities} entities`);
        }
    }
    logger_1.logger.info('TestLargeDataset', `Creating observations...`);
    // Pre-generate a few observation embeddings to reuse (for speed)
    const observationTemplates = [
        'This entity focuses on innovation and quality.',
        'Key characteristics include reliability and performance.',
        'Has been actively developed since 2020.',
        'Primary goal is to deliver exceptional results.',
        'Integrates seamlessly with modern workflows.'
    ];
    logger_1.logger.info('TestLargeDataset', `Pre-generating ${observationTemplates.length} observation embeddings...`);
    const templateEmbeddings = await Promise.all(observationTemplates.map(t => embeddingService.embed(t)));
    // Create observations in batches for better performance
    let obsCount = 0;
    const totalObservations = entityIds.length * config.numObservationsPerEntity;
    let lastLogTime = Date.now();
    const BATCH_SIZE = 50; // Insert 50 observations at once
    const observationBatch = [];
    for (const entityId of entityIds) {
        for (let j = 0; j < config.numObservationsPerEntity; j++) {
            // Reuse pre-generated embeddings for speed
            const templateIdx = j % templateEmbeddings.length;
            const text = observationTemplates[templateIdx];
            const embedding = templateEmbeddings[templateIdx];
            observationBatch.push({
                id: (0, uuid_1.v4)(),
                entity_id: entityId,
                text,
                embedding,
                metadata: { confidence: 0.8 + Math.random() * 0.2 },
                session_id: '',
                task_id: '',
                created_at: [Date.now() * 1000, true]
            });
            // Insert batch when it reaches BATCH_SIZE
            if (observationBatch.length >= BATCH_SIZE) {
                const endTimer = performance_monitor_1.perfMonitor.startTimer('add_observation');
                try {
                    // Build batch insert query - remove hyphens from UUIDs for variable names
                    const rows = observationBatch.map(obs => {
                        const cleanId = obs.id.replace(/-/g, '_');
                        return `[$id_${cleanId}, $entity_id_${cleanId}, $text_${cleanId}, $embedding_${cleanId}, $metadata_${cleanId}, $session_id_${cleanId}, $task_id_${cleanId}, $created_at_${cleanId}]`;
                    }).join(',\n ');
                    const params = {};
                    for (const obs of observationBatch) {
                        const cleanId = obs.id.replace(/-/g, '_');
                        params[`id_${cleanId}`] = obs.id;
                        params[`entity_id_${cleanId}`] = obs.entity_id;
                        params[`text_${cleanId}`] = obs.text;
                        params[`embedding_${cleanId}`] = obs.embedding;
                        params[`metadata_${cleanId}`] = obs.metadata;
                        params[`session_id_${cleanId}`] = obs.session_id;
                        params[`task_id_${cleanId}`] = obs.task_id;
                        params[`created_at_${cleanId}`] = obs.created_at;
                    }
                    await db.run(`
                        ?[id, entity_id, text, embedding, metadata, session_id, task_id, created_at] <- [
                            ${rows}
                        ]
                        :put observation { id => entity_id, text, embedding, metadata, session_id, task_id, created_at }
                    `, params);
                    obsCount += observationBatch.length;
                    endTimer();
                    observationBatch.length = 0; // Clear batch
                }
                catch (error) {
                    performance_monitor_1.perfMonitor.recordMetric('add_observation', 0, true);
                    logger_1.logger.error('TestLargeDataset', `Failed to create observation batch at ${obsCount}:`, error);
                    observationBatch.length = 0; // Clear batch on error
                }
                // Log progress
                const now = Date.now();
                if (obsCount % 50 === 0 || (now - lastLogTime) > 10000) {
                    logger_1.logger.info('TestLargeDataset', `Created ${obsCount}/${totalObservations} observations (${((obsCount / totalObservations) * 100).toFixed(1)}%)`);
                    lastLogTime = now;
                }
            }
        }
    }
    // Insert remaining observations
    if (observationBatch.length > 0) {
        const endTimer = performance_monitor_1.perfMonitor.startTimer('add_observation');
        try {
            const rows = observationBatch.map(obs => {
                const cleanId = obs.id.replace(/-/g, '_');
                return `[$id_${cleanId}, $entity_id_${cleanId}, $text_${cleanId}, $embedding_${cleanId}, $metadata_${cleanId}, $session_id_${cleanId}, $task_id_${cleanId}, $created_at_${cleanId}]`;
            }).join(',\n ');
            const params = {};
            for (const obs of observationBatch) {
                const cleanId = obs.id.replace(/-/g, '_');
                params[`id_${cleanId}`] = obs.id;
                params[`entity_id_${cleanId}`] = obs.entity_id;
                params[`text_${cleanId}`] = obs.text;
                params[`embedding_${cleanId}`] = obs.embedding;
                params[`metadata_${cleanId}`] = obs.metadata;
                params[`session_id_${cleanId}`] = obs.session_id;
                params[`task_id_${cleanId}`] = obs.task_id;
                params[`created_at_${cleanId}`] = obs.created_at;
            }
            await db.run(`
                ?[id, entity_id, text, embedding, metadata, session_id, task_id, created_at] <- [
                    ${rows}
                ]
                :put observation { id => entity_id, text, embedding, metadata, session_id, task_id, created_at }
            `, params);
            obsCount += observationBatch.length;
            endTimer();
        }
        catch (error) {
            performance_monitor_1.perfMonitor.recordMetric('add_observation', 0, true);
            logger_1.logger.error('TestLargeDataset', `Failed to create final observation batch:`, error);
        }
        logger_1.logger.info('TestLargeDataset', `Created ${obsCount}/${totalObservations} observations (100.0%)`);
    }
    logger_1.logger.info('TestLargeDataset', `Creating ${config.numRelationships} relationships...`);
    // Create relationships
    for (let i = 0; i < config.numRelationships; i++) {
        const fromId = entityIds[Math.floor(Math.random() * entityIds.length)];
        let toId = entityIds[Math.floor(Math.random() * entityIds.length)];
        // Avoid self-references
        while (toId === fromId) {
            toId = entityIds[Math.floor(Math.random() * entityIds.length)];
        }
        const relationType = RELATION_TYPES[i % RELATION_TYPES.length];
        const endTimer = performance_monitor_1.perfMonitor.startTimer('create_relation');
        try {
            await db.run(`
                ?[from_id, to_id, relation_type, strength, metadata, created_at] <- [
                    [$from_id, $to_id, $relation_type, $strength, $metadata, $created_at]
                ]
                :put relationship { from_id, to_id, relation_type => strength, metadata, created_at }
            `, {
                from_id: fromId,
                to_id: toId,
                relation_type: relationType,
                strength: 0.5 + Math.random() * 0.5,
                metadata: {},
                created_at: [Date.now() * 1000, true]
            });
            endTimer();
        }
        catch (error) {
            performance_monitor_1.perfMonitor.recordMetric('create_relation', 0, true);
        }
        if ((i + 1) % 500 === 0) {
            logger_1.logger.info('TestLargeDataset', `Created ${i + 1}/${config.numRelationships} relationships`);
        }
    }
    const duration = Date.now() - startTime;
    logger_1.logger.info('TestLargeDataset', `Data creation completed in ${(duration / 1000).toFixed(2)}s`);
    return { entityIds, duration };
}
async function runSearchTests(hybridSearch, config) {
    logger_1.logger.info('TestLargeDataset', `Running ${config.searchQueries} search queries...`);
    const queries = [
        'project management',
        'software development',
        'team collaboration',
        'technical documentation',
        'code review process',
        'deployment pipeline',
        'testing strategy',
        'performance optimization'
    ];
    for (let i = 0; i < config.searchQueries; i++) {
        const query = queries[i % queries.length];
        const endTimer = performance_monitor_1.perfMonitor.startTimer('hybrid_search');
        try {
            await hybridSearch.search({ query, limit: 10 });
            endTimer();
        }
        catch (error) {
            performance_monitor_1.perfMonitor.recordMetric('hybrid_search', 0, true);
            logger_1.logger.error('TestLargeDataset', `Search failed for query "${query}":`, error);
        }
        if ((i + 1) % 20 === 0) {
            logger_1.logger.info('TestLargeDataset', `Completed ${i + 1}/${config.searchQueries} searches`);
        }
    }
}
async function runTest(configName, cleanStart = false) {
    const config = CONFIGS[configName];
    if (!config) {
        logger_1.logger.error('TestLargeDataset', `Unknown config: ${configName}`);
        return;
    }
    logger_1.logger.info('TestLargeDataset', `\n=== Starting ${configName.toUpperCase()} dataset test ===`);
    logger_1.logger.info('TestLargeDataset', `Config: ${JSON.stringify(config, null, 2)}`);
    // Use the real CozoDB setup like in index.ts
    const { CozoDb } = await import('cozo-node');
    const dbPath = `test_large_${configName}.cozo.db`;
    // Delete old database only if cleanStart flag is set
    const fs = await import('fs');
    if (cleanStart && fs.existsSync(dbPath)) {
        logger_1.logger.info('TestLargeDataset', `Removing old database: ${dbPath}`);
        fs.unlinkSync(dbPath);
    }
    else if (fs.existsSync(dbPath)) {
        logger_1.logger.info('TestLargeDataset', `Using existing database: ${dbPath}`);
    }
    const db = new CozoDb('sqlite', dbPath);
    const embeddingService = new embedding_service_1.EmbeddingService();
    const hybridSearch = new hybrid_search_1.HybridSearch(db, embeddingService);
    // Initialize schema like the real server does
    try {
        // Create entity table
        await db.run(`
            :create entity {
                id: String,
                =>
                name: String,
                type: String,
                content_embedding: <F32; 1024>,
                name_embedding: <F32; 1024>,
                metadata: Json,
                created_at: Validity
            }
        `);
        // Create content HNSW index
        await db.run(`
            ::hnsw create entity:semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [content_embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create name HNSW index
        await db.run(`
            ::hnsw create entity:name_semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [name_embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create FTS index for entity names
        await db.run(`
            ::fts create entity:fts {
                extractor: name,
                tokenizer: Simple,
                filters: [Lowercase, Stemmer('english'), Stopwords('en')]
            }
        `);
        // Create observation table
        await db.run(`
            :create observation {
                id: String,
                =>
                entity_id: String,
                text: String,
                embedding: <F32; 1024>,
                metadata: Json,
                session_id: String,
                task_id: String,
                created_at: Validity
            }
        `);
        // Create observation HNSW index
        await db.run(`
            ::hnsw create observation:semantic {
                dim: 1024,
                m: 50,
                dtype: F32,
                ef_construction: 200,
                fields: [embedding],
                distance: Cosine,
                extend_candidates: true,
                keep_pruned_connections: true
            }
        `);
        // Create FTS index for observation text
        await db.run(`
            ::fts create observation:fts {
                extractor: text,
                tokenizer: Simple,
                filters: [Lowercase, Stemmer('english'), Stopwords('en')]
            }
        `);
        // Create relationship table
        await db.run(`
            :create relationship {
                from_id: String,
                to_id: String,
                relation_type: String,
                =>
                strength: Float,
                metadata: Json,
                created_at: Validity
            }
        `);
        // Create search cache table
        await db.run(`
            :create search_cache {
                query_hash: String,
                =>
                query_text: String,
                results: Json,
                options: Json,
                embedding: <F32; 1024>,
                created_at: Int
            }
        `);
        // Create search cache HNSW index
        await db.run(`
            ::hnsw create search_cache:semantic {
                dim: 1024,
                m: 16,
                dtype: F32,
                ef_construction: 200,
                fields: [embedding],
                distance: Cosine
            }
        `);
        // Create entity_rank table (for PageRank scores)
        await db.run(`
            :create entity_rank {
                entity_id: String
                =>
                pagerank: Float
            }
        `);
        logger_1.logger.info('TestLargeDataset', 'Database schema initialized with all indexes');
    }
    catch (error) {
        if (!error.message?.includes('already exists')) {
            logger_1.logger.error('TestLargeDataset', 'Schema initialization failed:', error);
            throw error;
        }
        logger_1.logger.info('TestLargeDataset', 'Schema already exists, continuing...');
    }
    try {
        // Check if database already has data - simple approach
        let existingEntityCount = 0;
        try {
            const statsQuery = await db.run(`?[id] := *entity{id} :limit 1`);
            existingEntityCount = statsQuery.rows.length > 0 ? 1 : 0;
            if (existingEntityCount > 0) {
                // Get actual count
                const countQuery = await db.run(`?[count(id)] := *entity{id}`);
                existingEntityCount = countQuery.rows[0]?.[0] || 0;
            }
        }
        catch (e) {
            // Table doesn't exist yet, that's fine
            existingEntityCount = 0;
        }
        if (existingEntityCount > 0 && !cleanStart) {
            logger_1.logger.info('TestLargeDataset', `Database already contains ${existingEntityCount} entities, skipping data creation`);
            logger_1.logger.info('TestLargeDataset', `Use --clean flag to recreate database from scratch`);
        }
        else {
            // Create test data
            const { entityIds, duration: createDuration } = await createTestData(db, embeddingService, config);
            const totalOps = config.numEntities +
                (config.numEntities * config.numObservationsPerEntity) +
                config.numRelationships;
            const totalTime = createDuration / 1000;
            const throughput = totalOps / totalTime;
            logger_1.logger.info('TestLargeDataset', `\nData creation stats:`);
            logger_1.logger.info('TestLargeDataset', `Total operations: ${totalOps}`);
            logger_1.logger.info('TestLargeDataset', `Total time: ${totalTime.toFixed(2)}s`);
            logger_1.logger.info('TestLargeDataset', `Throughput: ${throughput.toFixed(2)} ops/sec`);
        }
        // Run search tests
        await runSearchTests(hybridSearch, config);
        // Print performance summary
        logger_1.logger.info('TestLargeDataset', '\n=== Performance Summary ===');
        performance_monitor_1.perfMonitor.logSummary();
    }
    catch (error) {
        logger_1.logger.error('TestLargeDataset', 'Test failed:', error);
    }
    finally {
        db.close();
    }
}
// Run tests
async function main() {
    const configName = process.argv[2] || 'small';
    const cleanStart = process.argv.includes('--clean');
    if (cleanStart) {
        logger_1.logger.info('TestLargeDataset', 'Clean start mode: will delete existing database');
    }
    await runTest(configName, cleanStart);
    logger_1.logger.info('TestLargeDataset', '\n=== Test completed ===');
    logger_1.logger.info('TestLargeDataset', `\nUsage: npx ts-node src/test-large-dataset.ts [small|medium|large] [--clean]`);
    logger_1.logger.info('TestLargeDataset', ` --clean: Delete existing database before test`);
}
main().catch(console.error);
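The observation loader above avoids one database round trip per row by batching 50 rows into a single parameterized `:put`, generating a distinct `$` parameter name per row (UUID hyphens are replaced with underscores so the names stay valid). Stripped of the test's bookkeeping, the pattern looks roughly like the sketch below; `demo_batch` and its `{ id => text }` schema are hypothetical stand-ins, and `db` is assumed to be an open cozo-node `CozoDb` instance.

// Minimal sketch of the batched :put pattern used in createTestData above.
// Assumes a relation created with `:create demo_batch { id: String => text: String }`.
async function putBatch(db, items) {
    const params = {};
    const tuples = items.map(item => {
        const key = item.id.replace(/-/g, '_'); // parameter names cannot contain '-'
        params[`id_${key}`] = item.id;
        params[`text_${key}`] = item.text;
        return `[$id_${key}, $text_${key}]`;
    }).join(',\n        ');
    // One db.run call inserts the whole batch instead of one call per row.
    await db.run(`
        ?[id, text] <- [
            ${tuples}
        ]
        :put demo_batch { id => text }
    `, params);
}

As the script's own usage message notes, the compiled test takes a size tier and an optional reset flag, e.g. `node dist/test-large-dataset.js medium --clean`.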