agentic-flow 1.7.3 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/test-neural.md +0 -5
- package/.claude/answer.md +1 -0
- package/.claude/settings.json +19 -20
- package/CHANGELOG.md +0 -117
- package/README.md +17 -81
- package/dist/agentdb/benchmarks/comprehensive-benchmark.js +664 -0
- package/dist/agentdb/benchmarks/frontier-benchmark.js +419 -0
- package/dist/agentdb/benchmarks/reflexion-benchmark.js +370 -0
- package/dist/agentdb/cli/agentdb-cli.js +717 -0
- package/dist/agentdb/controllers/CausalMemoryGraph.js +322 -0
- package/dist/agentdb/controllers/CausalRecall.js +281 -0
- package/dist/agentdb/controllers/EmbeddingService.js +118 -0
- package/dist/agentdb/controllers/ExplainableRecall.js +387 -0
- package/dist/agentdb/controllers/NightlyLearner.js +382 -0
- package/dist/agentdb/controllers/ReflexionMemory.js +239 -0
- package/dist/agentdb/controllers/SkillLibrary.js +276 -0
- package/dist/agentdb/controllers/frontier-index.js +9 -0
- package/dist/agentdb/controllers/index.js +8 -0
- package/dist/agentdb/index.js +32 -0
- package/dist/agentdb/optimizations/BatchOperations.js +198 -0
- package/dist/agentdb/optimizations/QueryOptimizer.js +225 -0
- package/dist/agentdb/optimizations/index.js +7 -0
- package/dist/agentdb/tests/frontier-features.test.js +665 -0
- package/dist/cli-proxy.js +2 -33
- package/dist/mcp/standalone-stdio.js +200 -4
- package/dist/memory/SharedMemoryPool.js +211 -0
- package/dist/memory/index.js +6 -0
- package/dist/reasoningbank/AdvancedMemory.js +239 -0
- package/dist/reasoningbank/HybridBackend.js +305 -0
- package/dist/reasoningbank/index-new.js +87 -0
- package/dist/reasoningbank/index.js +23 -44
- package/dist/utils/cli.js +0 -22
- package/docs/AGENTDB_TESTING.md +411 -0
- package/docs/v1.7.1-QUICK-START.md +399 -0
- package/package.json +4 -4
- package/scripts/run-validation.sh +165 -0
- package/scripts/test-agentdb.sh +153 -0
- package/.claude/skills/agentdb-memory-patterns/SKILL.md +0 -166
- package/.claude/skills/agentdb-vector-search/SKILL.md +0 -126
- package/.claude/skills/agentic-flow/agentdb-memory-patterns/SKILL.md +0 -166
- package/.claude/skills/agentic-flow/agentdb-vector-search/SKILL.md +0 -126
- package/.claude/skills/agentic-flow/reasoningbank-intelligence/SKILL.md +0 -201
- package/.claude/skills/agentic-flow/swarm-orchestration/SKILL.md +0 -179
- package/.claude/skills/reasoningbank-intelligence/SKILL.md +0 -201
- package/.claude/skills/skill-builder/README.md +0 -308
- package/.claude/skills/skill-builder/SKILL.md +0 -910
- package/.claude/skills/skill-builder/docs/SPECIFICATION.md +0 -358
- package/.claude/skills/skill-builder/resources/schemas/skill-frontmatter.schema.json +0 -41
- package/.claude/skills/skill-builder/resources/templates/full-skill.template +0 -118
- package/.claude/skills/skill-builder/resources/templates/minimal-skill.template +0 -38
- package/.claude/skills/skill-builder/scripts/generate-skill.sh +0 -334
- package/.claude/skills/skill-builder/scripts/validate-skill.sh +0 -198
- package/.claude/skills/swarm-orchestration/SKILL.md +0 -179
- package/docs/AGENTDB_INTEGRATION.md +0 -379
package/dist/agentdb/benchmarks/comprehensive-benchmark.js
@@ -0,0 +1,664 @@
/**
 * Comprehensive AgentDB Benchmark Suite
 *
 * Tests all 5 SOTA memory patterns with production workloads:
 * 1. Reflexion episodic replay
 * 2. Skill library operations
 * 3. Mixed memory (facts + notes)
 * 4. Event consolidation
 * 5. Graph-aware recall
 *
 * Metrics tracked:
 * - Latency (p50, p95, p99)
 * - Throughput (ops/sec)
 * - Memory usage (RSS, heap)
 * - Hit rates and accuracy
 * - Concurrency performance
 */
import Database from 'better-sqlite3';
import { ReflexionMemory } from '../controllers/ReflexionMemory';
import { SkillLibrary } from '../controllers/SkillLibrary';
import { EmbeddingService } from '../controllers/EmbeddingService';
import * as fs from 'fs';
import * as path from 'path';

export class ComprehensiveBenchmark {
    db;
    reflexion;
    skills;
    embedder;
    results = [];

    constructor(dbPath = ':memory:') {
        this.db = new Database(dbPath);
        // Configure for performance
        this.db.pragma('journal_mode = WAL');
        this.db.pragma('synchronous = NORMAL');
        this.db.pragma('cache_size = -64000'); // 64MB cache
        this.db.pragma('temp_store = MEMORY');
        this.db.pragma('mmap_size = 268435456'); // 256MB mmap
        this.embedder = new EmbeddingService({
            model: 'all-MiniLM-L6-v2',
            dimension: 384,
            provider: 'transformers'
        });
        this.reflexion = new ReflexionMemory(this.db, this.embedder);
        this.skills = new SkillLibrary(this.db, this.embedder);
    }

    async initialize() {
        console.log('🔧 Initializing AgentDB Benchmark Suite...\n');
        // Load schema
        const schemaPath = path.join(__dirname, '../schemas/schema.sql');
        const schema = fs.readFileSync(schemaPath, 'utf-8');
        this.db.exec(schema);
        // Initialize embedder
        await this.embedder.initialize();
        console.log('✅ Initialization complete\n');
    }

    /**
     * Run all benchmarks
     */
    async runAll() {
        console.log('╔════════════════════════════════════════════════════════════════╗');
        console.log('║           AgentDB Comprehensive Benchmark Suite                 ║');
        console.log('╚════════════════════════════════════════════════════════════════╝\n');
        // Core performance tests
        await this.benchmarkEpisodeInsertion();
        await this.benchmarkEpisodeRetrieval();
        await this.benchmarkSkillConsolidation();
        await this.benchmarkConcurrentWrites();
        await this.benchmarkConcurrentReads();
        await this.benchmarkMixedWorkload();
        // Scalability tests
        await this.benchmarkLargeDataset();
        await this.benchmarkMemoryPressure();
        // Production scenarios
        await this.benchmarkRealtimeAgent();
        await this.benchmarkBatchProcessing();
        this.printSummary();
        this.generateReport();
        return this.results;
    }

    /**
     * Benchmark 1: Episode Insertion Performance
     */
    async benchmarkEpisodeInsertion() {
        console.log('\n📊 Benchmark 1: Episode Insertion Performance');
        console.log('─'.repeat(70));
        console.log('Testing: Bulk episode storage with embeddings\n');
        const count = 10000;
        const latencies = [];
        const memStart = process.memoryUsage();
        console.log(`Inserting ${count} episodes...`);
        const startTime = Date.now();
        for (let i = 0; i < count; i++) {
            const episodeStart = Date.now();
            await this.reflexion.storeEpisode({
                sessionId: `session-${Math.floor(i / 100)}`,
                task: `task_${i % 50}`,
                input: `Input data for episode ${i}`,
                output: `Generated output ${i}`,
                critique: this.generateCritique(i),
                reward: Math.random(),
                success: Math.random() > 0.3,
                latencyMs: Math.floor(Math.random() * 500),
                tokensUsed: Math.floor(Math.random() * 1000)
            });
            latencies.push(Date.now() - episodeStart);
            if ((i + 1) % 2500 === 0) {
                const progress = ((i + 1) / count * 100).toFixed(1);
                const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
                console.log(`  ${progress}% complete (${elapsed}s)`);
            }
        }
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        this.recordMetrics('Episode Insertion', {
            duration,
            operations: count,
            latencies,
            memStart,
            memEnd,
            passed: duration < 60000 // Should complete in < 60s
        });
    }

    /**
     * Benchmark 2: Episode Retrieval Performance
     */
    async benchmarkEpisodeRetrieval() {
        console.log('\n📊 Benchmark 2: Episode Retrieval Performance');
        console.log('─'.repeat(70));
        console.log('Testing: Semantic search with k-NN over episodes\n');
        const queries = 1000;
        const latencies = [];
        const memStart = process.memoryUsage();
        console.log(`Running ${queries} retrieval queries...`);
        const startTime = Date.now();
        for (let i = 0; i < queries; i++) {
            const queryStart = Date.now();
            await this.reflexion.retrieveRelevant({
                task: `task_${Math.floor(Math.random() * 50)}`,
                k: 5,
                timeWindowDays: 7
            });
            latencies.push(Date.now() - queryStart);
            if ((i + 1) % 250 === 0) {
                const progress = ((i + 1) / queries * 100).toFixed(1);
                console.log(`  ${progress}% complete`);
            }
        }
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        this.recordMetrics('Episode Retrieval', {
            duration,
            operations: queries,
            latencies,
            memStart,
            memEnd,
            passed: this.calculateP95(latencies) <= 50 // p95 ≤ 50ms
        });
    }

    /**
     * Benchmark 3: Skill Consolidation
     */
    async benchmarkSkillConsolidation() {
        console.log('\n📊 Benchmark 3: Skill Consolidation');
        console.log('─'.repeat(70));
        console.log('Testing: Episode → Skill transformation\n');
        const memStart = process.memoryUsage();
        const startTime = Date.now();
        console.log('Running consolidation job...');
        const created = this.skills.consolidateEpisodesIntoSkills({
            minAttempts: 3,
            minReward: 0.5,
            timeWindowDays: 30
        });
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        console.log(`✅ Created ${created} skills in ${duration}ms`);
        this.recordMetrics('Skill Consolidation', {
            duration,
            operations: created,
            latencies: [duration],
            memStart,
            memEnd,
            passed: duration < 5000 && created > 0 // < 5s, creates skills
        });
    }

    /**
     * Benchmark 4: Concurrent Writes
     */
    async benchmarkConcurrentWrites() {
        console.log('\n📊 Benchmark 4: Concurrent Write Performance');
        console.log('─'.repeat(70));
        console.log('Testing: Multiple agents writing simultaneously\n');
        const writers = 10;
        const writesPerWriter = 100;
        const allLatencies = [];
        const memStart = process.memoryUsage();
        console.log(`Running ${writers} concurrent writers...`);
        const startTime = Date.now();
        const writerPromises = Array.from({ length: writers }, async (_, writerIdx) => {
            const latencies = [];
            for (let i = 0; i < writesPerWriter; i++) {
                const writeStart = Date.now();
                await this.reflexion.storeEpisode({
                    sessionId: `concurrent-session-${writerIdx}`,
                    task: `concurrent_task_${i}`,
                    input: `Writer ${writerIdx} input ${i}`,
                    output: `Writer ${writerIdx} output ${i}`,
                    critique: 'Concurrent write test',
                    reward: Math.random(),
                    success: true
                });
                latencies.push(Date.now() - writeStart);
            }
            return latencies;
        });
        const results = await Promise.all(writerPromises);
        results.forEach(latencies => allLatencies.push(...latencies));
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        this.recordMetrics('Concurrent Writes', {
            duration,
            operations: writers * writesPerWriter,
            latencies: allLatencies,
            memStart,
            memEnd,
            passed: this.calculateP95(allLatencies) <= 100 // p95 ≤ 100ms under concurrency
        });
    }

    /**
     * Benchmark 5: Concurrent Reads
     */
    async benchmarkConcurrentReads() {
        console.log('\n📊 Benchmark 5: Concurrent Read Performance');
        console.log('─'.repeat(70));
        console.log('Testing: Multiple agents reading simultaneously\n');
        const readers = 20;
        const readsPerReader = 50;
        const allLatencies = [];
        const memStart = process.memoryUsage();
        console.log(`Running ${readers} concurrent readers...`);
        const startTime = Date.now();
        const readerPromises = Array.from({ length: readers }, async () => {
            const latencies = [];
            for (let i = 0; i < readsPerReader; i++) {
                const readStart = Date.now();
                await this.reflexion.retrieveRelevant({
                    task: `task_${Math.floor(Math.random() * 50)}`,
                    k: 5
                });
                latencies.push(Date.now() - readStart);
            }
            return latencies;
        });
        const results = await Promise.all(readerPromises);
        results.forEach(latencies => allLatencies.push(...latencies));
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        this.recordMetrics('Concurrent Reads', {
            duration,
            operations: readers * readsPerReader,
            latencies: allLatencies,
            memStart,
            memEnd,
            passed: this.calculateP95(allLatencies) <= 75 // p95 ≤ 75ms
        });
    }

    /**
     * Benchmark 6: Mixed Workload (Read + Write)
     */
    async benchmarkMixedWorkload() {
        console.log('\n📊 Benchmark 6: Mixed Workload Performance');
        console.log('─'.repeat(70));
        console.log('Testing: Simultaneous reads and writes\n');
        const workers = 10;
        const opsPerWorker = 100;
        const allLatencies = [];
        const memStart = process.memoryUsage();
        console.log(`Running ${workers} workers with mixed operations...`);
        const startTime = Date.now();
        const workerPromises = Array.from({ length: workers }, async (_, idx) => {
            const latencies = [];
            for (let i = 0; i < opsPerWorker; i++) {
                const opStart = Date.now();
                // Alternate between reads and writes
                if (i % 2 === 0) {
                    await this.reflexion.storeEpisode({
                        sessionId: `mixed-${idx}`,
                        task: `mixed_task_${i}`,
                        input: 'input',
                        output: 'output',
                        critique: 'critique',
                        reward: Math.random(),
                        success: true
                    });
                } else {
                    await this.reflexion.retrieveRelevant({
                        task: `mixed_task_${Math.floor(Math.random() * 50)}`,
                        k: 3
                    });
                }
                latencies.push(Date.now() - opStart);
            }
            return latencies;
        });
        const results = await Promise.all(workerPromises);
        results.forEach(latencies => allLatencies.push(...latencies));
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        this.recordMetrics('Mixed Workload', {
            duration,
            operations: workers * opsPerWorker,
            latencies: allLatencies,
            memStart,
            memEnd,
            passed: this.calculateP95(allLatencies) <= 80 // p95 ≤ 80ms
        });
    }

    /**
     * Benchmark 7: Large Dataset Performance
     */
    async benchmarkLargeDataset() {
        console.log('\n📊 Benchmark 7: Large Dataset Scalability');
        console.log('─'.repeat(70));
        console.log('Testing: Performance at 50k+ memories\n');
        const targetSize = 50000;
        const currentSize = this.db.prepare('SELECT COUNT(*) as count FROM episodes').get();
        const needed = Math.max(0, targetSize - currentSize.count);
        if (needed > 0) {
            console.log(`Adding ${needed} episodes to reach ${targetSize} target...`);
            const batchSize = 1000;
            const batches = Math.ceil(needed / batchSize);
            for (let batch = 0; batch < batches; batch++) {
                const transaction = this.db.transaction((episodes) => {
                    const stmt = this.db.prepare(`
                        INSERT INTO episodes (session_id, task, input, output, reward, success)
                        VALUES (?, ?, ?, ?, ?, ?)
                    `);
                    for (const ep of episodes) {
                        stmt.run(ep.session_id, ep.task, ep.input, ep.output, ep.reward, ep.success);
                    }
                });
                const episodes = Array.from({ length: Math.min(batchSize, needed - batch * batchSize) }, (_, i) => ({
                    session_id: `batch-${batch}`,
                    task: `task_${i % 100}`,
                    input: `input ${i}`,
                    output: `output ${i}`,
                    reward: Math.random(),
                    success: Math.random() > 0.5 ? 1 : 0
                }));
                transaction(episodes);
                if ((batch + 1) % 10 === 0) {
                    console.log(`  Progress: ${((batch + 1) / batches * 100).toFixed(1)}%`);
                }
            }
        }
        // Now test retrieval at scale
        const queries = 100;
        const latencies = [];
        const memStart = process.memoryUsage();
        console.log(`\nTesting retrieval over ${targetSize} memories...`);
        const startTime = Date.now();
        for (let i = 0; i < queries; i++) {
            const queryStart = Date.now();
            await this.reflexion.retrieveRelevant({
                task: `task_${Math.floor(Math.random() * 100)}`,
                k: 5
            });
            latencies.push(Date.now() - queryStart);
        }
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        this.recordMetrics('Large Dataset (50k+)', {
            duration,
            operations: queries,
            latencies,
            memStart,
            memEnd,
            passed: this.calculateP95(latencies) <= 100 // p95 ≤ 100ms at scale
        });
    }

    /**
     * Benchmark 8: Memory Pressure
     */
    async benchmarkMemoryPressure() {
        console.log('\n📊 Benchmark 8: Memory Pressure Test');
        console.log('─'.repeat(70));
        console.log('Testing: System stability under memory constraints\n');
        const operations = 1000;
        const latencies = [];
        const memStart = process.memoryUsage();
        console.log(`Running ${operations} operations with cache pressure...`);
        // Force garbage collection if available (requires node --expose-gc)
        if (global.gc) {
            global.gc();
        }
        const startTime = Date.now();
        for (let i = 0; i < operations; i++) {
            const opStart = Date.now();
            // Mix of memory-intensive operations
            if (i % 3 === 0) {
                await this.reflexion.storeEpisode({
                    sessionId: `mem-pressure-${i}`,
                    task: `task_${i}`,
                    input: 'A'.repeat(1000), // Large input
                    output: 'B'.repeat(1000), // Large output
                    critique: 'C'.repeat(500),
                    reward: Math.random(),
                    success: true
                });
            } else {
                await this.reflexion.retrieveRelevant({
                    task: `task_${Math.floor(Math.random() * 100)}`,
                    k: 10 // Higher k for more memory use
                });
            }
            latencies.push(Date.now() - opStart);
            if ((i + 1) % 250 === 0) {
                console.log(`  ${((i + 1) / operations * 100).toFixed(1)}% complete`);
            }
        }
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        const memoryGrowth = (memEnd.heapUsed - memStart.heapUsed) / 1024 / 1024;
        this.recordMetrics('Memory Pressure', {
            duration,
            operations,
            latencies,
            memStart,
            memEnd,
            passed: memoryGrowth < 200 && this.calculateP95(latencies) <= 150, // < 200MB growth
            details: `Memory growth: ${memoryGrowth.toFixed(2)}MB`
        });
    }

    /**
     * Benchmark 9: Realtime Agent Simulation
     */
    async benchmarkRealtimeAgent() {
        console.log('\n📊 Benchmark 9: Realtime Agent Simulation');
        console.log('─'.repeat(70));
        console.log('Testing: Agent making real-time decisions with memory\n');
        const tasks = 50;
        const attemptsPerTask = 5;
        const allLatencies = [];
        const memStart = process.memoryUsage();
        console.log(`Simulating ${tasks} tasks with ${attemptsPerTask} attempts each...`);
        const startTime = Date.now();
        for (let taskIdx = 0; taskIdx < tasks; taskIdx++) {
            const task = `realtime_task_${taskIdx}`;
            for (let attempt = 0; attempt < attemptsPerTask; attempt++) {
                const cycleStart = Date.now();
                // 1. Retrieve relevant memories
                const memories = await this.reflexion.retrieveRelevant({
                    task,
                    k: 3,
                    onlyFailures: attempt > 0
                });
                // 2. Simulate task execution
                const executionTime = Math.random() * 100;
                await new Promise(resolve => setTimeout(resolve, executionTime));
                // 3. Store result
                const reward = Math.min(0.3 + attempt * 0.15, 1.0); // Improvement over attempts
                await this.reflexion.storeEpisode({
                    sessionId: `realtime-${taskIdx}`,
                    task,
                    input: `Attempt ${attempt}`,
                    output: `Result ${attempt}`,
                    critique: attempt < 3 ? 'Needs improvement' : 'Good progress',
                    reward,
                    success: reward > 0.7
                });
                allLatencies.push(Date.now() - cycleStart);
            }
            if ((taskIdx + 1) % 10 === 0) {
                console.log(`  Task ${taskIdx + 1}/${tasks} complete`);
            }
        }
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        this.recordMetrics('Realtime Agent', {
            duration,
            operations: tasks * attemptsPerTask,
            latencies: allLatencies,
            memStart,
            memEnd,
            passed: this.calculateP95(allLatencies) <= 200 // p95 ≤ 200ms for full cycle
        });
    }

    /**
     * Benchmark 10: Batch Processing
     */
    async benchmarkBatchProcessing() {
        console.log('\n📊 Benchmark 10: Batch Processing Performance');
        console.log('─'.repeat(70));
        console.log('Testing: High-throughput batch operations\n');
        const batchSize = 100;
        const batches = 50;
        const allLatencies = [];
        const memStart = process.memoryUsage();
        console.log(`Processing ${batches} batches of ${batchSize} episodes...`);
        const startTime = Date.now();
        for (let batchIdx = 0; batchIdx < batches; batchIdx++) {
            const batchStart = Date.now();
            const episodes = Array.from({ length: batchSize }, (_, i) => ({
                sessionId: `batch-${batchIdx}`,
                task: `batch_task_${i}`,
                input: `batch input ${i}`,
                output: `batch output ${i}`,
                critique: 'Batch processed',
                reward: Math.random(),
                success: Math.random() > 0.5
            }));
            // storeEpisode is async, so the batch is stored concurrently rather than
            // inside a better-sqlite3 transaction (those are synchronous only).
            await Promise.all(episodes.map(ep => this.reflexion.storeEpisode(ep)));
            allLatencies.push(Date.now() - batchStart);
            if ((batchIdx + 1) % 10 === 0) {
                console.log(`  ${((batchIdx + 1) / batches * 100).toFixed(1)}% complete`);
            }
        }
        const duration = Date.now() - startTime;
        const memEnd = process.memoryUsage();
        const throughput = (batches * batchSize) / (duration / 1000);
        this.recordMetrics('Batch Processing', {
            duration,
            operations: batches * batchSize,
            latencies: allLatencies,
            memStart,
            memEnd,
            passed: throughput >= 100, // ≥ 100 episodes/sec
            details: `Throughput: ${throughput.toFixed(0)} eps/sec`
        });
    }

    // ========================================================================
    // Helper Methods
    // ========================================================================

    recordMetrics(testName, data) {
        const { duration, operations, latencies, memStart, memEnd, passed, details } = data;
        latencies.sort((a, b) => a - b);
        const metrics = {
            testName,
            duration,
            operations,
            opsPerSecond: (operations / duration) * 1000,
            latency: {
                min: latencies[0] || 0,
                max: latencies[latencies.length - 1] || 0,
                avg: latencies.reduce((a, b) => a + b, 0) / latencies.length || 0,
                p50: latencies[Math.floor(latencies.length * 0.50)] || 0,
                p95: latencies[Math.floor(latencies.length * 0.95)] || 0,
                p99: latencies[Math.floor(latencies.length * 0.99)] || 0
            },
            memory: {
                heapUsed: memEnd.heapUsed - memStart.heapUsed,
                heapTotal: memEnd.heapTotal - memStart.heapTotal,
                rss: memEnd.rss - memStart.rss
            },
            passed,
            details
        };
        this.results.push(metrics);
        console.log(`\n📊 Results:`);
        console.log(`  Duration: ${duration.toFixed(0)}ms`);
        console.log(`  Operations: ${operations}`);
        console.log(`  Throughput: ${metrics.opsPerSecond.toFixed(1)} ops/sec`);
        console.log(`  Latency p50: ${metrics.latency.p50.toFixed(1)}ms`);
        console.log(`  Latency p95: ${metrics.latency.p95.toFixed(1)}ms`);
        console.log(`  Latency p99: ${metrics.latency.p99.toFixed(1)}ms`);
        console.log(`  Memory Δ: ${(metrics.memory.heapUsed / 1024 / 1024).toFixed(2)}MB`);
        if (details)
            console.log(`  ${details}`);
        console.log(`\n${passed ? '✅ PASSED' : '❌ FAILED'}`);
    }

    calculateP95(latencies) {
        const sorted = [...latencies].sort((a, b) => a - b);
        return sorted[Math.floor(sorted.length * 0.95)] || 0;
    }

    generateCritique(index) {
        const critiques = [
            'Edge case: empty input not handled',
            'Performance: could optimize query',
            'Bug: off-by-one error in loop',
            'Missing: input validation needed',
            'Improvement: add error handling',
            'Success: all edge cases covered',
            'Optimization: reduced time complexity',
            'Enhancement: added comprehensive tests'
        ];
        return critiques[index % critiques.length];
    }

    printSummary() {
        console.log('\n' + '─'.repeat(70));
        console.log('\n📊 BENCHMARK SUMMARY\n');
        console.log('─'.repeat(70));
        const passed = this.results.filter(r => r.passed).length;
        const total = this.results.length;
        console.log('\n╔════════════════════════════════════════════════════════════════╗');
        console.log('║ Test Name                         │ Result   │   p95 Latency   ║');
        console.log('╠════════════════════════════════════════════════════════════════╣');
        this.results.forEach(result => {
            const status = result.passed ? '✅ PASS' : '❌ FAIL';
            const name = result.testName.padEnd(33);
            const latency = `${result.latency.p95.toFixed(1)}ms`.padStart(12);
            console.log(`║ ${name}│ ${status}  │ ${latency}    ║`);
        });
        console.log('╚════════════════════════════════════════════════════════════════╝');
        console.log(`\n🎯 Overall: ${passed}/${total} tests passed (${((passed / total) * 100).toFixed(1)}%)`);
        // Calculate aggregate metrics
        const totalOps = this.results.reduce((sum, r) => sum + r.operations, 0);
        const totalDuration = this.results.reduce((sum, r) => sum + r.duration, 0);
        const avgThroughput = (totalOps / totalDuration) * 1000;
        console.log(`\n📈 Aggregate Metrics:`);
        console.log(`  Total Operations: ${totalOps.toLocaleString()}`);
        console.log(`  Total Duration: ${(totalDuration / 1000).toFixed(1)}s`);
        console.log(`  Avg Throughput: ${avgThroughput.toFixed(1)} ops/sec`);
        if (passed === total) {
            console.log('\n✨ All benchmarks passed! AgentDB is production-ready.\n');
        } else {
            console.log('\n⚠️ Some benchmarks failed. Review optimizations needed.\n');
        }
    }

    generateReport() {
        const report = {
            timestamp: new Date().toISOString(),
            summary: {
                totalTests: this.results.length,
                passed: this.results.filter(r => r.passed).length,
                failed: this.results.filter(r => !r.passed).length
            },
            results: this.results,
            environment: {
                nodeVersion: process.version,
                platform: process.platform,
                arch: process.arch,
                memory: process.memoryUsage()
            }
        };
        const reportPath = path.join(__dirname, 'benchmark-results.json');
        fs.writeFileSync(reportPath, JSON.stringify(report, null, 2));
        console.log(`📄 Detailed report saved to: ${reportPath}`);
    }

    close() {
        this.db.close();
    }
}

// Run if called directly
if (require.main === module) {
    (async () => {
        const benchmark = new ComprehensiveBenchmark();
        await benchmark.initialize();
        await benchmark.runAll();
        benchmark.close();
        process.exit(0);
    })().catch(err => {
        console.error('Benchmark failed:', err);
        process.exit(1);
    });
}
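For reference, a minimal sketch of how this new benchmark suite could be invoked locally. This is an assumption, not a documented entry point: the import specifier is inferred from the file list above, and the shipped file mixes ESM imports with a CommonJS require.main guard, so module resolution may vary by build. Starting node with --expose-gc lets the memory-pressure benchmark's global.gc() branch actually trigger; without the flag that collection step is skipped.

// run-benchmark.mjs — hypothetical runner; the import path below is guessed
// from the package file list and is not a documented public API.
import { ComprehensiveBenchmark } from 'agentic-flow/dist/agentdb/benchmarks/comprehensive-benchmark.js';

const bench = new ComprehensiveBenchmark(':memory:'); // in-memory SQLite, the constructor default
await bench.initialize();  // loads ../schemas/schema.sql and warms the embedding model
await bench.runAll();      // runs all 10 benchmarks, prints the summary table,
                           // and writes benchmark-results.json next to the module
bench.close();

// Invoke with: node --expose-gc run-benchmark.mjs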