agentic-flow 1.7.0 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/.claude/agents/test-neural.md +5 -0
  2. package/.claude/settings.json +20 -19
  3. package/.claude/skills/agentdb-memory-patterns/SKILL.md +166 -0
  4. package/.claude/skills/agentdb-vector-search/SKILL.md +126 -0
  5. package/.claude/skills/agentic-flow/agentdb-memory-patterns/SKILL.md +166 -0
  6. package/.claude/skills/agentic-flow/agentdb-vector-search/SKILL.md +126 -0
  7. package/.claude/skills/agentic-flow/reasoningbank-intelligence/SKILL.md +201 -0
  8. package/.claude/skills/agentic-flow/swarm-orchestration/SKILL.md +179 -0
  9. package/.claude/skills/reasoningbank-intelligence/SKILL.md +201 -0
  10. package/.claude/skills/skill-builder/README.md +308 -0
  11. package/.claude/skills/skill-builder/SKILL.md +910 -0
  12. package/.claude/skills/skill-builder/docs/SPECIFICATION.md +358 -0
  13. package/.claude/skills/skill-builder/resources/schemas/skill-frontmatter.schema.json +41 -0
  14. package/.claude/skills/skill-builder/resources/templates/full-skill.template +118 -0
  15. package/.claude/skills/skill-builder/resources/templates/minimal-skill.template +38 -0
  16. package/.claude/skills/skill-builder/scripts/generate-skill.sh +334 -0
  17. package/.claude/skills/skill-builder/scripts/validate-skill.sh +198 -0
  18. package/.claude/skills/swarm-orchestration/SKILL.md +179 -0
  19. package/CHANGELOG.md +117 -0
  20. package/README.md +81 -17
  21. package/dist/cli-proxy.js +33 -2
  22. package/dist/mcp/standalone-stdio.js +4 -200
  23. package/dist/reasoningbank/index.js +4 -0
  24. package/dist/utils/cli.js +22 -0
  25. package/docs/AGENTDB_INTEGRATION.md +379 -0
  26. package/package.json +4 -4
  27. package/.claude/answer.md +0 -1
  28. package/dist/agentdb/benchmarks/comprehensive-benchmark.js +0 -664
  29. package/dist/agentdb/benchmarks/frontier-benchmark.js +0 -419
  30. package/dist/agentdb/benchmarks/reflexion-benchmark.js +0 -370
  31. package/dist/agentdb/cli/agentdb-cli.js +0 -717
  32. package/dist/agentdb/controllers/CausalMemoryGraph.js +0 -322
  33. package/dist/agentdb/controllers/CausalRecall.js +0 -281
  34. package/dist/agentdb/controllers/EmbeddingService.js +0 -118
  35. package/dist/agentdb/controllers/ExplainableRecall.js +0 -387
  36. package/dist/agentdb/controllers/NightlyLearner.js +0 -382
  37. package/dist/agentdb/controllers/ReflexionMemory.js +0 -239
  38. package/dist/agentdb/controllers/SkillLibrary.js +0 -276
  39. package/dist/agentdb/controllers/frontier-index.js +0 -9
  40. package/dist/agentdb/controllers/index.js +0 -8
  41. package/dist/agentdb/index.js +0 -32
  42. package/dist/agentdb/optimizations/BatchOperations.js +0 -198
  43. package/dist/agentdb/optimizations/QueryOptimizer.js +0 -225
  44. package/dist/agentdb/optimizations/index.js +0 -7
  45. package/dist/agentdb/tests/frontier-features.test.js +0 -665
  46. package/dist/memory/SharedMemoryPool.js +0 -211
  47. package/dist/memory/index.js +0 -6
  48. package/dist/reasoningbank/AdvancedMemory.js +0 -67
  49. package/dist/reasoningbank/HybridBackend.js +0 -91
  50. package/dist/reasoningbank/index-new.js +0 -87
  51. package/docs/AGENTDB_TESTING.md +0 -411
  52. package/scripts/run-validation.sh +0 -165
  53. package/scripts/test-agentdb.sh +0 -153
@@ -1,664 +0,0 @@
1
- /**
2
- * Comprehensive AgentDB Benchmark Suite
3
- *
4
- * Tests all 5 SOTA memory patterns with production workloads:
5
- * 1. Reflexion episodic replay
6
- * 2. Skill library operations
7
- * 3. Mixed memory (facts + notes)
8
- * 4. Event consolidation
9
- * 5. Graph-aware recall
10
- *
11
- * Metrics tracked:
12
- * - Latency (p50, p95, p99)
13
- * - Throughput (ops/sec)
14
- * - Memory usage (RSS, heap)
15
- * - Hit rates and accuracy
16
- * - Concurrency performance
17
- */
18
- import Database from 'better-sqlite3';
19
- import { ReflexionMemory } from '../controllers/ReflexionMemory';
20
- import { SkillLibrary } from '../controllers/SkillLibrary';
21
- import { EmbeddingService } from '../controllers/EmbeddingService';
22
- import * as fs from 'fs';
23
- import * as path from 'path';
24
- export class ComprehensiveBenchmark {
25
- db;
26
- reflexion;
27
- skills;
28
- embedder;
29
- results = [];
30
- constructor(dbPath = ':memory:') {
31
- this.db = new Database(dbPath);
32
- // Configure for performance
33
- this.db.pragma('journal_mode = WAL');
34
- this.db.pragma('synchronous = NORMAL');
35
- this.db.pragma('cache_size = -64000'); // 64MB cache
36
- this.db.pragma('temp_store = MEMORY');
37
- this.db.pragma('mmap_size = 268435456'); // 256MB mmap
38
- this.embedder = new EmbeddingService({
39
- model: 'all-MiniLM-L6-v2',
40
- dimension: 384,
41
- provider: 'transformers'
42
- });
43
- this.reflexion = new ReflexionMemory(this.db, this.embedder);
44
- this.skills = new SkillLibrary(this.db, this.embedder);
45
- }
46
- async initialize() {
47
- console.log('🔧 Initializing AgentDB Benchmark Suite...\n');
48
- // Load schema
49
- const schemaPath = path.join(__dirname, '../schemas/schema.sql');
50
- const schema = fs.readFileSync(schemaPath, 'utf-8');
51
- this.db.exec(schema);
52
- // Initialize embedder
53
- await this.embedder.initialize();
54
- console.log('✅ Initialization complete\n');
55
- }
56
- /**
57
- * Run all benchmarks
58
- */
59
- async runAll() {
60
- console.log('╔════════════════════════════════════════════════════════════════╗');
61
- console.log('║ AgentDB Comprehensive Benchmark Suite ║');
62
- console.log('╚════════════════════════════════════════════════════════════════╝\n');
63
- // Core performance tests
64
- await this.benchmarkEpisodeInsertion();
65
- await this.benchmarkEpisodeRetrieval();
66
- await this.benchmarkSkillConsolidation();
67
- await this.benchmarkConcurrentWrites();
68
- await this.benchmarkConcurrentReads();
69
- await this.benchmarkMixedWorkload();
70
- // Scalability tests
71
- await this.benchmarkLargeDataset();
72
- await this.benchmarkMemoryPressure();
73
- // Production scenarios
74
- await this.benchmarkRealtimeAgent();
75
- await this.benchmarkBatchProcessing();
76
- this.printSummary();
77
- this.generateReport();
78
- return this.results;
79
- }
80
- /**
81
- * Benchmark 1: Episode Insertion Performance
82
- */
83
- async benchmarkEpisodeInsertion() {
84
- console.log('\n📊 Benchmark 1: Episode Insertion Performance');
85
- console.log('━'.repeat(70));
86
- console.log('Testing: Bulk episode storage with embeddings\n');
87
- const count = 10000;
88
- const latencies = [];
89
- const memStart = process.memoryUsage();
90
- console.log(`Inserting ${count} episodes...`);
91
- const startTime = Date.now();
92
- for (let i = 0; i < count; i++) {
93
- const episodeStart = Date.now();
94
- await this.reflexion.storeEpisode({
95
- sessionId: `session-${Math.floor(i / 100)}`,
96
- task: `task_${i % 50}`,
97
- input: `Input data for episode ${i}`,
98
- output: `Generated output ${i}`,
99
- critique: this.generateCritique(i),
100
- reward: Math.random(),
101
- success: Math.random() > 0.3,
102
- latencyMs: Math.floor(Math.random() * 500),
103
- tokensUsed: Math.floor(Math.random() * 1000)
104
- });
105
- latencies.push(Date.now() - episodeStart);
106
- if ((i + 1) % 2500 === 0) {
107
- const progress = ((i + 1) / count * 100).toFixed(1);
108
- const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
109
- console.log(` ${progress}% complete (${elapsed}s)`);
110
- }
111
- }
112
- const duration = Date.now() - startTime;
113
- const memEnd = process.memoryUsage();
114
- this.recordMetrics('Episode Insertion', {
115
- duration,
116
- operations: count,
117
- latencies,
118
- memStart,
119
- memEnd,
120
- passed: duration < 60000 // Should complete in < 60s
121
- });
122
- }
123
- /**
124
- * Benchmark 2: Episode Retrieval Performance
125
- */
126
- async benchmarkEpisodeRetrieval() {
127
- console.log('\n📊 Benchmark 2: Episode Retrieval Performance');
128
- console.log('━'.repeat(70));
129
- console.log('Testing: Semantic search with k-NN over episodes\n');
130
- const queries = 1000;
131
- const latencies = [];
132
- const memStart = process.memoryUsage();
133
- console.log(`Running ${queries} retrieval queries...`);
134
- const startTime = Date.now();
135
- for (let i = 0; i < queries; i++) {
136
- const queryStart = Date.now();
137
- await this.reflexion.retrieveRelevant({
138
- task: `task_${Math.floor(Math.random() * 50)}`,
139
- k: 5,
140
- timeWindowDays: 7
141
- });
142
- latencies.push(Date.now() - queryStart);
143
- if ((i + 1) % 250 === 0) {
144
- const progress = ((i + 1) / queries * 100).toFixed(1);
145
- console.log(` ${progress}% complete`);
146
- }
147
- }
148
- const duration = Date.now() - startTime;
149
- const memEnd = process.memoryUsage();
150
- this.recordMetrics('Episode Retrieval', {
151
- duration,
152
- operations: queries,
153
- latencies,
154
- memStart,
155
- memEnd,
156
- passed: this.calculateP95(latencies) <= 50 // p95 ≤ 50ms
157
- });
158
- }
159
- /**
160
- * Benchmark 3: Skill Consolidation
161
- */
162
- async benchmarkSkillConsolidation() {
163
- console.log('\n📊 Benchmark 3: Skill Consolidation');
164
- console.log('━'.repeat(70));
165
- console.log('Testing: Episode → Skill transformation\n');
166
- const memStart = process.memoryUsage();
167
- const startTime = Date.now();
168
- console.log('Running consolidation job...');
169
- const created = this.skills.consolidateEpisodesIntoSkills({
170
- minAttempts: 3,
171
- minReward: 0.5,
172
- timeWindowDays: 30
173
- });
174
- const duration = Date.now() - startTime;
175
- const memEnd = process.memoryUsage();
176
- console.log(`✓ Created ${created} skills in ${duration}ms`);
177
- this.recordMetrics('Skill Consolidation', {
178
- duration,
179
- operations: created,
180
- latencies: [duration],
181
- memStart,
182
- memEnd,
183
- passed: duration < 5000 && created > 0 // < 5s, creates skills
184
- });
185
- }
186
- /**
187
- * Benchmark 4: Concurrent Writes
188
- */
189
- async benchmarkConcurrentWrites() {
190
- console.log('\n📊 Benchmark 4: Concurrent Write Performance');
191
- console.log('━'.repeat(70));
192
- console.log('Testing: Multiple agents writing simultaneously\n');
193
- const writers = 10;
194
- const writesPerWriter = 100;
195
- const allLatencies = [];
196
- const memStart = process.memoryUsage();
197
- console.log(`Running ${writers} concurrent writers...`);
198
- const startTime = Date.now();
199
- const writerPromises = Array.from({ length: writers }, async (_, writerIdx) => {
200
- const latencies = [];
201
- for (let i = 0; i < writesPerWriter; i++) {
202
- const writeStart = Date.now();
203
- await this.reflexion.storeEpisode({
204
- sessionId: `concurrent-session-${writerIdx}`,
205
- task: `concurrent_task_${i}`,
206
- input: `Writer ${writerIdx} input ${i}`,
207
- output: `Writer ${writerIdx} output ${i}`,
208
- critique: 'Concurrent write test',
209
- reward: Math.random(),
210
- success: true
211
- });
212
- latencies.push(Date.now() - writeStart);
213
- }
214
- return latencies;
215
- });
216
- const results = await Promise.all(writerPromises);
217
- results.forEach(latencies => allLatencies.push(...latencies));
218
- const duration = Date.now() - startTime;
219
- const memEnd = process.memoryUsage();
220
- this.recordMetrics('Concurrent Writes', {
221
- duration,
222
- operations: writers * writesPerWriter,
223
- latencies: allLatencies,
224
- memStart,
225
- memEnd,
226
- passed: this.calculateP95(allLatencies) <= 100 // p95 ≤ 100ms under concurrency
227
- });
228
- }
229
- /**
230
- * Benchmark 5: Concurrent Reads
231
- */
232
- async benchmarkConcurrentReads() {
233
- console.log('\n📊 Benchmark 5: Concurrent Read Performance');
234
- console.log('━'.repeat(70));
235
- console.log('Testing: Multiple agents reading simultaneously\n');
236
- const readers = 20;
237
- const readsPerReader = 50;
238
- const allLatencies = [];
239
- const memStart = process.memoryUsage();
240
- console.log(`Running ${readers} concurrent readers...`);
241
- const startTime = Date.now();
242
- const readerPromises = Array.from({ length: readers }, async () => {
243
- const latencies = [];
244
- for (let i = 0; i < readsPerReader; i++) {
245
- const readStart = Date.now();
246
- await this.reflexion.retrieveRelevant({
247
- task: `task_${Math.floor(Math.random() * 50)}`,
248
- k: 5
249
- });
250
- latencies.push(Date.now() - readStart);
251
- }
252
- return latencies;
253
- });
254
- const results = await Promise.all(readerPromises);
255
- results.forEach(latencies => allLatencies.push(...latencies));
256
- const duration = Date.now() - startTime;
257
- const memEnd = process.memoryUsage();
258
- this.recordMetrics('Concurrent Reads', {
259
- duration,
260
- operations: readers * readsPerReader,
261
- latencies: allLatencies,
262
- memStart,
263
- memEnd,
264
- passed: this.calculateP95(allLatencies) <= 75 // p95 ≤ 75ms
265
- });
266
- }
267
- /**
268
- * Benchmark 6: Mixed Workload (Read + Write)
269
- */
270
- async benchmarkMixedWorkload() {
271
- console.log('\n📊 Benchmark 6: Mixed Workload Performance');
272
- console.log('━'.repeat(70));
273
- console.log('Testing: Simultaneous reads and writes\n');
274
- const workers = 10;
275
- const opsPerWorker = 100;
276
- const allLatencies = [];
277
- const memStart = process.memoryUsage();
278
- console.log(`Running ${workers} workers with mixed operations...`);
279
- const startTime = Date.now();
280
- const workerPromises = Array.from({ length: workers }, async (_, idx) => {
281
- const latencies = [];
282
- for (let i = 0; i < opsPerWorker; i++) {
283
- const opStart = Date.now();
284
- // Alternate between reads and writes
285
- if (i % 2 === 0) {
286
- await this.reflexion.storeEpisode({
287
- sessionId: `mixed-${idx}`,
288
- task: `mixed_task_${i}`,
289
- input: 'input',
290
- output: 'output',
291
- critique: 'critique',
292
- reward: Math.random(),
293
- success: true
294
- });
295
- }
296
- else {
297
- await this.reflexion.retrieveRelevant({
298
- task: `mixed_task_${Math.floor(Math.random() * 50)}`,
299
- k: 3
300
- });
301
- }
302
- latencies.push(Date.now() - opStart);
303
- }
304
- return latencies;
305
- });
306
- const results = await Promise.all(workerPromises);
307
- results.forEach(latencies => allLatencies.push(...latencies));
308
- const duration = Date.now() - startTime;
309
- const memEnd = process.memoryUsage();
310
- this.recordMetrics('Mixed Workload', {
311
- duration,
312
- operations: workers * opsPerWorker,
313
- latencies: allLatencies,
314
- memStart,
315
- memEnd,
316
- passed: this.calculateP95(allLatencies) <= 80 // p95 ≤ 80ms
317
- });
318
- }
319
- /**
320
- * Benchmark 7: Large Dataset Performance
321
- */
322
- async benchmarkLargeDataset() {
323
- console.log('\n📊 Benchmark 7: Large Dataset Scalability');
324
- console.log('━'.repeat(70));
325
- console.log('Testing: Performance at 50k+ memories\n');
326
- const targetSize = 50000;
327
- const currentSize = this.db.prepare('SELECT COUNT(*) as count FROM episodes').get();
328
- const needed = Math.max(0, targetSize - currentSize.count);
329
- if (needed > 0) {
330
- console.log(`Adding ${needed} episodes to reach ${targetSize} target...`);
331
- const batchSize = 1000;
332
- const batches = Math.ceil(needed / batchSize);
333
- for (let batch = 0; batch < batches; batch++) {
334
- const transaction = this.db.transaction((episodes) => {
335
- const stmt = this.db.prepare(`
336
- INSERT INTO episodes (session_id, task, input, output, reward, success)
337
- VALUES (?, ?, ?, ?, ?, ?)
338
- `);
339
- for (const ep of episodes) {
340
- stmt.run(ep.session_id, ep.task, ep.input, ep.output, ep.reward, ep.success);
341
- }
342
- });
343
- const episodes = Array.from({ length: Math.min(batchSize, needed - batch * batchSize) }, (_, i) => ({
344
- session_id: `batch-${batch}`,
345
- task: `task_${i % 100}`,
346
- input: `input ${i}`,
347
- output: `output ${i}`,
348
- reward: Math.random(),
349
- success: Math.random() > 0.5 ? 1 : 0
350
- }));
351
- transaction(episodes);
352
- if ((batch + 1) % 10 === 0) {
353
- console.log(` Progress: ${((batch + 1) / batches * 100).toFixed(1)}%`);
354
- }
355
- }
356
- }
357
- // Now test retrieval at scale
358
- const queries = 100;
359
- const latencies = [];
360
- const memStart = process.memoryUsage();
361
- console.log(`\nTesting retrieval over ${targetSize} memories...`);
362
- const startTime = Date.now();
363
- for (let i = 0; i < queries; i++) {
364
- const queryStart = Date.now();
365
- await this.reflexion.retrieveRelevant({
366
- task: `task_${Math.floor(Math.random() * 100)}`,
367
- k: 5
368
- });
369
- latencies.push(Date.now() - queryStart);
370
- }
371
- const duration = Date.now() - startTime;
372
- const memEnd = process.memoryUsage();
373
- this.recordMetrics('Large Dataset (50k+)', {
374
- duration,
375
- operations: queries,
376
- latencies,
377
- memStart,
378
- memEnd,
379
- passed: this.calculateP95(latencies) <= 100 // p95 ≤ 100ms at scale
380
- });
381
- }
382
- /**
383
- * Benchmark 8: Memory Pressure
384
- */
385
- async benchmarkMemoryPressure() {
386
- console.log('\n📊 Benchmark 8: Memory Pressure Test');
387
- console.log('━'.repeat(70));
388
- console.log('Testing: System stability under memory constraints\n');
389
- const operations = 1000;
390
- const latencies = [];
391
- const memStart = process.memoryUsage();
392
- console.log(`Running ${operations} operations with cache pressure...`);
393
- // Force garbage collection if available
394
- if (global.gc) {
395
- global.gc();
396
- }
397
- const startTime = Date.now();
398
- for (let i = 0; i < operations; i++) {
399
- const opStart = Date.now();
400
- // Mix of memory-intensive operations
401
- if (i % 3 === 0) {
402
- await this.reflexion.storeEpisode({
403
- sessionId: `mem-pressure-${i}`,
404
- task: `task_${i}`,
405
- input: 'A'.repeat(1000), // Large input
406
- output: 'B'.repeat(1000), // Large output
407
- critique: 'C'.repeat(500),
408
- reward: Math.random(),
409
- success: true
410
- });
411
- }
412
- else {
413
- await this.reflexion.retrieveRelevant({
414
- task: `task_${Math.floor(Math.random() * 100)}`,
415
- k: 10 // Higher k for more memory use
416
- });
417
- }
418
- latencies.push(Date.now() - opStart);
419
- if ((i + 1) % 250 === 0) {
420
- console.log(` ${((i + 1) / operations * 100).toFixed(1)}% complete`);
421
- }
422
- }
423
- const duration = Date.now() - startTime;
424
- const memEnd = process.memoryUsage();
425
- const memoryGrowth = (memEnd.heapUsed - memStart.heapUsed) / 1024 / 1024;
426
- this.recordMetrics('Memory Pressure', {
427
- duration,
428
- operations,
429
- latencies,
430
- memStart,
431
- memEnd,
432
- passed: memoryGrowth < 200 && this.calculateP95(latencies) <= 150, // < 200MB growth
433
- details: `Memory growth: ${memoryGrowth.toFixed(2)}MB`
434
- });
435
- }
436
- /**
437
- * Benchmark 9: Realtime Agent Simulation
438
- */
439
- async benchmarkRealtimeAgent() {
440
- console.log('\n📊 Benchmark 9: Realtime Agent Simulation');
441
- console.log('━'.repeat(70));
442
- console.log('Testing: Agent making real-time decisions with memory\n');
443
- const tasks = 50;
444
- const attemptsPerTask = 5;
445
- const allLatencies = [];
446
- const memStart = process.memoryUsage();
447
- console.log(`Simulating ${tasks} tasks with ${attemptsPerTask} attempts each...`);
448
- const startTime = Date.now();
449
- for (let taskIdx = 0; taskIdx < tasks; taskIdx++) {
450
- const task = `realtime_task_${taskIdx}`;
451
- for (let attempt = 0; attempt < attemptsPerTask; attempt++) {
452
- const cycleStart = Date.now();
453
- // 1. Retrieve relevant memories
454
- const memories = await this.reflexion.retrieveRelevant({
455
- task,
456
- k: 3,
457
- onlyFailures: attempt > 0
458
- });
459
- // 2. Simulate task execution
460
- const executionTime = Math.random() * 100;
461
- await new Promise(resolve => setTimeout(resolve, executionTime));
462
- // 3. Store result
463
- const reward = Math.min(0.3 + attempt * 0.15, 1.0); // Improvement over attempts
464
- await this.reflexion.storeEpisode({
465
- sessionId: `realtime-${taskIdx}`,
466
- task,
467
- input: `Attempt ${attempt}`,
468
- output: `Result ${attempt}`,
469
- critique: attempt < 3 ? 'Needs improvement' : 'Good progress',
470
- reward,
471
- success: reward > 0.7
472
- });
473
- allLatencies.push(Date.now() - cycleStart);
474
- }
475
- if ((taskIdx + 1) % 10 === 0) {
476
- console.log(` Task ${taskIdx + 1}/${tasks} complete`);
477
- }
478
- }
479
- const duration = Date.now() - startTime;
480
- const memEnd = process.memoryUsage();
481
- this.recordMetrics('Realtime Agent', {
482
- duration,
483
- operations: tasks * attemptsPerTask,
484
- latencies: allLatencies,
485
- memStart,
486
- memEnd,
487
- passed: this.calculateP95(allLatencies) <= 200 // p95 ≤ 200ms for full cycle
488
- });
489
- }
490
- /**
491
- * Benchmark 10: Batch Processing
492
- */
493
- async benchmarkBatchProcessing() {
494
- console.log('\n📊 Benchmark 10: Batch Processing Performance');
495
- console.log('━'.repeat(70));
496
- console.log('Testing: High-throughput batch operations\n');
497
- const batchSize = 100;
498
- const batches = 50;
499
- const allLatencies = [];
500
- const memStart = process.memoryUsage();
501
- console.log(`Processing ${batches} batches of ${batchSize} episodes...`);
502
- const startTime = Date.now();
503
- for (let batchIdx = 0; batchIdx < batches; batchIdx++) {
504
- const batchStart = Date.now();
505
- // Use transaction for batch
506
- const transaction = this.db.transaction((episodes) => {
507
- for (const ep of episodes) {
508
- this.reflexion.storeEpisode(ep);
509
- }
510
- });
511
- const episodes = Array.from({ length: batchSize }, (_, i) => ({
512
- sessionId: `batch-${batchIdx}`,
513
- task: `batch_task_${i}`,
514
- input: `batch input ${i}`,
515
- output: `batch output ${i}`,
516
- critique: 'Batch processed',
517
- reward: Math.random(),
518
- success: Math.random() > 0.5
519
- }));
520
- await Promise.all(episodes.map(ep => this.reflexion.storeEpisode(ep)));
521
- allLatencies.push(Date.now() - batchStart);
522
- if ((batchIdx + 1) % 10 === 0) {
523
- console.log(` ${((batchIdx + 1) / batches * 100).toFixed(1)}% complete`);
524
- }
525
- }
526
- const duration = Date.now() - startTime;
527
- const memEnd = process.memoryUsage();
528
- const throughput = (batches * batchSize) / (duration / 1000);
529
- this.recordMetrics('Batch Processing', {
530
- duration,
531
- operations: batches * batchSize,
532
- latencies: allLatencies,
533
- memStart,
534
- memEnd,
535
- passed: throughput >= 100, // ≥ 100 episodes/sec
536
- details: `Throughput: ${throughput.toFixed(0)} eps/sec`
537
- });
538
- }
539
- // ========================================================================
540
- // Helper Methods
541
- // ========================================================================
542
- recordMetrics(testName, data) {
543
- const { duration, operations, latencies, memStart, memEnd, passed, details } = data;
544
- latencies.sort((a, b) => a - b);
545
- const metrics = {
546
- testName,
547
- duration,
548
- operations,
549
- opsPerSecond: (operations / duration) * 1000,
550
- latency: {
551
- min: latencies[0] || 0,
552
- max: latencies[latencies.length - 1] || 0,
553
- avg: latencies.reduce((a, b) => a + b, 0) / latencies.length || 0,
554
- p50: latencies[Math.floor(latencies.length * 0.50)] || 0,
555
- p95: latencies[Math.floor(latencies.length * 0.95)] || 0,
556
- p99: latencies[Math.floor(latencies.length * 0.99)] || 0
557
- },
558
- memory: {
559
- heapUsed: memEnd.heapUsed - memStart.heapUsed,
560
- heapTotal: memEnd.heapTotal - memStart.heapTotal,
561
- rss: memEnd.rss - memStart.rss
562
- },
563
- passed,
564
- details
565
- };
566
- this.results.push(metrics);
567
- console.log(`\n📈 Results:`);
568
- console.log(` Duration: ${duration.toFixed(0)}ms`);
569
- console.log(` Operations: ${operations}`);
570
- console.log(` Throughput: ${metrics.opsPerSecond.toFixed(1)} ops/sec`);
571
- console.log(` Latency p50: ${metrics.latency.p50.toFixed(1)}ms`);
572
- console.log(` Latency p95: ${metrics.latency.p95.toFixed(1)}ms`);
573
- console.log(` Latency p99: ${metrics.latency.p99.toFixed(1)}ms`);
574
- console.log(` Memory Δ: ${(metrics.memory.heapUsed / 1024 / 1024).toFixed(2)}MB`);
575
- if (details)
576
- console.log(` ${details}`);
577
- console.log(`\n${passed ? '✅ PASSED' : '❌ FAILED'}`);
578
- }
579
- calculateP95(latencies) {
580
- const sorted = [...latencies].sort((a, b) => a - b);
581
- return sorted[Math.floor(sorted.length * 0.95)] || 0;
582
- }
583
- generateCritique(index) {
584
- const critiques = [
585
- 'Edge case: empty input not handled',
586
- 'Performance: could optimize query',
587
- 'Bug: off-by-one error in loop',
588
- 'Missing: input validation needed',
589
- 'Improvement: add error handling',
590
- 'Success: all edge cases covered',
591
- 'Optimization: reduced time complexity',
592
- 'Enhancement: added comprehensive tests'
593
- ];
594
- return critiques[index % critiques.length];
595
- }
596
- printSummary() {
597
- console.log('\n' + '═'.repeat(70));
598
- console.log('\n📊 BENCHMARK SUMMARY\n');
599
- console.log('═'.repeat(70));
600
- const passed = this.results.filter(r => r.passed).length;
601
- const total = this.results.length;
602
- console.log('\n╔════════════════════════════════════════════════════════════════╗');
603
- console.log('║ Test Name │ Result │ p95 Latency ║');
604
- console.log('╠════════════════════════════════════════════════════════════════╣');
605
- this.results.forEach(result => {
606
- const status = result.passed ? '✅ PASS' : '❌ FAIL';
607
- const name = result.testName.padEnd(33);
608
- const latency = `${result.latency.p95.toFixed(1)}ms`.padStart(12);
609
- console.log(`║ ${name}│ ${status} │ ${latency} ║`);
610
- });
611
- console.log('╚════════════════════════════════════════════════════════════════╝');
612
- console.log(`\n🎯 Overall: ${passed}/${total} tests passed (${((passed / total) * 100).toFixed(1)}%)`);
613
- // Calculate aggregate metrics
614
- const totalOps = this.results.reduce((sum, r) => sum + r.operations, 0);
615
- const totalDuration = this.results.reduce((sum, r) => sum + r.duration, 0);
616
- const avgThroughput = (totalOps / totalDuration) * 1000;
617
- console.log(`\n📊 Aggregate Metrics:`);
618
- console.log(` Total Operations: ${totalOps.toLocaleString()}`);
619
- console.log(` Total Duration: ${(totalDuration / 1000).toFixed(1)}s`);
620
- console.log(` Avg Throughput: ${avgThroughput.toFixed(1)} ops/sec`);
621
- if (passed === total) {
622
- console.log('\n✨ All benchmarks passed! AgentDB is production-ready.\n');
623
- }
624
- else {
625
- console.log('\n⚠️ Some benchmarks failed. Review optimizations needed.\n');
626
- }
627
- }
628
- generateReport() {
629
- const report = {
630
- timestamp: new Date().toISOString(),
631
- summary: {
632
- totalTests: this.results.length,
633
- passed: this.results.filter(r => r.passed).length,
634
- failed: this.results.filter(r => !r.passed).length
635
- },
636
- results: this.results,
637
- environment: {
638
- nodeVersion: process.version,
639
- platform: process.platform,
640
- arch: process.arch,
641
- memory: process.memoryUsage()
642
- }
643
- };
644
- const reportPath = path.join(__dirname, 'benchmark-results.json');
645
- fs.writeFileSync(reportPath, JSON.stringify(report, null, 2));
646
- console.log(`📄 Detailed report saved to: ${reportPath}`);
647
- }
648
- close() {
649
- this.db.close();
650
- }
651
- }
652
- // Run if called directly
653
- if (require.main === module) {
654
- (async () => {
655
- const benchmark = new ComprehensiveBenchmark();
656
- await benchmark.initialize();
657
- await benchmark.runAll();
658
- benchmark.close();
659
- process.exit(0);
660
- })().catch(err => {
661
- console.error('Benchmark failed:', err);
662
- process.exit(1);
663
- });
664
- }