agentic-flow 1.7.2 → 1.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.claude/agents/test-neural.md +0 -5
  2. package/.claude/answer.md +1 -0
  3. package/.claude/settings.json +19 -20
  4. package/CHANGELOG.md +0 -91
  5. package/README.md +17 -81
  6. package/dist/agentdb/benchmarks/comprehensive-benchmark.js +664 -0
  7. package/dist/agentdb/benchmarks/frontier-benchmark.js +419 -0
  8. package/dist/agentdb/benchmarks/reflexion-benchmark.js +370 -0
  9. package/dist/agentdb/cli/agentdb-cli.js +717 -0
  10. package/dist/agentdb/controllers/CausalMemoryGraph.js +322 -0
  11. package/dist/agentdb/controllers/CausalRecall.js +281 -0
  12. package/dist/agentdb/controllers/EmbeddingService.js +118 -0
  13. package/dist/agentdb/controllers/ExplainableRecall.js +387 -0
  14. package/dist/agentdb/controllers/NightlyLearner.js +382 -0
  15. package/dist/agentdb/controllers/ReflexionMemory.js +239 -0
  16. package/dist/agentdb/controllers/SkillLibrary.js +276 -0
  17. package/dist/agentdb/controllers/frontier-index.js +9 -0
  18. package/dist/agentdb/controllers/index.js +8 -0
  19. package/dist/agentdb/index.js +32 -0
  20. package/dist/agentdb/optimizations/BatchOperations.js +198 -0
  21. package/dist/agentdb/optimizations/QueryOptimizer.js +225 -0
  22. package/dist/agentdb/optimizations/index.js +7 -0
  23. package/dist/agentdb/tests/frontier-features.test.js +665 -0
  24. package/dist/cli/skills-manager.js +3 -1
  25. package/dist/cli-proxy.js +2 -33
  26. package/dist/mcp/standalone-stdio.js +200 -4
  27. package/dist/memory/SharedMemoryPool.js +211 -0
  28. package/dist/memory/index.js +6 -0
  29. package/dist/reasoningbank/AdvancedMemory.js +239 -0
  30. package/dist/reasoningbank/HybridBackend.js +305 -0
  31. package/dist/reasoningbank/index-new.js +87 -0
  32. package/dist/reasoningbank/index.js +23 -44
  33. package/dist/utils/cli.js +0 -22
  34. package/docs/AGENTDB_TESTING.md +411 -0
  35. package/docs/v1.7.1-QUICK-START.md +399 -0
  36. package/package.json +4 -4
  37. package/scripts/run-validation.sh +165 -0
  38. package/scripts/test-agentdb.sh +153 -0
  39. package/.claude/skills/agentdb-memory-patterns/SKILL.md +0 -166
  40. package/.claude/skills/agentdb-vector-search/SKILL.md +0 -126
  41. package/.claude/skills/agentic-flow/agentdb-memory-patterns/SKILL.md +0 -166
  42. package/.claude/skills/agentic-flow/agentdb-vector-search/SKILL.md +0 -126
  43. package/.claude/skills/agentic-flow/reasoningbank-intelligence/SKILL.md +0 -201
  44. package/.claude/skills/agentic-flow/swarm-orchestration/SKILL.md +0 -179
  45. package/.claude/skills/reasoningbank-intelligence/SKILL.md +0 -201
  46. package/.claude/skills/skill-builder/README.md +0 -308
  47. package/.claude/skills/skill-builder/SKILL.md +0 -910
  48. package/.claude/skills/skill-builder/docs/SPECIFICATION.md +0 -358
  49. package/.claude/skills/skill-builder/resources/schemas/skill-frontmatter.schema.json +0 -41
  50. package/.claude/skills/skill-builder/resources/templates/full-skill.template +0 -118
  51. package/.claude/skills/skill-builder/resources/templates/minimal-skill.template +0 -38
  52. package/.claude/skills/skill-builder/scripts/generate-skill.sh +0 -334
  53. package/.claude/skills/skill-builder/scripts/validate-skill.sh +0 -198
  54. package/.claude/skills/swarm-orchestration/SKILL.md +0 -179
  55. package/docs/AGENTDB_INTEGRATION.md +0 -379
@@ -0,0 +1,419 @@
1
+ /**
2
+ * Frontier Features Performance Benchmark
3
+ *
4
+ * Benchmarks:
5
+ * 1. Causal edge operations (insert, query, chain traversal)
6
+ * 2. A/B experiment tracking (observation recording, uplift calculation)
7
+ * 3. Certificate creation (minimal hitting set, Merkle trees)
8
+ * 4. Provenance tracking (lineage queries, verification)
9
+ *
10
+ * Performance Targets:
11
+ * - Causal edge insertion: < 5ms p95
12
+ * - Uplift calculation (1000 obs): < 100ms p95
13
+ * - Certificate creation: < 50ms p95
14
+ * - Certificate verification: < 10ms p95
15
+ * - Concurrent operations: No degradation vs sequential
16
+ */
17
+ import Database from 'better-sqlite3';
18
+ import * as fs from 'fs';
19
+ import * as path from 'path';
20
+ import { CausalMemoryGraph } from '../controllers/CausalMemoryGraph';
21
+ import { ExplainableRecall } from '../controllers/ExplainableRecall';
22
/**
 * Runs `operation` once and returns how long it took, in milliseconds.
 *
 * The duration is taken from `process.hrtime.bigint()` (nanosecond resolution)
 * rather than a `Date.now()` delta, so operations that finish in well under a
 * millisecond are not rounded down to 0ms.
 *
 * @param {() => unknown} operation - Synchronous work to time.
 * @returns {number} Elapsed time in fractional milliseconds.
 */
function measureMs(operation) {
    const startNs = process.hrtime.bigint();
    operation();
    // Subtract while still BigInt so no precision is lost before converting.
    return Number(process.hrtime.bigint() - startNs) / 1e6;
}
/**
 * Deletes a SQLite database file and the `-wal` / `-shm` files that WAL
 * journaling keeps next to it. Files that do not exist are skipped.
 *
 * @param {string} dbPath - Path of the main database file.
 */
function removeDatabaseFiles(dbPath) {
    for (const suffix of ['', '-wal', '-shm']) {
        const filePath = `${dbPath}${suffix}`;
        if (fs.existsSync(filePath)) {
            fs.unlinkSync(filePath);
        }
    }
}
/**
 * Builds the synthetic chunk list passed to `createCertificate`.
 *
 * @param {number} count - Number of chunks to generate.
 * @param {string} label - Prefix of each chunk's `content` (followed by the index).
 * @param {(idx: number) => number} [relevanceAt] - Relevance for chunk `idx`; defaults to a constant 0.9.
 * @returns {{id: string, type: string, content: string, relevance: number}[]}
 */
function buildChunks(count, label, relevanceAt = () => 0.9) {
    return Array.from({ length: count }, (_, idx) => ({
        id: `${idx + 1}`,
        type: 'episode',
        content: `${label} ${idx}`,
        relevance: relevanceAt(idx)
    }));
}
/**
 * Performance benchmark for the causal-graph and explainable-recall
 * controllers, run against a throwaway file-based SQLite database.
 *
 * Each `benchmark*` method returns a result object produced by
 * `createResult`; `runAll` executes them in order, prints a summary and
 * always cleans up the database afterwards.
 */
class FrontierBenchmark {
    db;
    causalGraph;
    explainableRecall;
    /**
     * Creates a fresh benchmark database next to this file, applies the two
     * schema files and instantiates the controllers under test.
     */
    constructor() {
        // Use file-based database for realistic performance
        // NOTE(review): `__dirname` only exists under CommonJS while this file
        // uses `import` syntax — confirm how the build is loaded.
        const dbPath = path.join(__dirname, 'frontier-benchmark.db');
        // Also clears WAL sidecars a previously crashed run may have left behind.
        removeDatabaseFiles(dbPath);
        this.db = new Database(dbPath);
        // Enable optimizations
        this.db.pragma('journal_mode = WAL');
        this.db.pragma('synchronous = NORMAL');
        this.db.pragma('cache_size = -64000'); // 64MB
        this.db.pragma('temp_store = MEMORY');
        // Load schemas
        const coreSchema = fs.readFileSync(path.join(__dirname, '../schemas/schema.sql'), 'utf-8');
        this.db.exec(coreSchema);
        const frontierSchema = fs.readFileSync(path.join(__dirname, '../schemas/frontier-schema.sql'), 'utf-8');
        this.db.exec(frontierSchema);
        this.causalGraph = new CausalMemoryGraph(this.db);
        this.explainableRecall = new ExplainableRecall(this.db);
    }
    /**
     * Closes the database and deletes the benchmark database files.
     */
    cleanup() {
        this.db.close();
        removeDatabaseFiles(path.join(__dirname, 'frontier-benchmark.db'));
    }
    /**
     * Nearest-rank percentile of an ascending-sorted sample.
     *
     * @param {number[]} sorted - Values sorted in ascending order.
     * @param {number} percentile - Percentile in the 0–100 range.
     * @returns {number} The selected sample value, or `NaN` when `sorted` is empty.
     */
    calculatePercentile(sorted, percentile) {
        if (sorted.length === 0) {
            return NaN;
        }
        const rank = Math.ceil((percentile / 100) * sorted.length) - 1;
        // Clamp so percentile 0 (rank -1) and values above 100 stay inside the array.
        const index = Math.min(Math.max(rank, 0), sorted.length - 1);
        return sorted[index];
    }
    /**
     * Summarises a list of latencies into a benchmark result.
     *
     * @param {string} operation - Human-readable benchmark name.
     * @param {number[]} latencies - Per-iteration durations in milliseconds (not mutated).
     * @param {number} [target] - p95 target in milliseconds; when falsy the result always passes.
     * @returns {object} Result with count, total/avg time, p50/p95/p99, min/max,
     *   throughput (ops/sec), `passed` and the `target` it was checked against.
     */
    createResult(operation, latencies, target) {
        const sorted = [...latencies].sort((a, b) => a - b);
        const count = latencies.length;
        const totalTimeMs = latencies.reduce((sum, t) => sum + t, 0);
        const p95Ms = this.calculatePercentile(sorted, 95);
        return {
            operation,
            count,
            totalTimeMs,
            avgTimeMs: totalTimeMs / count,
            p50Ms: this.calculatePercentile(sorted, 50),
            p95Ms,
            p99Ms: this.calculatePercentile(sorted, 99),
            // NaN (not undefined) for an empty sample so later `.toFixed` calls cannot throw.
            minMs: count > 0 ? sorted[0] : NaN,
            maxMs: count > 0 ? sorted[count - 1] : NaN,
            throughput: (count / totalTimeMs) * 1000,
            // An empty sample yields a NaN p95, which compares false and so reports a failure.
            passed: target ? p95Ms <= target : true,
            target
        };
    }
    /**
     * Benchmark 1: Causal Edge Insertion
     * Target: < 5ms p95
     *
     * Seeds 100 rows into `episodes`, then times 1000 `addCausalEdge` calls.
     */
    async benchmarkCausalEdgeInsertion() {
        console.log('\n📊 Benchmark 1: Causal Edge Insertion');
        // Setup: Insert test episodes
        const insertEpisode = this.db.prepare(`
            INSERT INTO episodes (session_id, task, reward, success)
            VALUES (?, ?, ?, ?)
        `);
        for (let i = 1; i <= 100; i++) {
            insertEpisode.run(`session${i}`, `task${i}`, 0.7 + Math.random() * 0.3, 1);
        }
        const latencies = [];
        const iterations = 1000;
        for (let i = 0; i < iterations; i++) {
            // Edge endpoints assume the seeded episodes received ids 1..100 — TODO confirm against schema.
            const edge = {
                fromMemoryId: (i % 100) + 1,
                fromMemoryType: 'episode',
                toMemoryId: ((i + 1) % 100) + 1,
                toMemoryType: 'episode',
                similarity: 0.7 + Math.random() * 0.3,
                uplift: Math.random() * 0.3 - 0.1,
                confidence: 0.5 + Math.random() * 0.5,
                sampleSize: Math.floor(Math.random() * 200) + 50,
                mechanism: 'automated test edge'
            };
            latencies.push(measureMs(() => this.causalGraph.addCausalEdge(edge)));
        }
        return this.createResult('Causal Edge Insertion', latencies, 5);
    }
    /**
     * Benchmark 2: Causal Effect Query
     * Target: < 20ms p95
     *
     * Times 500 `queryCausalEffects` calls cycling over memory ids 1..100.
     */
    async benchmarkCausalQuery() {
        console.log('\n📊 Benchmark 2: Causal Effect Query');
        const latencies = [];
        const iterations = 500;
        for (let i = 0; i < iterations; i++) {
            const query = {
                interventionMemoryId: (i % 100) + 1,
                interventionMemoryType: 'episode',
                minConfidence: 0.7,
                minUplift: 0.05
            };
            latencies.push(measureMs(() => this.causalGraph.queryCausalEffects(query)));
        }
        return this.createResult('Causal Effect Query', latencies, 20);
    }
    /**
     * Benchmark 3: Causal Chain Discovery
     * Target: < 50ms p95
     *
     * Times 200 `getCausalChain` calls between ids 25 apart, with 5 as the third argument.
     */
    async benchmarkCausalChain() {
        console.log('\n📊 Benchmark 3: Causal Chain Discovery');
        const latencies = [];
        const iterations = 200;
        for (let i = 0; i < iterations; i++) {
            const fromId = (i % 50) + 1;
            const toId = (i % 50) + 26;
            latencies.push(measureMs(() => this.causalGraph.getCausalChain(fromId, toId, 5)));
        }
        return this.createResult('Causal Chain Discovery', latencies, 50);
    }
    /**
     * Benchmark 4: A/B Experiment Observation Recording
     * Target: < 3ms p95
     *
     * Creates one experiment and times 2000 `recordObservation` calls,
     * alternating treatment and control.
     */
    async benchmarkExperimentObservation() {
        console.log('\n📊 Benchmark 4: A/B Experiment Observation Recording');
        // Create experiment
        const expId = this.causalGraph.createExperiment({
            name: 'Benchmark Experiment',
            hypothesis: 'Treatment improves outcome',
            treatmentId: 1,
            treatmentType: 'episode',
            startTime: Date.now(),
            sampleSize: 0,
            status: 'running'
        });
        const latencies = [];
        const iterations = 2000;
        for (let i = 0; i < iterations; i++) {
            const observation = {
                experimentId: expId,
                episodeId: (i % 100) + 1,
                isTreatment: i % 2 === 0,
                outcomeValue: Math.random(),
                outcomeType: 'reward'
            };
            latencies.push(measureMs(() => this.causalGraph.recordObservation(observation)));
        }
        return this.createResult('Experiment Observation Recording', latencies, 3);
    }
    /**
     * Benchmark 5: Uplift Calculation
     * Target: < 100ms p95 for 1000 observations
     *
     * For each of 50 iterations a new experiment is filled with 1000
     * observations (untimed); only the `calculateUplift` call is timed.
     */
    async benchmarkUpliftCalculation() {
        console.log('\n📊 Benchmark 5: Uplift Calculation');
        const latencies = [];
        const iterations = 50;
        for (let i = 0; i < iterations; i++) {
            // Create experiment with 1000 observations
            const expId = this.causalGraph.createExperiment({
                name: `Uplift Test ${i}`,
                hypothesis: 'Treatment improves outcome',
                treatmentId: 1,
                treatmentType: 'episode',
                startTime: Date.now(),
                sampleSize: 0,
                status: 'running'
            });
            // Record 1000 observations
            for (let j = 0; j < 1000; j++) {
                const isTreatment = j % 2 === 0;
                this.causalGraph.recordObservation({
                    experimentId: expId,
                    episodeId: (j % 100) + 1,
                    isTreatment,
                    // Treatment outcomes are drawn 0.2 higher than control outcomes.
                    outcomeValue: (isTreatment ? 0.7 : 0.5) + Math.random() * 0.2,
                    outcomeType: 'reward'
                });
            }
            // Measure uplift calculation
            latencies.push(measureMs(() => this.causalGraph.calculateUplift(expId)));
        }
        return this.createResult('Uplift Calculation (1000 obs)', latencies, 100);
    }
    /**
     * Benchmark 6: Certificate Creation
     * Target: < 50ms p95
     *
     * Times 500 `createCertificate` calls with 10–29 chunks and 5 requirements each.
     */
    async benchmarkCertificateCreation() {
        console.log('\n📊 Benchmark 6: Certificate Creation');
        const latencies = [];
        const iterations = 500;
        for (let i = 0; i < iterations; i++) {
            const numChunks = 10 + (i % 20);
            const request = {
                queryId: `bench_q${i}`,
                queryText: `Benchmark query ${i}`,
                chunks: buildChunks(numChunks, 'Test content', (idx) => 0.9 - idx * 0.02),
                requirements: Array.from({ length: 5 }, (_, idx) => `req${idx}`),
                accessLevel: 'internal'
            };
            latencies.push(measureMs(() => this.explainableRecall.createCertificate(request)));
        }
        return this.createResult('Certificate Creation', latencies, 50);
    }
    /**
     * Benchmark 7: Certificate Verification
     * Target: < 10ms p95
     *
     * Creates 100 certificates (untimed), then times 500 `verifyCertificate`
     * calls cycling over their ids.
     */
    async benchmarkCertificateVerification() {
        console.log('\n📊 Benchmark 7: Certificate Verification');
        // Create certificates to verify
        const certificateIds = [];
        for (let i = 0; i < 100; i++) {
            const cert = this.explainableRecall.createCertificate({
                queryId: `verify_q${i}`,
                queryText: `Verification test ${i}`,
                chunks: buildChunks(10, 'Content'),
                requirements: ['test'],
                accessLevel: 'internal'
            });
            certificateIds.push(cert.id);
        }
        const latencies = [];
        const iterations = 500;
        for (let i = 0; i < iterations; i++) {
            const certId = certificateIds[i % certificateIds.length];
            latencies.push(measureMs(() => this.explainableRecall.verifyCertificate(certId)));
        }
        return this.createResult('Certificate Verification', latencies, 10);
    }
    /**
     * Benchmark 8: Provenance Lineage Query
     * Target: < 15ms p95
     *
     * Creates 100 provenance sources, reads back each row's `content_hash`,
     * then times 500 `getProvenanceLineage` calls cycling over those hashes.
     */
    async benchmarkProvenanceLineage() {
        console.log('\n📊 Benchmark 8: Provenance Lineage Query');
        // Create provenance sources
        const selectHash = this.db.prepare('SELECT content_hash FROM provenance_sources WHERE id = ?');
        const hashes = [];
        for (let i = 0; i < 100; i++) {
            const sourceId = this.explainableRecall.createProvenance({
                sourceType: 'episode',
                sourceId: i + 1,
                creator: 'benchmark_test'
            });
            hashes.push(selectHash.get(sourceId).content_hash);
        }
        const latencies = [];
        const iterations = 500;
        for (let i = 0; i < iterations; i++) {
            const hash = hashes[i % hashes.length];
            latencies.push(measureMs(() => this.explainableRecall.getProvenanceLineage(hash)));
        }
        return this.createResult('Provenance Lineage Query', latencies, 15);
    }
    /**
     * Benchmark 9: Concurrent Certificate Creation
     * Target: No degradation vs sequential
     *
     * Each of the 100 samples is the time to create a batch of 10
     * certificates. The batch is executed sequentially; no real concurrency
     * is exercised yet.
     */
    async benchmarkConcurrentOperations() {
        console.log('\n📊 Benchmark 9: Concurrent Certificate Creation');
        const latencies = [];
        const iterations = 100;
        const concurrency = 10;
        for (let i = 0; i < iterations; i++) {
            // Create 10 certificates concurrently (simulate with sequential for now)
            const batchMs = measureMs(() => {
                for (let j = 0; j < concurrency; j++) {
                    this.explainableRecall.createCertificate({
                        queryId: `concurrent_q${i}_${j}`,
                        queryText: `Concurrent query ${i} ${j}`,
                        chunks: buildChunks(10, 'Concurrent content'),
                        requirements: ['test'],
                        accessLevel: 'internal'
                    });
                }
            });
            latencies.push(batchMs);
        }
        return this.createResult('Concurrent Operations (10x)', latencies, 500);
    }
    /**
     * Benchmark 10: Database Size and Memory
     *
     * Prints the database size, how many of the six frontier tables exist and
     * the row counts of the edge, certificate and provenance tables.
     */
    async benchmarkDatabaseMetrics() {
        console.log('\n📊 Benchmark 10: Database Metrics');
        // Divide by 1024.0 so SQLite performs real (not integer) division and
        // the fractional megabytes survive.
        const stats = this.db.prepare(`
            SELECT
                (page_count * page_size) / 1024.0 / 1024.0 as size_mb
            FROM pragma_page_count(), pragma_page_size()
        `).get();
        const tableStats = this.db.prepare(`
            SELECT name
            FROM sqlite_master
            WHERE type='table'
                AND name IN ('causal_edges', 'causal_experiments', 'causal_observations',
                    'recall_certificates', 'provenance_sources', 'justification_paths')
        `).all();
        console.log(`\n Database Size: ${stats.size_mb.toFixed(2)} MB`);
        console.log(` Tables Created: ${tableStats.length}`);
        // Count records
        const causalEdges = this.db.prepare('SELECT COUNT(*) as count FROM causal_edges').get();
        const certificates = this.db.prepare('SELECT COUNT(*) as count FROM recall_certificates').get();
        const provenance = this.db.prepare('SELECT COUNT(*) as count FROM provenance_sources').get();
        console.log(`\n Causal Edges: ${causalEdges.count.toLocaleString()}`);
        console.log(` Certificates: ${certificates.count.toLocaleString()}`);
        console.log(` Provenance Sources: ${provenance.count.toLocaleString()}`);
    }
    /**
     * Runs every benchmark in order, prints a per-benchmark summary and an
     * overall pass/fail verdict. The database is cleaned up even when a
     * benchmark throws; the error then propagates to the caller.
     *
     * @returns {Promise<void>}
     */
    async runAll() {
        const divider = '═══════════════════════════════════════════════════════════';
        console.log(divider);
        console.log(' AgentDB Frontier Features Performance Benchmark');
        console.log(divider);
        const results = [];
        try {
            results.push(await this.benchmarkCausalEdgeInsertion());
            results.push(await this.benchmarkCausalQuery());
            results.push(await this.benchmarkCausalChain());
            results.push(await this.benchmarkExperimentObservation());
            results.push(await this.benchmarkUpliftCalculation());
            results.push(await this.benchmarkCertificateCreation());
            results.push(await this.benchmarkCertificateVerification());
            results.push(await this.benchmarkProvenanceLineage());
            results.push(await this.benchmarkConcurrentOperations());
            await this.benchmarkDatabaseMetrics();
            // Print summary
            console.log(`\n${divider}`);
            console.log(' Summary');
            console.log(`${divider}\n`);
            for (const result of results) {
                const status = result.passed ? '✅' : '❌';
                const target = result.target ? ` (target: ${result.target}ms)` : '';
                console.log(`${status} ${result.operation}`);
                console.log(` Count: ${result.count.toLocaleString()}`);
                console.log(` p50: ${result.p50Ms.toFixed(2)}ms | p95: ${result.p95Ms.toFixed(2)}ms | p99: ${result.p99Ms.toFixed(2)}ms${target}`);
                console.log(` Throughput: ${result.throughput.toFixed(0)} ops/sec`);
                console.log('');
            }
            // Overall pass/fail
            const failed = results.filter((r) => !r.passed);
            console.log(divider);
            if (failed.length === 0) {
                console.log('✅ ALL BENCHMARKS PASSED');
            }
            else {
                console.log(`❌ ${failed.length} BENCHMARK(S) FAILED:`);
                for (const r of failed) {
                    console.log(` - ${r.operation} (p95: ${r.p95Ms.toFixed(2)}ms > target: ${r.target}ms)`);
                }
            }
            console.log(`${divider}\n`);
        }
        finally {
            this.cleanup();
        }
    }
}
414
// Run if called directly
// NOTE(review): `require.main` / `module` exist only under CommonJS, while this
// file uses `import`/`export` syntax — confirm how the build is loaded.
if (require.main === module) {
    const benchmark = new FrontierBenchmark();
    benchmark.runAll().catch((error) => {
        console.error(error);
        // Report the failure to the shell/CI instead of exiting with status 0.
        process.exitCode = 1;
    });
}
419
+ export { FrontierBenchmark };