agentic-flow 1.6.5 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/.claude/skills/.claude-flow/metrics/agent-metrics.json +1 -0
  2. package/.claude/skills/.claude-flow/metrics/performance.json +87 -0
  3. package/.claude/skills/.claude-flow/metrics/task-metrics.json +10 -0
  4. package/.claude/skills/skill-builder/.claude-flow/metrics/agent-metrics.json +1 -0
  5. package/.claude/skills/skill-builder/.claude-flow/metrics/performance.json +87 -0
  6. package/.claude/skills/skill-builder/.claude-flow/metrics/task-metrics.json +10 -0
  7. package/CHANGELOG.md +0 -20
  8. package/README.md +16 -2
  9. package/dist/agentdb/benchmarks/comprehensive-benchmark.js +664 -0
  10. package/dist/agentdb/benchmarks/frontier-benchmark.js +419 -0
  11. package/dist/agentdb/benchmarks/reflexion-benchmark.js +370 -0
  12. package/dist/agentdb/cli/agentdb-cli.js +717 -0
  13. package/dist/agentdb/controllers/CausalMemoryGraph.js +322 -0
  14. package/dist/agentdb/controllers/CausalRecall.js +281 -0
  15. package/dist/agentdb/controllers/EmbeddingService.js +118 -0
  16. package/dist/agentdb/controllers/ExplainableRecall.js +387 -0
  17. package/dist/agentdb/controllers/NightlyLearner.js +382 -0
  18. package/dist/agentdb/controllers/ReflexionMemory.js +239 -0
  19. package/dist/agentdb/controllers/SkillLibrary.js +276 -0
  20. package/dist/agentdb/controllers/frontier-index.js +9 -0
  21. package/dist/agentdb/controllers/index.js +8 -0
  22. package/dist/agentdb/index.js +32 -0
  23. package/dist/agentdb/optimizations/BatchOperations.js +198 -0
  24. package/dist/agentdb/optimizations/QueryOptimizer.js +225 -0
  25. package/dist/agentdb/optimizations/index.js +7 -0
  26. package/dist/agentdb/tests/frontier-features.test.js +665 -0
  27. package/dist/cli/skills-manager.js +1297 -0
  28. package/dist/cli/update-message.js +175 -0
  29. package/dist/cli-proxy.js +2 -26
  30. package/dist/mcp/standalone-stdio.js +200 -4
  31. package/dist/memory/SharedMemoryPool.js +211 -0
  32. package/dist/memory/index.js +6 -0
  33. package/dist/reasoningbank/AdvancedMemory.js +67 -0
  34. package/dist/reasoningbank/HybridBackend.js +91 -0
  35. package/dist/reasoningbank/index-new.js +87 -0
  36. package/dist/reasoningbank/index.js +0 -4
  37. package/dist/utils/cli.js +0 -5
  38. package/docs/AGENTDB_TESTING.md +411 -0
  39. package/package.json +4 -4
  40. package/scripts/run-validation.sh +165 -0
  41. package/scripts/test-agentdb.sh +153 -0
  42. package/wasm/reasoningbank/reasoningbank_wasm_bg.js +2 -2
  43. package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm +0 -0
  44. package/docs/AGENTDB_INTEGRATION.md +0 -379
@@ -0,0 +1,665 @@
1
+ /**
2
+ * Comprehensive Tests for Frontier Features
3
+ *
4
+ * Validates:
5
+ * 1. CausalMemoryGraph - causal inference, uplift calculation, A/B testing
6
+ * 2. ExplainableRecall - minimal hitting sets, Merkle proofs, provenance
7
+ *
8
+ * NO MOCKING - Real SQLite database, real algorithms, real results
9
+ */
10
+ import Database from 'better-sqlite3';
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import { CausalMemoryGraph } from '../controllers/CausalMemoryGraph';
14
+ import { ExplainableRecall } from '../controllers/ExplainableRecall';
15
/**
 * CausalMemoryGraph suite.
 *
 * Each test gets a brand-new in-memory SQLite database with the core and
 * frontier schemas applied and five seed episodes inserted, so tests do not
 * share state.
 */
describe('Frontier Features - CausalMemoryGraph', () => {
  let db;
  let causalGraph;

  // Schema files applied, in this order, to every fresh database.
  const SCHEMA_PATHS = ['../schemas/schema.sql', '../schemas/frontier-schema.sql'];

  // Seed rows as [session_id, task, reward, success].
  const SEED_EPISODES = [
    ['session1', 'code_review', 0.8, 1],
    ['session1', 'bug_fix', 0.9, 1],
    ['session2', 'feature_impl', 0.7, 1],
    ['session2', 'testing', 0.85, 1],
    ['session3', 'refactoring', 0.75, 1],
  ];

  // Adds an episode -> episode causal edge and hands back addCausalEdge's result.
  const linkEpisodes = (fromMemoryId, toMemoryId, details) =>
    causalGraph.addCausalEdge({
      fromMemoryId,
      fromMemoryType: 'episode',
      toMemoryId,
      toMemoryType: 'episode',
      ...details,
    });

  // Creates a running experiment whose treatment is episode 1.
  const startExperiment = (name, hypothesis) =>
    causalGraph.createExperiment({
      name,
      hypothesis,
      treatmentId: 1,
      treatmentType: 'episode',
      startTime: Date.now(),
      sampleSize: 0,
      status: 'running',
    });

  // Records a single 'reward' observation for an experiment.
  const observe = (experimentId, episodeId, isTreatment, outcomeValue) =>
    causalGraph.recordObservation({
      experimentId,
      episodeId,
      isTreatment,
      outcomeValue,
      outcomeType: 'reward',
    });

  // Reads the raw experiment row straight from the database.
  const loadExperimentRow = (experimentId) =>
    db.prepare('SELECT * FROM causal_experiments WHERE id = ?').get(experimentId);

  beforeEach(() => {
    db = new Database(':memory:');
    for (const schemaPath of SCHEMA_PATHS) {
      db.exec(fs.readFileSync(path.join(__dirname, schemaPath), 'utf-8'));
    }
    causalGraph = new CausalMemoryGraph(db);

    const insertEpisode = db.prepare(`
      INSERT INTO episodes (session_id, task, reward, success)
      VALUES (?, ?, ?, ?)
    `);
    for (const row of SEED_EPISODES) {
      insertEpisode.run(...row);
    }
  });

  afterEach(() => {
    db.close();
  });

  test('should add causal edge with all properties', () => {
    const edgeId = linkEpisodes(1, 2, {
      similarity: 0.85,
      uplift: 0.15,
      confidence: 0.9,
      sampleSize: 100,
      evidenceIds: ['exp1', 'exp2'],
      mechanism: 'code_review improves bug_fix success',
    });
    expect(edgeId).toBeGreaterThan(0);

    // Read the row back and compare column by column.
    const row = db.prepare('SELECT * FROM causal_edges WHERE id = ?').get(edgeId);
    const expectedColumns = [
      ['from_memory_id', 1],
      ['to_memory_id', 2],
      ['similarity', 0.85],
      ['uplift', 0.15],
      ['confidence', 0.9],
      ['sample_size', 100],
    ];
    for (const [column, expected] of expectedColumns) {
      expect(row[column]).toBe(expected);
    }
    // evidence_ids is asserted through JSON.parse, i.e. it is stored as JSON text.
    expect(JSON.parse(row.evidence_ids)).toEqual(['exp1', 'exp2']);
    expect(row.mechanism).toBe('code_review improves bug_fix success');
  });

  test('should create and track A/B experiment', () => {
    // Written out in full because this is the only experiment with a controlId.
    const expId = causalGraph.createExperiment({
      name: 'Test Code Review Impact',
      hypothesis: 'Code review reduces bugs',
      treatmentId: 1,
      treatmentType: 'episode',
      controlId: 3,
      startTime: Date.now(),
      sampleSize: 0,
      status: 'running',
    });
    expect(expId).toBeGreaterThan(0);

    const row = loadExperimentRow(expId);
    expect(row.name).toBe('Test Code Review Impact');
    expect(row.status).toBe('running');
    expect(row.sample_size).toBe(0);
  });

  test('should record observations and update sample size', () => {
    const expId = startExperiment('Test Experiment', 'Treatment improves outcome');

    observe(expId, 1, true, 0.9); // treatment arm
    observe(expId, 2, false, 0.7); // control arm

    // The experiment row should count both observations.
    expect(loadExperimentRow(expId).sample_size).toBe(2);

    // Both observation rows should be persisted.
    const observationRows = db
      .prepare('SELECT * FROM causal_observations WHERE experiment_id = ?')
      .all(expId);
    expect(observationRows).toHaveLength(2);
  });

  test('should calculate uplift with statistical significance', () => {
    const expId = startExperiment('Uplift Test', 'Treatment increases reward');
    const SAMPLES_PER_ARM = 50;

    // Treatment arm: outcomes drawn from 0.75-0.85 (mean ~0.8).
    for (let n = 0; n < SAMPLES_PER_ARM; n += 1) {
      observe(expId, 1, true, 0.75 + Math.random() * 0.1);
    }
    // Control arm: outcomes drawn from 0.55-0.65 (mean ~0.6).
    for (let n = 0; n < SAMPLES_PER_ARM; n += 1) {
      observe(expId, 2, false, 0.55 + Math.random() * 0.1);
    }

    const outcome = causalGraph.calculateUplift(expId);

    expect(outcome.uplift).toBeGreaterThan(0.1); // Treatment > Control
    expect(outcome.uplift).toBeLessThan(0.3); // Reasonable range
    expect(outcome.pValue).toBeLessThan(0.05); // Statistically significant
    expect(outcome.confidenceInterval[0]).toBeLessThan(outcome.uplift);
    expect(outcome.confidenceInterval[1]).toBeGreaterThan(outcome.uplift);

    // calculateUplift is expected to persist its result and close the experiment.
    const row = loadExperimentRow(expId);
    expect(row.status).toBe('completed');
    expect(row.uplift).toBeCloseTo(outcome.uplift, 5);
    expect(row.p_value).toBeCloseTo(outcome.pValue, 5);
  });

  test('should query causal effects by confidence and uplift', () => {
    // One edge that satisfies both filters...
    linkEpisodes(1, 2, { similarity: 0.8, uplift: 0.2, confidence: 0.9, sampleSize: 100 });
    // ...one whose uplift is too low...
    linkEpisodes(1, 3, { similarity: 0.7, uplift: 0.05, confidence: 0.95, sampleSize: 80 });
    // ...and one whose confidence is too low.
    linkEpisodes(1, 4, { similarity: 0.85, uplift: 0.25, confidence: 0.4, sampleSize: 50 });

    const matches = causalGraph.queryCausalEffects({
      interventionMemoryId: 1,
      interventionMemoryType: 'episode',
      minConfidence: 0.8,
      minUplift: 0.1,
    });

    // Only the first edge (high confidence + high uplift) should survive.
    expect(matches).toHaveLength(1);
    const [onlyMatch] = matches;
    expect(onlyMatch.toMemoryId).toBe(2);
    expect(onlyMatch.confidence).toBe(0.9);
    expect(onlyMatch.uplift).toBe(0.2);
  });

  test('should find multi-hop causal chains', () => {
    // Chain 1 -> 2 -> 3 ...
    linkEpisodes(1, 2, { similarity: 0.8, uplift: 0.1, confidence: 0.9, sampleSize: 100 });
    linkEpisodes(2, 3, { similarity: 0.75, uplift: 0.15, confidence: 0.85, sampleSize: 80 });
    // ... plus a direct 1 -> 3 edge with lower uplift.
    linkEpisodes(1, 3, { similarity: 0.7, uplift: 0.05, confidence: 0.8, sampleSize: 60 });

    const chains = causalGraph.getCausalChain(1, 3, 5);
    expect(chains.length).toBeGreaterThanOrEqual(1);

    // Pick the two-hop path, identified by its three nodes [1, 2, 3].
    const viaEpisodeTwo = chains.find((chain) => chain.path.length === 3);
    expect(viaEpisodeTwo).toBeDefined();
    expect(viaEpisodeTwo.path).toEqual([1, 2, 3]);
    expect(viaEpisodeTwo.totalUplift).toBeCloseTo(0.25, 1); // 0.1 + 0.15
    expect(viaEpisodeTwo.confidence).toBeGreaterThanOrEqual(0.85); // Min of chain
  });

  test('should detect potential confounders', () => {
    // Scenario: episode 3 might confound the relationship between episodes 1 and 2.
    const edgeId = linkEpisodes(1, 2, {
      similarity: 0.8,
      uplift: 0.2,
      confidence: 0.9,
      sampleSize: 100,
    });

    const report = causalGraph.detectConfounders(edgeId);

    // The list may legitimately be empty for this small fixture.
    expect(report).toHaveProperty('confounders');
    expect(Array.isArray(report.confounders)).toBe(true);

    // Whatever is reported must carry the full set of fields.
    for (const candidate of report.confounders) {
      expect(candidate).toHaveProperty('memoryId');
      expect(candidate).toHaveProperty('correlationWithTreatment');
      expect(candidate).toHaveProperty('correlationWithOutcome');
      expect(candidate).toHaveProperty('confounderScore');
      expect(candidate.confounderScore).toBeGreaterThan(0.3);
    }
  });

  test('should calculate causal gain vs baseline', () => {
    // Edge from the treatment (episode 1) to an outcome episode.
    linkEpisodes(1, 2, {
      similarity: 0.85,
      uplift: 0.2,
      confidence: 0.9,
      mechanism: 'improves outcome',
    });

    const gain = causalGraph.calculateCausalGain(1, 'reward');

    expect(gain).toHaveProperty('causalGain');
    expect(gain).toHaveProperty('confidence');
    expect(gain).toHaveProperty('mechanism');
    expect(gain.confidence).toBeGreaterThanOrEqual(0);
    expect(gain.confidence).toBeLessThanOrEqual(1);
    expect(typeof gain.causalGain).toBe('number');
  });
});
304
/**
 * ExplainableRecall suite.
 *
 * Each test gets a brand-new in-memory SQLite database with the core and
 * frontier schemas applied and ten seed episodes (session1..session10), then
 * exercises certificate creation, verification, justification, provenance
 * and auditing through the ExplainableRecall controller.
 */
describe('Frontier Features - ExplainableRecall', () => {
  let db;
  let explainableRecall;

  beforeEach(() => {
    db = new Database(':memory:');
    const coreSchema = fs.readFileSync(path.join(__dirname, '../schemas/schema.sql'), 'utf-8');
    db.exec(coreSchema);
    const frontierSchema = fs.readFileSync(path.join(__dirname, '../schemas/frontier-schema.sql'), 'utf-8');
    db.exec(frontierSchema);
    explainableRecall = new ExplainableRecall(db);

    // Prepare the INSERT once and reuse it for every row; the statement text
    // is loop-invariant, so re-preparing it per iteration is wasted work.
    const insertEpisode = db.prepare(`
      INSERT INTO episodes (session_id, task, reward, success)
      VALUES (?, ?, ?, ?)
    `);
    for (let i = 1; i <= 10; i++) {
      insertEpisode.run(`session${i}`, `task${i}`, 0.8, 1);
    }
  });

  afterEach(() => {
    db.close();
  });

  test('should create recall certificate with minimal hitting set', () => {
    const chunks = [
      { id: '1', type: 'episode', content: 'Implement authentication', relevance: 0.9 },
      { id: '2', type: 'episode', content: 'Add JWT tokens', relevance: 0.85 },
      { id: '3', type: 'episode', content: 'Hash passwords', relevance: 0.8 },
      { id: '4', type: 'episode', content: 'Setup database', relevance: 0.7 },
    ];
    const requirements = ['authentication', 'security', 'tokens'];

    const certificate = explainableRecall.createCertificate({
      queryId: 'q1',
      queryText: 'How to implement secure authentication?',
      chunks,
      requirements,
      accessLevel: 'internal',
    });

    // Certificate structure mirrors the input chunks, in order.
    expect(certificate).toHaveProperty('id');
    expect(certificate.queryId).toBe('q1');
    expect(certificate.chunkIds).toEqual(['1', '2', '3', '4']);
    expect(certificate.chunkTypes).toEqual(['episode', 'episode', 'episode', 'episode']);

    // The minimal hitting set is non-empty and never larger than the input.
    expect(certificate.minimalWhy).toBeDefined();
    expect(certificate.minimalWhy.length).toBeGreaterThan(0);
    expect(certificate.minimalWhy.length).toBeLessThanOrEqual(chunks.length);

    // Redundancy ratio is total chunks divided by minimal-set size, hence >= 1.
    expect(certificate.redundancyRatio).toBeGreaterThanOrEqual(1);
    expect(certificate.redundancyRatio).toBe(chunks.length / certificate.minimalWhy.length);

    // Completeness is a score in [0, 1].
    expect(certificate.completenessScore).toBeGreaterThanOrEqual(0);
    expect(certificate.completenessScore).toBeLessThanOrEqual(1);

    // A Merkle root must be present.
    expect(certificate.merkleRoot).toBeDefined();
    expect(certificate.merkleRoot.length).toBeGreaterThan(0);

    // One source hash per chunk.
    expect(certificate.sourceHashes).toHaveLength(chunks.length);

    expect(certificate.accessLevel).toBe('internal');

    // NOTE(review): this requires a strictly positive latency. If the
    // controller measures with a millisecond-resolution clock, a very fast run
    // could report 0 and fail here - confirm how latencyMs is computed.
    expect(certificate.latencyMs).toBeGreaterThan(0);
  });

  test('should verify certificate integrity', () => {
    const chunks = [
      { id: '1', type: 'episode', content: 'Test content 1', relevance: 0.9 },
      { id: '2', type: 'episode', content: 'Test content 2', relevance: 0.8 },
    ];
    const certificate = explainableRecall.createCertificate({
      queryId: 'q2',
      queryText: 'Test query',
      chunks,
      requirements: ['test'],
      accessLevel: 'public',
    });

    // An untouched certificate must verify cleanly.
    const verification = explainableRecall.verifyCertificate(certificate.id);
    expect(verification.valid).toBe(true);
    expect(verification.issues).toHaveLength(0);
  });

  test('should detect tampered certificate', () => {
    const chunks = [
      { id: '1', type: 'episode', content: 'Original content', relevance: 0.9 },
    ];
    const certificate = explainableRecall.createCertificate({
      queryId: 'q3',
      queryText: 'Test query',
      chunks,
      requirements: ['test'],
      accessLevel: 'internal',
    });

    // Tamper with the stored row directly, bypassing the controller.
    db.prepare(`
      UPDATE recall_certificates
      SET chunk_ids = ?
      WHERE id = ?
    `).run(JSON.stringify(['1', '999']), certificate.id);

    // Verification must now fail, and the first reported issue must mention
    // the Merkle root.
    const verification = explainableRecall.verifyCertificate(certificate.id);
    expect(verification.valid).toBe(false);
    expect(verification.issues.length).toBeGreaterThan(0);
    expect(verification.issues[0]).toContain('Merkle root');
  });

  test('should provide justification for each chunk', () => {
    const chunks = [
      { id: '1', type: 'episode', content: 'Setup API endpoint', relevance: 0.9 },
      { id: '2', type: 'episode', content: 'Add validation', relevance: 0.85 },
    ];
    const certificate = explainableRecall.createCertificate({
      queryId: 'q4',
      queryText: 'How to create API?',
      chunks,
      requirements: ['api', 'validation'],
      accessLevel: 'internal',
    });

    // Only the first chunk's justification is inspected here.
    const justification = explainableRecall.getJustification(certificate.id, '1');
    expect(justification).toBeDefined();
    expect(justification.chunkId).toBe('1');
    expect(justification.chunkType).toBe('episode');
    expect(justification.reason).toBeDefined();
    expect(justification.necessityScore).toBeGreaterThanOrEqual(0);
    expect(justification.necessityScore).toBeLessThanOrEqual(1);
    expect(justification.pathElements).toBeDefined();
    expect(Array.isArray(justification.pathElements)).toBe(true);
  });

  test('should track provenance lineage', () => {
    // Register a provenance source for seed episode 1.
    const sourceId = explainableRecall.createProvenance({
      sourceType: 'episode',
      sourceId: 1,
      creator: 'test_user',
    });
    expect(sourceId).toBeGreaterThan(0);

    // Fetch the stored row to obtain its content hash.
    const source = db.prepare('SELECT * FROM provenance_sources WHERE id = ?').get(sourceId);
    expect(source).toBeDefined();
    expect(source.content_hash).toBeDefined();

    // Lineage lookup by hash should return exactly that one source.
    const lineage = explainableRecall.getProvenanceLineage(source.content_hash);
    expect(lineage).toHaveLength(1);
    expect(lineage[0].sourceType).toBe('episode');
    expect(lineage[0].sourceId).toBe(1);
    expect(lineage[0].creator).toBe('test_user');
  });

  test('should audit certificate for quality metrics', () => {
    const chunks = [
      { id: '1', type: 'episode', content: 'Content 1', relevance: 0.95 },
      { id: '2', type: 'episode', content: 'Content 2', relevance: 0.9 },
      { id: '3', type: 'episode', content: 'Content 3', relevance: 0.85 },
      { id: '4', type: 'episode', content: 'Content 4', relevance: 0.5 }, // Low relevance
    ];
    const certificate = explainableRecall.createCertificate({
      queryId: 'q5',
      queryText: 'Test audit query',
      chunks,
      requirements: ['req1', 'req2'],
      accessLevel: 'internal',
    });

    const audit = explainableRecall.auditCertificate(certificate.id);

    // The audit report must expose every expected field.
    expect(audit).toHaveProperty('certificateId');
    expect(audit).toHaveProperty('queryText');
    expect(audit).toHaveProperty('totalChunks');
    expect(audit).toHaveProperty('minimalSetSize');
    expect(audit).toHaveProperty('redundancyRatio');
    expect(audit).toHaveProperty('completenessScore');
    expect(audit).toHaveProperty('avgNecessityScore');
    expect(audit).toHaveProperty('provenanceVerified');
    expect(audit).toHaveProperty('qualityScore');

    // Field values must be consistent with the certificate that was created.
    expect(audit.certificateId).toBe(certificate.id);
    expect(audit.totalChunks).toBe(4);
    expect(audit.minimalSetSize).toBeGreaterThan(0);
    expect(audit.redundancyRatio).toBeGreaterThanOrEqual(1);
    expect(audit.qualityScore).toBeGreaterThanOrEqual(0);
    expect(audit.qualityScore).toBeLessThanOrEqual(1);
    expect(audit.provenanceVerified).toBe(true);
  });

  test('should handle empty chunks gracefully', () => {
    const certificate = explainableRecall.createCertificate({
      queryId: 'q6',
      queryText: 'Empty query',
      chunks: [],
      requirements: ['test'],
      accessLevel: 'public',
    });

    // With no chunks, every derived metric is expected to be zero or empty
    // rather than NaN or a thrown error.
    expect(certificate.chunkIds).toHaveLength(0);
    expect(certificate.minimalWhy).toHaveLength(0);
    expect(certificate.redundancyRatio).toBe(0);
    expect(certificate.completenessScore).toBe(0);
  });

  test('should calculate correct minimal hitting set', () => {
    // Each chunk covers a different requirement, so none is redundant.
    const chunks = [
      { id: '1', type: 'episode', content: 'auth implementation', relevance: 0.9 },
      { id: '2', type: 'episode', content: 'database setup', relevance: 0.85 },
      { id: '3', type: 'episode', content: 'API endpoints', relevance: 0.8 },
    ];
    const requirements = ['auth', 'database', 'api'];

    const certificate = explainableRecall.createCertificate({
      queryId: 'q7',
      queryText: 'Full stack implementation',
      chunks,
      requirements,
      accessLevel: 'internal',
    });

    // All three chunks belong to the minimal set, giving a ratio of 3 / 3.
    expect(certificate.minimalWhy.length).toBe(3);
    expect(certificate.redundancyRatio).toBe(1.0);
  });

  test('should generate valid Merkle proofs', () => {
    // Eight chunks with ids '1'..'8' and relevance falling from 0.9 in 0.05 steps.
    const chunks = Array.from({ length: 8 }, (_, i) => ({
      id: `${i + 1}`,
      type: 'episode',
      content: `Content ${i + 1}`,
      relevance: 0.9 - i * 0.05,
    }));

    const certificate = explainableRecall.createCertificate({
      queryId: 'q8',
      queryText: 'Merkle test',
      chunks,
      requirements: ['test'],
      accessLevel: 'internal',
    });

    // The root is checked for presence and length only.
    expect(certificate.merkleRoot).toBeDefined();
    expect(certificate.merkleRoot.length).toBe(64); // SHA-256 hex = 64 chars

    // One hash per chunk, each with the same 64-character length.
    expect(certificate.sourceHashes).toHaveLength(8);
    certificate.sourceHashes.forEach(hash => {
      expect(hash.length).toBe(64);
    });
  });
});
540
/**
 * Integration suite exercising CausalMemoryGraph and ExplainableRecall against
 * the same database.
 *
 * Each test gets a brand-new in-memory SQLite database with the core and
 * frontier schemas applied. Unlike the per-controller suites, this setup does
 * NOT seed any episodes; a test that needs them must insert them itself.
 */
describe('Frontier Features - Integration Tests', () => {
  let db;
  let causalGraph;
  let explainableRecall;

  beforeEach(() => {
    db = new Database(':memory:');
    const coreSchema = fs.readFileSync(path.join(__dirname, '../schemas/schema.sql'), 'utf-8');
    db.exec(coreSchema);
    const frontierSchema = fs.readFileSync(path.join(__dirname, '../schemas/frontier-schema.sql'), 'utf-8');
    db.exec(frontierSchema);
    causalGraph = new CausalMemoryGraph(db);
    explainableRecall = new ExplainableRecall(db);
  });

  afterEach(() => {
    db.close();
  });

  test('should combine causal reasoning with explainable recall', () => {
    // 1. Insert episodes. The INSERT is prepared once and reused for every
    //    row; the statement text is loop-invariant.
    const insertEpisode = db.prepare(`
      INSERT INTO episodes (session_id, task, reward, success)
      VALUES (?, ?, ?, ?)
    `);
    for (let i = 1; i <= 5; i++) {
      insertEpisode.run(`session${i}`, `task${i}`, 0.7 + i * 0.05, 1);
    }

    // 2. Add a causal edge between the first two episodes.
    causalGraph.addCausalEdge({
      fromMemoryId: 1,
      fromMemoryType: 'episode',
      toMemoryId: 2,
      toMemoryType: 'episode',
      similarity: 0.85,
      uplift: 0.15,
      confidence: 0.9,
      mechanism: 'task1 improves task2',
    });

    // 3. Create a retrieval certificate over the same two episodes.
    const chunks = [
      { id: '1', type: 'episode', content: 'First task', relevance: 0.9 },
      { id: '2', type: 'episode', content: 'Second task', relevance: 0.85 },
    ];
    const certificate = explainableRecall.createCertificate({
      queryId: 'integrated_q1',
      queryText: 'Show me tasks with causal relationships',
      chunks,
      requirements: ['causal', 'evidence'],
      accessLevel: 'internal',
    });

    // 4. Both systems must be queryable against the shared database.
    const causalEffects = causalGraph.queryCausalEffects({
      interventionMemoryId: 1,
      interventionMemoryType: 'episode',
      minConfidence: 0.8,
    });
    expect(causalEffects.length).toBeGreaterThan(0);
    expect(certificate).toBeDefined();
    expect(certificate.minimalWhy.length).toBeGreaterThan(0);

    // 5. Audit the certificate.
    const audit = explainableRecall.auditCertificate(certificate.id);
    expect(audit.provenanceVerified).toBe(true);
  });

  test('should handle large-scale causal experiments efficiently', () => {
    // NOTE(review): no episodes are inserted in this test or in beforeEach,
    // yet the experiment references treatmentId 1 and the observations use
    // episodeId 1-10. Confirm the schema does not enforce foreign keys on
    // those columns; otherwise seed the episodes here first.
    const expId = causalGraph.createExperiment({
      name: 'Large Scale Test',
      hypothesis: 'Treatment improves outcome at scale',
      treatmentId: 1,
      treatmentType: 'episode',
      startTime: Date.now(),
      sampleSize: 0,
      status: 'running',
    });

    const startTime = Date.now();
    // Record 1000 observations: even i is treatment (0.8-0.9), odd i is
    // control (0.6-0.7); episode ids cycle through 1..10.
    for (let i = 0; i < 1000; i++) {
      causalGraph.recordObservation({
        experimentId: expId,
        episodeId: i % 10 + 1,
        isTreatment: i % 2 === 0,
        outcomeValue: (i % 2 === 0 ? 0.8 : 0.6) + Math.random() * 0.1,
        outcomeType: 'reward',
      });
    }
    const recordTime = Date.now() - startTime;

    const calcStart = Date.now();
    const result = causalGraph.calculateUplift(expId);
    const calcTime = Date.now() - calcStart;

    // Performance assertions. These are wall-clock thresholds, so they depend
    // on the speed and load of the machine running the tests.
    expect(recordTime).toBeLessThan(1000); // < 1 second to record 1000 observations
    expect(calcTime).toBeLessThan(100); // < 100ms to calculate uplift

    // Result assertions: the two arms are about 0.2 apart by construction.
    expect(result.uplift).toBeGreaterThan(0.1);
    expect(result.pValue).toBeLessThan(0.05);
  });

  test('should handle concurrent certificate creation', () => {
    // Ten chunks with ids '1'..'10' and relevance falling from 0.9 in 0.05 steps.
    const chunks = Array.from({ length: 10 }, (_, i) => ({
      id: `${i + 1}`,
      type: 'episode',
      content: `Content ${i + 1}`,
      relevance: 0.9 - i * 0.05,
    }));

    // Despite the test title, the certificates are created one after another
    // in a plain loop; nothing here runs in parallel. What this covers is
    // many certificates coexisting in one database. Certificate i uses the
    // first i + 1 chunks.
    const certificates = [];
    for (let i = 0; i < 10; i++) {
      const cert = explainableRecall.createCertificate({
        queryId: `concurrent_q${i}`,
        queryText: `Concurrent query ${i}`,
        chunks: chunks.slice(0, i + 1),
        requirements: ['test'],
        accessLevel: 'internal',
      });
      certificates.push(cert);
    }

    // Every certificate must be present, keep its own query id, and hold
    // exactly the chunks it was given.
    expect(certificates).toHaveLength(10);
    certificates.forEach((cert, idx) => {
      expect(cert.queryId).toBe(`concurrent_q${idx}`);
      expect(cert.chunkIds.length).toBe(idx + 1);
    });

    // Every certificate must still verify after all of them were written.
    certificates.forEach(cert => {
      const verification = explainableRecall.verifyCertificate(cert.id);
      expect(verification.valid).toBe(true);
    });
  });
});