agentic-flow 1.7.3 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/test-neural.md +0 -5
- package/.claude/answer.md +1 -0
- package/.claude/settings.json +19 -20
- package/CHANGELOG.md +0 -117
- package/README.md +17 -81
- package/dist/agentdb/benchmarks/comprehensive-benchmark.js +664 -0
- package/dist/agentdb/benchmarks/frontier-benchmark.js +419 -0
- package/dist/agentdb/benchmarks/reflexion-benchmark.js +370 -0
- package/dist/agentdb/cli/agentdb-cli.js +717 -0
- package/dist/agentdb/controllers/CausalMemoryGraph.js +322 -0
- package/dist/agentdb/controllers/CausalRecall.js +281 -0
- package/dist/agentdb/controllers/EmbeddingService.js +118 -0
- package/dist/agentdb/controllers/ExplainableRecall.js +387 -0
- package/dist/agentdb/controllers/NightlyLearner.js +382 -0
- package/dist/agentdb/controllers/ReflexionMemory.js +239 -0
- package/dist/agentdb/controllers/SkillLibrary.js +276 -0
- package/dist/agentdb/controllers/frontier-index.js +9 -0
- package/dist/agentdb/controllers/index.js +8 -0
- package/dist/agentdb/index.js +32 -0
- package/dist/agentdb/optimizations/BatchOperations.js +198 -0
- package/dist/agentdb/optimizations/QueryOptimizer.js +225 -0
- package/dist/agentdb/optimizations/index.js +7 -0
- package/dist/agentdb/tests/frontier-features.test.js +665 -0
- package/dist/cli-proxy.js +2 -33
- package/dist/mcp/standalone-stdio.js +200 -4
- package/dist/memory/SharedMemoryPool.js +211 -0
- package/dist/memory/index.js +6 -0
- package/dist/reasoningbank/AdvancedMemory.js +239 -0
- package/dist/reasoningbank/HybridBackend.js +305 -0
- package/dist/reasoningbank/index-new.js +87 -0
- package/dist/reasoningbank/index.js +23 -44
- package/dist/utils/cli.js +0 -22
- package/docs/AGENTDB_TESTING.md +411 -0
- package/docs/v1.7.1-QUICK-START.md +399 -0
- package/package.json +4 -4
- package/scripts/run-validation.sh +165 -0
- package/scripts/test-agentdb.sh +153 -0
- package/.claude/skills/agentdb-memory-patterns/SKILL.md +0 -166
- package/.claude/skills/agentdb-vector-search/SKILL.md +0 -126
- package/.claude/skills/agentic-flow/agentdb-memory-patterns/SKILL.md +0 -166
- package/.claude/skills/agentic-flow/agentdb-vector-search/SKILL.md +0 -126
- package/.claude/skills/agentic-flow/reasoningbank-intelligence/SKILL.md +0 -201
- package/.claude/skills/agentic-flow/swarm-orchestration/SKILL.md +0 -179
- package/.claude/skills/reasoningbank-intelligence/SKILL.md +0 -201
- package/.claude/skills/skill-builder/README.md +0 -308
- package/.claude/skills/skill-builder/SKILL.md +0 -910
- package/.claude/skills/skill-builder/docs/SPECIFICATION.md +0 -358
- package/.claude/skills/skill-builder/resources/schemas/skill-frontmatter.schema.json +0 -41
- package/.claude/skills/skill-builder/resources/templates/full-skill.template +0 -118
- package/.claude/skills/skill-builder/resources/templates/minimal-skill.template +0 -38
- package/.claude/skills/skill-builder/scripts/generate-skill.sh +0 -334
- package/.claude/skills/skill-builder/scripts/validate-skill.sh +0 -198
- package/.claude/skills/swarm-orchestration/SKILL.md +0 -179
- package/docs/AGENTDB_INTEGRATION.md +0 -379
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Nightly Learner - Automated Causal Discovery and Consolidation
|
|
3
|
+
*
|
|
4
|
+
* Runs as a background job to:
|
|
5
|
+
* 1. Discover new causal edges from episode patterns
|
|
6
|
+
* 2. Run A/B experiments on promising hypotheses
|
|
7
|
+
* 3. Calculate uplift for completed experiments
|
|
8
|
+
* 4. Prune low-confidence edges
|
|
9
|
+
* 5. Update rerank weights based on performance
|
|
10
|
+
*
|
|
11
|
+
* Based on doubly robust learner:
|
|
12
|
+
* τ̂(x) = μ1(x) − μ0(x) + [a*(y−μ1(x)) / e(x)] − [(1−a)*(y−μ0(x)) / (1−e(x))]
|
|
13
|
+
*/
|
|
14
|
+
import { CausalMemoryGraph } from './CausalMemoryGraph.js';
|
|
15
|
+
import { ReflexionMemory } from './ReflexionMemory.js';
|
|
16
|
+
import { SkillLibrary } from './SkillLibrary.js';
|
|
17
|
+
export class NightlyLearner {
    /**
     * Default learner configuration. Any option the caller omits from the
     * constructor's `config` argument falls back to the value listed here.
     */
    static DEFAULT_CONFIG = Object.freeze({
        minSimilarity: 0.7,
        minSampleSize: 30,
        confidenceThreshold: 0.6,
        upliftThreshold: 0.05,
        pruneOldEdges: true,
        edgeMaxAgeDays: 90,
        autoExperiments: true,
        experimentBudget: 10
    });
    config;
    db;
    causalGraph;
    reflexion;
    skillLibrary;
    /**
     * @param {object} db - Database handle exposing `prepare(sql)` whose
     *   statements offer `get`, `all` and `run`.
     * @param {object} embedder - Embedding provider, forwarded to
     *   ReflexionMemory and SkillLibrary.
     * @param {object} [config] - Partial or full configuration; missing keys
     *   are filled from `NightlyLearner.DEFAULT_CONFIG`.
     */
    constructor(db, embedder, config = {}) {
        // Merge rather than replace, so a partial config keeps the remaining defaults.
        this.config = { ...NightlyLearner.DEFAULT_CONFIG, ...config };
        this.db = db;
        this.causalGraph = new CausalMemoryGraph(db);
        this.reflexion = new ReflexionMemory(db, embedder);
        this.skillLibrary = new SkillLibrary(db, embedder);
    }
    /**
     * Main learning job - runs all discovery and consolidation tasks.
     *
     * Steps: discover edges, complete running experiments, optionally create
     * new experiments (`autoExperiments`), optionally prune edges
     * (`pruneOldEdges`), compute statistics, generate recommendations, and
     * print the report.
     *
     * @returns {Promise<object>} The populated report object.
     * @throws Re-throws any error raised by a step after logging it.
     */
    async run() {
        console.log('\n🌙 Nightly Learner Starting...\n');
        const startTime = Date.now();
        const report = {
            timestamp: startTime,
            executionTimeMs: 0,
            edgesDiscovered: 0,
            edgesPruned: 0,
            experimentsCompleted: 0,
            experimentsCreated: 0,
            avgUplift: 0,
            avgConfidence: 0,
            recommendations: []
        };
        try {
            // Step 1: Discover new causal edges
            console.log('📊 Discovering causal edges from episode patterns...');
            report.edgesDiscovered = await this.discoverCausalEdges();
            console.log(` ✓ Discovered ${report.edgesDiscovered} new edges\n`);
            // Step 2: Complete running experiments
            console.log('🧪 Completing A/B experiments...');
            report.experimentsCompleted = await this.completeExperiments();
            console.log(` ✓ Completed ${report.experimentsCompleted} experiments\n`);
            // Step 3: Create new experiments (if enabled)
            if (this.config.autoExperiments) {
                console.log('🔬 Creating new A/B experiments...');
                report.experimentsCreated = await this.createExperiments();
                console.log(` ✓ Created ${report.experimentsCreated} new experiments\n`);
            }
            // Step 4: Prune low-confidence edges
            if (this.config.pruneOldEdges) {
                console.log('🧹 Pruning low-confidence edges...');
                report.edgesPruned = await this.pruneEdges();
                console.log(` ✓ Pruned ${report.edgesPruned} edges\n`);
            }
            // Step 5: Calculate statistics
            const stats = this.calculateStats();
            report.avgUplift = stats.avgUplift;
            report.avgConfidence = stats.avgConfidence;
            // Step 6: Generate recommendations
            report.recommendations = this.generateRecommendations(report);
            report.executionTimeMs = Date.now() - startTime;
            console.log('✅ Nightly Learner Completed\n');
            this.printReport(report);
            return report;
        }
        catch (error) {
            console.error('❌ Nightly Learner Failed:', error);
            throw error;
        }
    }
    /**
     * Discover causal edges using doubly robust learner
     *
     * τ̂(x) = μ1(x) − μ0(x) + [a*(y−μ1(x)) / e(x)] − [(1−a)*(y−μ0(x)) / (1−e(x))]
     *
     * Where:
     * - μ1(x) = outcome model for treatment
     * - μ0(x) = outcome model for control
     * - e(x) = propensity score (probability of treatment)
     * - a = treatment indicator
     * - y = observed outcome
     *
     * Thin alias for `discoverCausalEdges()`.
     *
     * @param {object} [config] - Accepted but currently not used; discovery
     *   always runs with the instance configuration.
     * @returns {Promise<number>} Number of edges added.
     */
    async discover(config) {
        return this.discoverCausalEdges();
    }
    /**
     * Scan same-session episode pairs and add a causal edge for every pair
     * whose doubly robust uplift estimate and confidence meet the configured
     * thresholds. Pairs that already have an edge are skipped.
     *
     * @returns {Promise<number>} Number of edges added.
     */
    async discoverCausalEdges() {
        let discovered = 0;
        // Find episode pairs with high similarity and temporal sequence
        const candidatePairs = this.db.prepare(`
            SELECT
                e1.id as from_id,
                e1.task as from_task,
                e1.reward as from_reward,
                e2.id as to_id,
                e2.task as to_task,
                e2.reward as to_reward,
                e2.ts - e1.ts as time_diff
            FROM episodes e1
            JOIN episodes e2 ON e1.session_id = e2.session_id
            WHERE e1.id != e2.id
                AND e2.ts > e1.ts
                AND e2.ts - e1.ts < 3600 -- Within 1 hour
            ORDER BY e1.id, e2.ts
            LIMIT 1000
        `).all();
        // Prepared once: the statement text is loop-invariant.
        const existingEdgeStmt = this.db.prepare(`
            SELECT id FROM causal_edges
            WHERE from_memory_id = ? AND to_memory_id = ?
        `);
        for (const pair of candidatePairs) {
            // Check if edge already exists
            if (existingEdgeStmt.get(pair.from_id, pair.to_id)) {
                continue;
            }
            // Calculate propensity score e(x) - probability of treatment
            // Simplified: use frequency of from_task in session
            const propensity = this.calculatePropensity(pair.from_id);
            // Calculate outcome models μ1(x) and μ0(x)
            const mu1 = this.calculateOutcomeModel(pair.from_task, true); // With treatment
            const mu0 = this.calculateOutcomeModel(pair.from_task, false); // Without treatment
            // Calculate doubly robust estimator.
            // Every candidate is a treated observation (a = 1), so the control
            // correction term of the formula is multiplied by (1 − a) = 0 and omitted.
            const a = 1;
            const y = pair.to_reward;
            const doublyRobustEstimate = (mu1 - mu0) + (a * (y - mu1) / propensity);
            // Calculate confidence based on sample size and variance
            const sampleSize = this.getSampleSize(pair.from_task);
            const confidence = this.calculateConfidence(sampleSize, doublyRobustEstimate);
            // Only add if meets thresholds
            if (Math.abs(doublyRobustEstimate) >= this.config.upliftThreshold && confidence >= this.config.confidenceThreshold) {
                const edge = {
                    fromMemoryId: pair.from_id,
                    fromMemoryType: 'episode',
                    toMemoryId: pair.to_id,
                    toMemoryType: 'episode',
                    similarity: 0.8, // Simplified - would use embedding similarity in production
                    uplift: doublyRobustEstimate,
                    confidence,
                    sampleSize,
                    mechanism: `${pair.from_task} → ${pair.to_task} (doubly robust)`,
                    metadata: {
                        propensity,
                        mu1,
                        mu0,
                        discoveredAt: Date.now()
                    }
                };
                this.causalGraph.addCausalEdge(edge);
                discovered++;
            }
        }
        return discovered;
    }
    /**
     * Calculate propensity score e(x) - probability of treatment given context.
     *
     * Computed as the share of episodes in the episode's session that have the
     * same task, clipped to [0.01, 0.99].
     *
     * @param {*} episodeId - Id of the treatment episode.
     * @returns {number} Propensity in [0.01, 0.99].
     */
    calculatePropensity(episodeId) {
        const episode = this.db.prepare('SELECT task, session_id FROM episodes WHERE id = ?').get(episodeId);
        // Count occurrences of this task type in session
        const counts = this.db.prepare(`
            SELECT
                COUNT(*) as total,
                SUM(CASE WHEN task = ? THEN 1 ELSE 0 END) as task_count
            FROM episodes
            WHERE session_id = ?
        `).get(episode.task, episode.session_id);
        const propensity = counts.task_count / Math.max(counts.total, 1);
        // Clip to avoid division by zero
        return Math.max(0.01, Math.min(0.99, propensity));
    }
    /**
     * Calculate outcome model μ(x) - expected outcome given treatment status.
     *
     * @param {string} task - Treatment task name.
     * @param {boolean} treated - true: average reward of episodes preceded in
     *   their session by `task`; false: average of episodes that are not.
     * @returns {number} The average reward, or 0.5 when no episode matches.
     */
    calculateOutcomeModel(task, treated) {
        // Get average reward for episodes with/without this task in their history
        const avgReward = this.db.prepare(`
            SELECT AVG(reward) as avg_reward
            FROM episodes
            WHERE ${treated ? '' : 'NOT'} EXISTS (
                SELECT 1 FROM episodes e2
                WHERE e2.session_id = episodes.session_id
                    AND e2.task = ?
                    AND e2.ts < episodes.ts
            )
        `).get(task);
        // `??` rather than `||`: an average reward of exactly 0 is a real value
        // and must not be replaced by the 0.5 fallback.
        return avgReward?.avg_reward ?? 0.5;
    }
    /**
     * Get sample size for a task type
     *
     * @param {string} task - Task name.
     * @returns {number} Number of episodes recorded for `task`.
     */
    getSampleSize(task) {
        const count = this.db.prepare(`
            SELECT COUNT(*) as count
            FROM episodes
            WHERE task = ?
        `).get(task);
        return count.count;
    }
    /**
     * Calculate confidence based on sample size and effect size.
     *
     * @param {number} sampleSize - Number of observations.
     * @param {number} uplift - Estimated effect.
     * @returns {number} Product of two factors, each capped at 1.
     */
    calculateConfidence(sampleSize, uplift) {
        // Simplified confidence calculation
        // In production, use proper statistical methods (bootstrap, etc.)
        const sampleFactor = Math.min(sampleSize / 100, 1.0); // Max at 100 samples
        const effectSizeFactor = Math.min(Math.abs(uplift) / 0.5, 1.0); // Max at 0.5 uplift
        return sampleFactor * effectSizeFactor;
    }
    /**
     * Complete running A/B experiments and calculate uplift.
     *
     * A failure on one experiment is logged and does not stop the others.
     *
     * @returns {Promise<number>} Number of experiments whose uplift was calculated.
     */
    async completeExperiments() {
        const runningExperiments = this.db.prepare(`
            SELECT id, start_time, sample_size
            FROM causal_experiments
            WHERE status = 'running'
                AND sample_size >= ?
        `).all(this.config.minSampleSize);
        let completed = 0;
        for (const exp of runningExperiments) {
            try {
                this.causalGraph.calculateUplift(exp.id);
                completed++;
            }
            catch (error) {
                console.error(` ⚠ Failed to calculate uplift for experiment ${exp.id}:`, error);
            }
        }
        return completed;
    }
    /**
     * Create new A/B experiments for promising hypotheses.
     *
     * At most `experimentBudget` minus the number of running experiments are
     * created per call.
     *
     * @returns {Promise<number>} Number of experiments created.
     */
    async createExperiments() {
        const currentExperiments = this.db.prepare(`
            SELECT COUNT(*) as count
            FROM causal_experiments
            WHERE status = 'running'
        `).get();
        const available = this.config.experimentBudget - currentExperiments.count;
        if (available <= 0) {
            return 0;
        }
        // Find promising task pairs that don't have experiments yet
        const candidates = this.db.prepare(`
            SELECT DISTINCT
                e1.task as treatment_task,
                e1.id as treatment_id,
                COUNT(e2.id) as potential_outcomes
            FROM episodes e1
            JOIN episodes e2 ON e1.session_id = e2.session_id
            WHERE e2.ts > e1.ts
                AND NOT EXISTS (
                    SELECT 1 FROM causal_experiments
                    WHERE treatment_id = e1.id
                )
            GROUP BY e1.task, e1.id
            HAVING COUNT(e2.id) >= ?
            ORDER BY COUNT(e2.id) DESC
            LIMIT ?
        `).all(this.config.minSampleSize, available);
        let created = 0;
        for (const candidate of candidates) {
            this.causalGraph.createExperiment({
                name: `Auto: ${candidate.treatment_task} Impact`,
                hypothesis: `${candidate.treatment_task} affects downstream outcomes`,
                treatmentId: candidate.treatment_id,
                treatmentType: 'episode',
                startTime: Date.now(),
                sampleSize: 0,
                status: 'running',
                metadata: {
                    autoGenerated: true,
                    potentialOutcomes: candidate.potential_outcomes
                }
            });
            created++;
        }
        return created;
    }
    /**
     * Prune old or low-confidence edges.
     *
     * Deletes every edge whose confidence is below `confidenceThreshold` or
     * whose `created_at` is before the age cutoff.
     *
     * @returns {Promise<number>} Number of rows deleted.
     */
    async pruneEdges() {
        const maxAgeMs = this.config.edgeMaxAgeDays * 24 * 60 * 60 * 1000;
        // Cutoff is expressed in seconds; assumes created_at is stored as a
        // Unix timestamp in seconds — TODO confirm against the schema.
        const cutoffTime = Date.now() / 1000 - maxAgeMs / 1000;
        const result = this.db.prepare(`
            DELETE FROM causal_edges
            WHERE confidence < ?
                OR created_at < ?
        `).run(this.config.confidenceThreshold, cutoffTime);
        return result.changes;
    }
    /**
     * Calculate overall statistics.
     *
     * @returns {{avgUplift: number, avgConfidence: number}} Mean absolute
     *   uplift and mean confidence over edges with a non-null uplift; both 0
     *   when there are none.
     */
    calculateStats() {
        const stats = this.db.prepare(`
            SELECT
                AVG(ABS(uplift)) as avg_uplift,
                AVG(confidence) as avg_confidence
            FROM causal_edges
            WHERE uplift IS NOT NULL
        `).get();
        return {
            avgUplift: stats?.avg_uplift || 0,
            avgConfidence: stats?.avg_confidence || 0
        };
    }
    /**
     * Generate recommendations based on learning results.
     *
     * @param {object} report - Report as built by `run()`.
     * @returns {string[]} Human-readable recommendations (possibly empty).
     */
    generateRecommendations(report) {
        const recommendations = [];
        if (report.edgesDiscovered === 0) {
            recommendations.push('No new causal edges discovered. Consider collecting more diverse episode data.');
        }
        if (report.avgUplift < 0.1) {
            recommendations.push('Average uplift is low. Review task sequences for optimization opportunities.');
        }
        if (report.avgConfidence < 0.7) {
            recommendations.push('Average confidence is below target. Increase sample sizes or refine hypothesis selection.');
        }
        if (report.experimentsCompleted > 0) {
            recommendations.push(`${report.experimentsCompleted} experiments completed. Review results for actionable insights.`);
        }
        if (report.edgesPruned > report.edgesDiscovered) {
            recommendations.push('More edges pruned than discovered. Consider adjusting confidence thresholds.');
        }
        return recommendations;
    }
    /**
     * Print report to console.
     *
     * @param {object} report - Report as built by `run()`.
     */
    printReport(report) {
        console.log('═══════════════════════════════════════════════════════════');
        console.log(' Nightly Learner Report');
        console.log('═══════════════════════════════════════════════════════════\n');
        console.log(` Execution Time: ${report.executionTimeMs}ms`);
        console.log(` Timestamp: ${new Date(report.timestamp).toISOString()}\n`);
        console.log(' Results:');
        console.log(` • Edges Discovered: ${report.edgesDiscovered}`);
        console.log(` • Edges Pruned: ${report.edgesPruned}`);
        console.log(` • Experiments Completed: ${report.experimentsCompleted}`);
        console.log(` • Experiments Created: ${report.experimentsCreated}\n`);
        console.log(' Statistics:');
        console.log(` • Avg Uplift: ${report.avgUplift.toFixed(3)}`);
        console.log(` • Avg Confidence: ${report.avgConfidence.toFixed(3)}\n`);
        if (report.recommendations.length > 0) {
            console.log(' Recommendations:');
            report.recommendations.forEach(rec => console.log(` • ${rec}`));
            console.log('');
        }
        console.log('═══════════════════════════════════════════════════════════\n');
    }
    /**
     * Update learner configuration.
     *
     * @param {object} config - Keys to override; unspecified keys keep their
     *   current values.
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
    }
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ReflexionMemory - Episodic Replay Memory System
|
|
3
|
+
*
|
|
4
|
+
* Implements reflexion-style episodic replay for agent self-improvement.
|
|
5
|
+
* Stores self-critiques and outcomes, retrieves relevant past experiences.
|
|
6
|
+
*
|
|
7
|
+
* Based on: "Reflexion: Language Agents with Verbal Reinforcement Learning"
|
|
8
|
+
* https://arxiv.org/abs/2303.11366
|
|
9
|
+
*/
|
|
10
|
+
export class ReflexionMemory {
    db;
    embedder;
    /**
     * @param {object} db - Database handle exposing `prepare(sql)` whose
     *   statements offer `get`, `all` and `run`.
     * @param {object} embedder - Object with an async `embed(text)` method; the
     *   result is treated as a Float32Array.
     */
    constructor(db, embedder) {
        this.db = db;
        this.embedder = embedder;
    }
    /**
     * Store a new episode with its critique and outcome.
     *
     * @param {object} episode - Fields read: sessionId, task, input, output,
     *   critique, reward, success, latencyMs, tokensUsed, tags, metadata.
     * @returns {Promise<*>} The inserted row id (`lastInsertRowid`).
     */
    async storeEpisode(episode) {
        // Embed first: if the embedder fails, no episode row is left behind
        // without an embedding (such a row could never be returned by
        // retrieveRelevant, which joins on episode_embeddings).
        const text = this.buildEpisodeText(episode);
        const embedding = await this.embedder.embed(text);
        const stmt = this.db.prepare(`
            INSERT INTO episodes (
                session_id, task, input, output, critique, reward, success,
                latency_ms, tokens_used, tags, metadata
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `);
        const tags = episode.tags ? JSON.stringify(episode.tags) : null;
        const metadata = episode.metadata ? JSON.stringify(episode.metadata) : null;
        const result = stmt.run(
            episode.sessionId,
            episode.task,
            episode.input || null,
            episode.output || null,
            episode.critique || null,
            episode.reward,
            episode.success ? 1 : 0,
            // `??` keeps a legitimate 0 instead of storing NULL.
            episode.latencyMs ?? null,
            episode.tokensUsed ?? null,
            tags,
            metadata
        );
        const episodeId = result.lastInsertRowid;
        this.storeEmbedding(episodeId, embedding);
        return episodeId;
    }
    /**
     * Retrieve relevant past episodes for a new task attempt.
     *
     * Loads every episode matching the SQL filters, scores each against the
     * query embedding with cosine similarity in JavaScript, and returns the
     * `k` most similar.
     *
     * @param {object} query
     * @param {string} query.task - Task description.
     * @param {string} [query.currentState=''] - Appended to the task when non-empty.
     * @param {number} [query.k=5] - Maximum number of episodes returned.
     * @param {number} [query.minReward] - Minimum reward filter.
     * @param {boolean} [query.onlyFailures=false] - Restrict to `success = 0`.
     * @param {boolean} [query.onlySuccesses=false] - Restrict to `success = 1`.
     * @param {number} [query.timeWindowDays] - Restrict to the last N days.
     * @returns {Promise<object[]>} Episodes sorted by descending similarity.
     */
    async retrieveRelevant(query) {
        const { task, currentState = '', k = 5, minReward, onlyFailures = false, onlySuccesses = false, timeWindowDays } = query;
        // Generate query embedding
        const queryText = currentState ? `${task}\n${currentState}` : task;
        const queryEmbedding = await this.embedder.embed(queryText);
        // Build SQL filters
        const filters = [];
        const params = [];
        if (minReward !== undefined) {
            filters.push('e.reward >= ?');
            params.push(minReward);
        }
        if (onlyFailures) {
            filters.push('e.success = 0');
        }
        if (onlySuccesses) {
            filters.push('e.success = 1');
        }
        if (timeWindowDays) {
            // Single quotes: double-quoted tokens are identifiers in SQL and are
            // rejected by SQLite builds that disable the legacy string fallback.
            filters.push("e.ts > strftime('%s', 'now') - ?");
            params.push(timeWindowDays * 86400);
        }
        const whereClause = filters.length > 0 ? `WHERE ${filters.join(' AND ')}` : '';
        // Retrieve all candidates
        const stmt = this.db.prepare(`
            SELECT
                e.*,
                ee.embedding
            FROM episodes e
            JOIN episode_embeddings ee ON e.id = ee.episode_id
            ${whereClause}
            ORDER BY e.reward DESC
        `);
        const rows = stmt.all(...params);
        // Calculate similarities
        const episodes = rows.map(row => {
            const embedding = this.deserializeEmbedding(row.embedding);
            const similarity = this.cosineSimilarity(queryEmbedding, embedding);
            return {
                id: row.id,
                ts: row.ts,
                sessionId: row.session_id,
                task: row.task,
                input: row.input,
                output: row.output,
                critique: row.critique,
                reward: row.reward,
                success: row.success === 1,
                latencyMs: row.latency_ms,
                tokensUsed: row.tokens_used,
                tags: row.tags ? JSON.parse(row.tags) : undefined,
                metadata: row.metadata ? JSON.parse(row.metadata) : undefined,
                embedding,
                similarity
            };
        });
        // Sort by similarity and return top-k
        episodes.sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
        return episodes.slice(0, k);
    }
    /**
     * Get statistics for a task.
     *
     * @param {string} task - Task name.
     * @param {number} [timeWindowDays] - When truthy, restrict to the last N days.
     * @returns {{totalAttempts: number, successRate: number, avgReward: number,
     *   avgLatency: number, improvementTrend: number}} `improvementTrend` is the
     *   relative change of the last 7 days' average reward versus older
     *   episodes, or 0 when either average is missing or zero.
     */
    getTaskStats(task, timeWindowDays) {
        // The window is bound as a parameter rather than spliced into the SQL text.
        const params = [task];
        let windowFilter = '';
        if (timeWindowDays) {
            windowFilter = `AND ts > strftime('%s', 'now') - ?`;
            params.push(timeWindowDays * 86400);
        }
        const stmt = this.db.prepare(`
            SELECT
                COUNT(*) as total,
                AVG(CASE WHEN success = 1 THEN 1.0 ELSE 0.0 END) as success_rate,
                AVG(reward) as avg_reward,
                AVG(latency_ms) as avg_latency
            FROM episodes
            WHERE task = ? ${windowFilter}
        `);
        const stats = stmt.get(...params);
        // Calculate improvement trend (recent vs older)
        const trendStmt = this.db.prepare(`
            SELECT
                AVG(CASE
                    WHEN ts > strftime('%s', 'now') - ${7 * 86400} THEN reward
                END) as recent_reward,
                AVG(CASE
                    WHEN ts <= strftime('%s', 'now') - ${7 * 86400} THEN reward
                END) as older_reward
            FROM episodes
            WHERE task = ? ${windowFilter}
        `);
        const trend = trendStmt.get(...params);
        // The truthiness check also guards the division against older_reward === 0.
        const improvementTrend = trend.recent_reward && trend.older_reward
            ? (trend.recent_reward - trend.older_reward) / trend.older_reward
            : 0;
        return {
            totalAttempts: stats.total || 0,
            successRate: stats.success_rate || 0,
            avgReward: stats.avg_reward || 0,
            avgLatency: stats.avg_latency || 0,
            improvementTrend
        };
    }
    /**
     * Build critique summary from similar failed episodes.
     *
     * Always queries with `onlyFailures: true` and `k: 3`, overriding those
     * keys in `query`.
     *
     * @param {object} query - Same shape as for `retrieveRelevant`.
     * @returns {Promise<string>} Numbered list of critiques, or a fixed message
     *   when no failure is found.
     */
    async getCritiqueSummary(query) {
        const failures = await this.retrieveRelevant({
            ...query,
            onlyFailures: true,
            k: 3
        });
        if (failures.length === 0) {
            return 'No prior failures found for this task.';
        }
        const critiques = failures
            .filter(ep => ep.critique)
            .map((ep, i) => `${i + 1}. ${ep.critique} (reward: ${ep.reward.toFixed(2)})`)
            .join('\n');
        return `Prior failures and lessons learned:\n${critiques}`;
    }
    /**
     * Get successful strategies for a task.
     *
     * Always queries with `onlySuccesses: true`, `minReward: 0.7` and `k: 3`,
     * overriding those keys in `query`.
     *
     * @param {object} query - Same shape as for `retrieveRelevant`.
     * @returns {Promise<string>} Numbered list of output excerpts (first 200
     *   characters), or a fixed message when nothing matches.
     */
    async getSuccessStrategies(query) {
        const successes = await this.retrieveRelevant({
            ...query,
            onlySuccesses: true,
            minReward: 0.7,
            k: 3
        });
        if (successes.length === 0) {
            return 'No successful strategies found for this task.';
        }
        const strategies = successes
            .map((ep, i) => {
                const approach = ep.output?.substring(0, 200) || 'No output recorded';
                return `${i + 1}. Approach (reward ${ep.reward.toFixed(2)}): ${approach}...`;
            })
            .join('\n');
        return `Successful strategies:\n${strategies}`;
    }
    /**
     * Prune low-quality episodes based on TTL and quality threshold.
     *
     * Among episodes that are both below `minReward` and older than
     * `maxAgeDays`, the `keepMinPerTask` highest-reward ones per task are kept
     * and the rest are deleted.
     *
     * @param {object} [config]
     * @param {number} [config.minReward=0.3]
     * @param {number} [config.maxAgeDays=30]
     * @param {number} [config.keepMinPerTask=5]
     * @returns {number} Number of rows deleted.
     */
    pruneEpisodes(config = {}) {
        const { minReward = 0.3, maxAgeDays = 30, keepMinPerTask = 5 } = config;
        // Keep high-reward episodes and minimum per task
        const stmt = this.db.prepare(`
            DELETE FROM episodes
            WHERE id IN (
                SELECT id FROM (
                    SELECT
                        id,
                        reward,
                        ts,
                        ROW_NUMBER() OVER (PARTITION BY task ORDER BY reward DESC) as rank
                    FROM episodes
                    WHERE reward < ?
                        AND ts < strftime('%s', 'now') - ?
                ) WHERE rank > ?
            )
        `);
        const result = stmt.run(minReward, maxAgeDays * 86400, keepMinPerTask);
        return result.changes;
    }
    // ========================================================================
    // Private Helper Methods
    // ========================================================================
    /**
     * Join task, critique and output (when present) into the text that is embedded.
     */
    buildEpisodeText(episode) {
        const parts = [episode.task];
        if (episode.critique) {
            parts.push(episode.critique);
        }
        if (episode.output) {
            parts.push(episode.output);
        }
        return parts.join('\n');
    }
    /**
     * Insert the serialized embedding for an episode.
     */
    storeEmbedding(episodeId, embedding) {
        const stmt = this.db.prepare(`
            INSERT INTO episode_embeddings (episode_id, embedding)
            VALUES (?, ?)
        `);
        stmt.run(episodeId, this.serializeEmbedding(embedding));
    }
    /**
     * Convert a typed-array embedding into a Buffer over exactly its bytes.
     *
     * @param {Float32Array} embedding
     * @returns {Buffer} View on the embedding's memory (no copy).
     */
    serializeEmbedding(embedding) {
        // Respect byteOffset/byteLength: the array may be a view (e.g. a
        // subarray) onto a larger ArrayBuffer.
        return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
    }
    /**
     * Convert a stored blob back into a Float32Array.
     *
     * @param {Buffer} buffer - Raw bytes of 32-bit floats in platform byte order.
     * @returns {Float32Array} A view on the buffer's memory when it is 4-byte
     *   aligned, otherwise a copy.
     */
    deserializeEmbedding(buffer) {
        const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
        const count = Math.floor(buffer.byteLength / bytesPerFloat);
        if (buffer.byteOffset % bytesPerFloat === 0) {
            return new Float32Array(buffer.buffer, buffer.byteOffset, count);
        }
        // A Float32Array view requires an aligned offset; copy the bytes into a
        // fresh, aligned buffer instead of throwing RangeError.
        const aligned = new Uint8Array(count * bytesPerFloat);
        aligned.set(buffer.subarray(0, count * bytesPerFloat));
        return new Float32Array(aligned.buffer);
    }
    /**
     * Cosine similarity of two equal-length vectors.
     *
     * @returns {number} Value in [-1, 1]; 0 when either vector has zero norm.
     */
    cosineSimilarity(a, b) {
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denominator = Math.sqrt(normA) * Math.sqrt(normB);
        // Avoid 0/0 → NaN for all-zero vectors.
        return denominator === 0 ? 0 : dotProduct / denominator;
    }
}
|