agentic-flow 1.7.0 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/.claude/agents/test-neural.md +5 -0
  2. package/.claude/settings.json +20 -19
  3. package/.claude/skills/agentdb-memory-patterns/SKILL.md +166 -0
  4. package/.claude/skills/agentdb-vector-search/SKILL.md +126 -0
  5. package/.claude/skills/agentic-flow/agentdb-memory-patterns/SKILL.md +166 -0
  6. package/.claude/skills/agentic-flow/agentdb-vector-search/SKILL.md +126 -0
  7. package/.claude/skills/agentic-flow/reasoningbank-intelligence/SKILL.md +201 -0
  8. package/.claude/skills/agentic-flow/swarm-orchestration/SKILL.md +179 -0
  9. package/.claude/skills/reasoningbank-intelligence/SKILL.md +201 -0
  10. package/.claude/skills/skill-builder/README.md +308 -0
  11. package/.claude/skills/skill-builder/SKILL.md +910 -0
  12. package/.claude/skills/skill-builder/docs/SPECIFICATION.md +358 -0
  13. package/.claude/skills/skill-builder/resources/schemas/skill-frontmatter.schema.json +41 -0
  14. package/.claude/skills/skill-builder/resources/templates/full-skill.template +118 -0
  15. package/.claude/skills/skill-builder/resources/templates/minimal-skill.template +38 -0
  16. package/.claude/skills/skill-builder/scripts/generate-skill.sh +334 -0
  17. package/.claude/skills/skill-builder/scripts/validate-skill.sh +198 -0
  18. package/.claude/skills/swarm-orchestration/SKILL.md +179 -0
  19. package/CHANGELOG.md +117 -0
  20. package/README.md +81 -17
  21. package/dist/cli-proxy.js +33 -2
  22. package/dist/mcp/standalone-stdio.js +4 -200
  23. package/dist/reasoningbank/index.js +4 -0
  24. package/dist/utils/cli.js +22 -0
  25. package/docs/AGENTDB_INTEGRATION.md +379 -0
  26. package/package.json +4 -4
  27. package/.claude/answer.md +0 -1
  28. package/dist/agentdb/benchmarks/comprehensive-benchmark.js +0 -664
  29. package/dist/agentdb/benchmarks/frontier-benchmark.js +0 -419
  30. package/dist/agentdb/benchmarks/reflexion-benchmark.js +0 -370
  31. package/dist/agentdb/cli/agentdb-cli.js +0 -717
  32. package/dist/agentdb/controllers/CausalMemoryGraph.js +0 -322
  33. package/dist/agentdb/controllers/CausalRecall.js +0 -281
  34. package/dist/agentdb/controllers/EmbeddingService.js +0 -118
  35. package/dist/agentdb/controllers/ExplainableRecall.js +0 -387
  36. package/dist/agentdb/controllers/NightlyLearner.js +0 -382
  37. package/dist/agentdb/controllers/ReflexionMemory.js +0 -239
  38. package/dist/agentdb/controllers/SkillLibrary.js +0 -276
  39. package/dist/agentdb/controllers/frontier-index.js +0 -9
  40. package/dist/agentdb/controllers/index.js +0 -8
  41. package/dist/agentdb/index.js +0 -32
  42. package/dist/agentdb/optimizations/BatchOperations.js +0 -198
  43. package/dist/agentdb/optimizations/QueryOptimizer.js +0 -225
  44. package/dist/agentdb/optimizations/index.js +0 -7
  45. package/dist/agentdb/tests/frontier-features.test.js +0 -665
  46. package/dist/memory/SharedMemoryPool.js +0 -211
  47. package/dist/memory/index.js +0 -6
  48. package/dist/reasoningbank/AdvancedMemory.js +0 -67
  49. package/dist/reasoningbank/HybridBackend.js +0 -91
  50. package/dist/reasoningbank/index-new.js +0 -87
  51. package/docs/AGENTDB_TESTING.md +0 -411
  52. package/scripts/run-validation.sh +0 -165
  53. package/scripts/test-agentdb.sh +0 -153
@@ -1,382 +0,0 @@
1
- /**
2
- * Nightly Learner - Automated Causal Discovery and Consolidation
3
- *
4
- * Runs as a background job to:
5
- * 1. Discover new causal edges from episode patterns
6
- * 2. Run A/B experiments on promising hypotheses
7
- * 3. Calculate uplift for completed experiments
8
- * 4. Prune low-confidence edges
9
- * 5. Update rerank weights based on performance
10
- *
11
- * Based on doubly robust learner:
12
- * τ̂(x) = μ1(x) − μ0(x) + [a*(y−μ1(x)) / e(x)] − [(1−a)*(y−μ0(x)) / (1−e(x))]
13
- */
14
- import { CausalMemoryGraph } from './CausalMemoryGraph.js';
15
- import { ReflexionMemory } from './ReflexionMemory.js';
16
- import { SkillLibrary } from './SkillLibrary.js';
/**
 * Nightly Learner - automated causal discovery and consolidation.
 *
 * A single call to run() performs, in order: causal edge discovery from
 * episode pairs, completion of running A/B experiments, creation of new
 * experiments, pruning of weak or old edges, and a summary report.
 */
export class NightlyLearner {
    /** Values used for every option the caller does not supply. */
    static DEFAULT_CONFIG = Object.freeze({
        minSimilarity: 0.7,
        minSampleSize: 30,
        confidenceThreshold: 0.6,
        upliftThreshold: 0.05,
        pruneOldEdges: true,
        edgeMaxAgeDays: 90,
        autoExperiments: true,
        experimentBudget: 10
    });
    config;
    db;
    causalGraph;
    reflexion;
    skillLibrary;
    /**
     * @param {object} db - Database handle exposing prepare(sql) with get/all/run statements.
     * @param {object} embedder - Passed through to ReflexionMemory and SkillLibrary.
     * @param {object} [config] - Option overrides; anything omitted falls back to
     *   DEFAULT_CONFIG, so a partial object never leaves a threshold undefined.
     */
    constructor(db, embedder, config = {}) {
        this.config = { ...NightlyLearner.DEFAULT_CONFIG, ...config };
        this.db = db;
        this.causalGraph = new CausalMemoryGraph(db);
        this.reflexion = new ReflexionMemory(db, embedder);
        this.skillLibrary = new SkillLibrary(db, embedder);
    }
    /**
     * Main learning job - runs all discovery and consolidation tasks.
     *
     * @returns {Promise<object>} Report with counts, averages, recommendations and timing.
     * @throws Re-throws any error raised by a step after logging it.
     */
    async run() {
        console.log('\n🌙 Nightly Learner Starting...\n');
        const startTime = Date.now();
        const report = {
            timestamp: startTime,
            executionTimeMs: 0,
            edgesDiscovered: 0,
            edgesPruned: 0,
            experimentsCompleted: 0,
            experimentsCreated: 0,
            avgUplift: 0,
            avgConfidence: 0,
            recommendations: []
        };
        try {
            // Step 1: Discover new causal edges
            console.log('📊 Discovering causal edges from episode patterns...');
            report.edgesDiscovered = await this.discoverCausalEdges();
            console.log(`   ✓ Discovered ${report.edgesDiscovered} new edges\n`);
            // Step 2: Complete running experiments
            console.log('🧪 Completing A/B experiments...');
            report.experimentsCompleted = await this.completeExperiments();
            console.log(`   ✓ Completed ${report.experimentsCompleted} experiments\n`);
            // Step 3: Create new experiments (if enabled)
            if (this.config.autoExperiments) {
                console.log('🔬 Creating new A/B experiments...');
                report.experimentsCreated = await this.createExperiments();
                console.log(`   ✓ Created ${report.experimentsCreated} new experiments\n`);
            }
            // Step 4: Prune low-confidence edges
            if (this.config.pruneOldEdges) {
                console.log('🧹 Pruning low-confidence edges...');
                report.edgesPruned = await this.pruneEdges();
                console.log(`   ✓ Pruned ${report.edgesPruned} edges\n`);
            }
            // Step 5: Calculate statistics
            const stats = this.calculateStats();
            report.avgUplift = stats.avgUplift;
            report.avgConfidence = stats.avgConfidence;
            // Step 6: Generate recommendations
            report.recommendations = this.generateRecommendations(report);
            report.executionTimeMs = Date.now() - startTime;
            console.log('✅ Nightly Learner Completed\n');
            this.printReport(report);
            return report;
        }
        catch (error) {
            console.error('❌ Nightly Learner Failed:', error);
            throw error;
        }
    }
    /**
     * Alias for discoverCausalEdges().
     *
     * @param {object} [config] - Accepted for call-site compatibility; currently ignored.
     * @returns {Promise<number>} Number of edges added.
     */
    async discover(config) {
        return this.discoverCausalEdges();
    }
    /**
     * Discover causal edges using a doubly robust estimate.
     *
     * τ̂(x) = μ1(x) − μ0(x) + [a*(y−μ1(x)) / e(x)] − [(1−a)*(y−μ0(x)) / (1−e(x))]
     *
     * Where μ1/μ0 are the outcome models with/without treatment, e(x) is the
     * propensity score, a is the treatment indicator and y the observed outcome.
     * Every candidate pair is treated (a = 1), so the last term is always zero
     * and is not computed.
     *
     * @returns {Promise<number>} Number of edges added to the causal graph.
     */
    async discoverCausalEdges() {
        let discovered = 0;
        // Find ordered episode pairs from the same session that are close in time
        const candidatePairs = this.db.prepare(`
      SELECT
        e1.id as from_id,
        e1.task as from_task,
        e1.reward as from_reward,
        e2.id as to_id,
        e2.task as to_task,
        e2.reward as to_reward,
        e2.ts - e1.ts as time_diff
      FROM episodes e1
      JOIN episodes e2 ON e1.session_id = e2.session_id
      WHERE e1.id != e2.id
        AND e2.ts > e1.ts
        AND e2.ts - e1.ts < 3600 -- Within 1 hour
      ORDER BY e1.id, e2.ts
      LIMIT 1000
    `).all();
        // Prepared on first use and then reused for every pair, instead of
        // re-preparing the same statement on each iteration.
        let findExistingEdge;
        for (const pair of candidatePairs) {
            findExistingEdge ??= this.db.prepare(`
        SELECT id FROM causal_edges
        WHERE from_memory_id = ? AND to_memory_id = ?
      `);
            // Skip pairs that already have an edge
            if (findExistingEdge.get(pair.from_id, pair.to_id)) {
                continue;
            }
            // Propensity score e(x): frequency of from_task within its session
            const propensity = this.calculatePropensity(pair.from_id);
            // Outcome models μ1(x) and μ0(x)
            const mu1 = this.calculateOutcomeModel(pair.from_task, true); // With treatment
            const mu0 = this.calculateOutcomeModel(pair.from_task, false); // Without treatment
            // Doubly robust estimator for a treated observation
            const a = 1;
            const y = pair.to_reward;
            const doublyRobustEstimate = (mu1 - mu0) + (a * (y - mu1) / propensity);
            // Heuristic confidence from sample size and effect size
            const sampleSize = this.getSampleSize(pair.from_task);
            const confidence = this.calculateConfidence(sampleSize, doublyRobustEstimate);
            // Only add if it meets both thresholds
            if (Math.abs(doublyRobustEstimate) >= this.config.upliftThreshold && confidence >= this.config.confidenceThreshold) {
                const edge = {
                    fromMemoryId: pair.from_id,
                    fromMemoryType: 'episode',
                    toMemoryId: pair.to_id,
                    toMemoryType: 'episode',
                    similarity: 0.8, // Simplified - would use embedding similarity in production
                    uplift: doublyRobustEstimate,
                    confidence,
                    sampleSize,
                    mechanism: `${pair.from_task} → ${pair.to_task} (doubly robust)`,
                    metadata: {
                        propensity,
                        mu1,
                        mu0,
                        discoveredAt: Date.now()
                    }
                };
                this.causalGraph.addCausalEdge(edge);
                discovered++;
            }
        }
        return discovered;
    }
    /**
     * Calculate propensity score e(x): the share of episodes in the same
     * session that have the same task as the given episode.
     *
     * @param {number} episodeId - Id of a row in the episodes table.
     * @returns {number} Propensity clipped to [0.01, 0.99].
     */
    calculatePropensity(episodeId) {
        const episode = this.db.prepare('SELECT task, session_id FROM episodes WHERE id = ?').get(episodeId);
        // Count occurrences of this task type in the session
        const counts = this.db.prepare(`
      SELECT
        COUNT(*) as total,
        SUM(CASE WHEN task = ? THEN 1 ELSE 0 END) as task_count
      FROM episodes
      WHERE session_id = ?
    `).get(episode.task, episode.session_id);
        const propensity = counts.task_count / Math.max(counts.total, 1);
        // Clip so the estimator never divides by zero
        return Math.max(0.01, Math.min(0.99, propensity));
    }
    /**
     * Calculate outcome model μ(x): the average reward of episodes that do
     * (treated) or do not (untreated) have an earlier episode with `task` in
     * the same session.
     *
     * @param {string} task - Task name acting as the treatment.
     * @param {boolean} treated - Select episodes with (true) or without (false) the treatment.
     * @returns {number} Average reward, or 0.5 when no episode matches.
     */
    calculateOutcomeModel(task, treated) {
        const avgReward = this.db.prepare(`
      SELECT AVG(reward) as avg_reward
      FROM episodes
      WHERE ${treated ? '' : 'NOT'} EXISTS (
        SELECT 1 FROM episodes e2
        WHERE e2.session_id = episodes.session_id
          AND e2.task = ?
          AND e2.ts < episodes.ts
      )
    `).get(task);
        // AVG() yields NULL only when nothing matched. A real average of 0 must
        // be kept, so fall back on null/undefined only (not on any falsy value).
        return avgReward?.avg_reward ?? 0.5;
    }
    /**
     * Get sample size for a task type.
     *
     * @param {string} task - Task name.
     * @returns {number} Number of episodes recorded with that task.
     */
    getSampleSize(task) {
        const count = this.db.prepare(`
      SELECT COUNT(*) as count
      FROM episodes
      WHERE task = ?
    `).get(task);
        return count.count;
    }
    /**
     * Calculate a heuristic confidence in [0, 1] from sample size and effect size.
     *
     * @param {number} sampleSize - Number of supporting episodes; saturates at 100.
     * @param {number} uplift - Estimated effect; its magnitude saturates at 0.5.
     * @returns {number} Product of the two saturating factors.
     */
    calculateConfidence(sampleSize, uplift) {
        // Simplified confidence calculation
        // In production, use proper statistical methods (bootstrap, etc.)
        const sampleFactor = Math.min(sampleSize / 100, 1.0); // Max at 100 samples
        const effectSizeFactor = Math.min(Math.abs(uplift) / 0.5, 1.0); // Max at 0.5 uplift
        return sampleFactor * effectSizeFactor;
    }
    /**
     * Complete running A/B experiments that reached the minimum sample size by
     * asking the causal graph to calculate their uplift.
     *
     * @returns {Promise<number>} Number of experiments whose uplift calculation succeeded.
     */
    async completeExperiments() {
        const runningExperiments = this.db.prepare(`
      SELECT id, start_time, sample_size
      FROM causal_experiments
      WHERE status = 'running'
        AND sample_size >= ?
    `).all(this.config.minSampleSize);
        let completed = 0;
        for (const exp of runningExperiments) {
            try {
                this.causalGraph.calculateUplift(exp.id);
                completed++;
            }
            catch (error) {
                // Best effort: one failing experiment must not stop the others
                console.error(`   ⚠ Failed to calculate uplift for experiment ${exp.id}:`, error);
            }
        }
        return completed;
    }
    /**
     * Create new A/B experiments for promising hypotheses, limited to the
     * unused part of the experiment budget.
     *
     * @returns {Promise<number>} Number of experiments created.
     */
    async createExperiments() {
        const currentExperiments = this.db.prepare(`
      SELECT COUNT(*) as count
      FROM causal_experiments
      WHERE status = 'running'
    `).get();
        const available = this.config.experimentBudget - currentExperiments.count;
        if (available <= 0) {
            return 0;
        }
        // Find episodes with enough later episodes in their session and no experiment yet
        const candidates = this.db.prepare(`
      SELECT DISTINCT
        e1.task as treatment_task,
        e1.id as treatment_id,
        COUNT(e2.id) as potential_outcomes
      FROM episodes e1
      JOIN episodes e2 ON e1.session_id = e2.session_id
      WHERE e2.ts > e1.ts
        AND NOT EXISTS (
          SELECT 1 FROM causal_experiments
          WHERE treatment_id = e1.id
        )
      GROUP BY e1.task, e1.id
      HAVING COUNT(e2.id) >= ?
      ORDER BY COUNT(e2.id) DESC
      LIMIT ?
    `).all(this.config.minSampleSize, available);
        let created = 0;
        for (const candidate of candidates) {
            this.causalGraph.createExperiment({
                name: `Auto: ${candidate.treatment_task} Impact`,
                hypothesis: `${candidate.treatment_task} affects downstream outcomes`,
                treatmentId: candidate.treatment_id,
                treatmentType: 'episode',
                startTime: Date.now(),
                sampleSize: 0,
                status: 'running',
                metadata: {
                    autoGenerated: true,
                    potentialOutcomes: candidate.potential_outcomes
                }
            });
            created++;
        }
        return created;
    }
    /**
     * Prune edges whose confidence is below the configured threshold or whose
     * created_at is older than edgeMaxAgeDays.
     *
     * @returns {Promise<number>} Number of rows deleted.
     */
    async pruneEdges() {
        const maxAgeMs = this.config.edgeMaxAgeDays * 24 * 60 * 60 * 1000;
        // Cutoff is expressed in seconds.
        // NOTE(review): assumes causal_edges.created_at is a Unix timestamp in seconds - confirm against the schema.
        const cutoffTime = Date.now() / 1000 - maxAgeMs / 1000;
        const result = this.db.prepare(`
      DELETE FROM causal_edges
      WHERE confidence < ?
        OR created_at < ?
    `).run(this.config.confidenceThreshold, cutoffTime);
        return result.changes;
    }
    /**
     * Calculate overall statistics over edges that have an uplift value.
     *
     * @returns {{avgUplift: number, avgConfidence: number}} Averages, 0 when there are no edges.
     */
    calculateStats() {
        const stats = this.db.prepare(`
      SELECT
        AVG(ABS(uplift)) as avg_uplift,
        AVG(confidence) as avg_confidence
      FROM causal_edges
      WHERE uplift IS NOT NULL
    `).get();
        return {
            avgUplift: stats?.avg_uplift || 0,
            avgConfidence: stats?.avg_confidence || 0
        };
    }
    /**
     * Generate recommendations based on learning results.
     *
     * @param {object} report - Report object as built by run().
     * @returns {string[]} Human-readable recommendations, possibly empty.
     */
    generateRecommendations(report) {
        const recommendations = [];
        if (report.edgesDiscovered === 0) {
            recommendations.push('No new causal edges discovered. Consider collecting more diverse episode data.');
        }
        if (report.avgUplift < 0.1) {
            recommendations.push('Average uplift is low. Review task sequences for optimization opportunities.');
        }
        if (report.avgConfidence < 0.7) {
            recommendations.push('Average confidence is below target. Increase sample sizes or refine hypothesis selection.');
        }
        if (report.experimentsCompleted > 0) {
            recommendations.push(`${report.experimentsCompleted} experiments completed. Review results for actionable insights.`);
        }
        if (report.edgesPruned > report.edgesDiscovered) {
            recommendations.push('More edges pruned than discovered. Consider adjusting confidence thresholds.');
        }
        return recommendations;
    }
    /**
     * Print report to console.
     *
     * @param {object} report - Report object as built by run().
     */
    printReport(report) {
        console.log('═══════════════════════════════════════════════════════════');
        console.log('  Nightly Learner Report');
        console.log('═══════════════════════════════════════════════════════════\n');
        console.log(`  Execution Time: ${report.executionTimeMs}ms`);
        console.log(`  Timestamp: ${new Date(report.timestamp).toISOString()}\n`);
        console.log('  Results:');
        console.log(`    • Edges Discovered: ${report.edgesDiscovered}`);
        console.log(`    • Edges Pruned: ${report.edgesPruned}`);
        console.log(`    • Experiments Completed: ${report.experimentsCompleted}`);
        console.log(`    • Experiments Created: ${report.experimentsCreated}\n`);
        console.log('  Statistics:');
        console.log(`    • Avg Uplift: ${report.avgUplift.toFixed(3)}`);
        console.log(`    • Avg Confidence: ${report.avgConfidence.toFixed(3)}\n`);
        if (report.recommendations.length > 0) {
            console.log('  Recommendations:');
            report.recommendations.forEach(rec => console.log(`    • ${rec}`));
            console.log('');
        }
        console.log('═══════════════════════════════════════════════════════════\n');
    }
    /**
     * Update learner configuration by merging the given options over the current ones.
     *
     * @param {object} config - Options to override.
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
    }
}
@@ -1,239 +0,0 @@
/**
 * ReflexionMemory - Episodic Replay Memory System
 *
 * Implements reflexion-style episodic replay for agent self-improvement.
 * Stores self-critiques and outcomes, retrieves relevant past experiences.
 *
 * Based on: "Reflexion: Language Agents with Verbal Reinforcement Learning"
 * https://arxiv.org/abs/2303.11366
 */
export class ReflexionMemory {
    db;
    embedder;
    /**
     * @param {object} db - Database handle exposing prepare(sql) with get/all/run statements.
     * @param {object} embedder - Object with an async embed(text) method returning a Float32Array.
     */
    constructor(db, embedder) {
        this.db = db;
        this.embedder = embedder;
    }
    /**
     * Store a new episode with its critique and outcome, plus its embedding.
     *
     * The embedding is computed before anything is written, so a failing
     * embedder does not leave behind an episode row without an embedding
     * (such a row would never be returned by retrieveRelevant, which joins
     * on the embeddings table).
     *
     * @param {object} episode - Episode fields (sessionId, task, reward, success, ...).
     * @returns {Promise<number|bigint>} Row id of the inserted episode.
     */
    async storeEpisode(episode) {
        const text = this.buildEpisodeText(episode);
        const embedding = await this.embedder.embed(text);
        const stmt = this.db.prepare(`
      INSERT INTO episodes (
        session_id, task, input, output, critique, reward, success,
        latency_ms, tokens_used, tags, metadata
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
        const tags = episode.tags ? JSON.stringify(episode.tags) : null;
        const metadata = episode.metadata ? JSON.stringify(episode.metadata) : null;
        const result = stmt.run(
            episode.sessionId,
            episode.task,
            episode.input || null,
            episode.output || null,
            episode.critique || null,
            episode.reward,
            episode.success ? 1 : 0,
            // `??` rather than `||`: a measured value of 0 is data, not "missing"
            episode.latencyMs ?? null,
            episode.tokensUsed ?? null,
            tags,
            metadata
        );
        const episodeId = result.lastInsertRowid;
        this.storeEmbedding(episodeId, embedding);
        return episodeId;
    }
    /**
     * Retrieve relevant past episodes for a new task attempt.
     *
     * All rows passing the SQL filters are loaded and ranked in memory by
     * cosine similarity to the query embedding; the best `k` are returned.
     *
     * @param {object} query - { task, currentState?, k?, minReward?, onlyFailures?, onlySuccesses?, timeWindowDays? }.
     * @returns {Promise<object[]>} Up to k episodes, most similar first, each with `similarity`.
     */
    async retrieveRelevant(query) {
        const { task, currentState = '', k = 5, minReward, onlyFailures = false, onlySuccesses = false, timeWindowDays } = query;
        // Generate query embedding
        const queryText = currentState ? `${task}\n${currentState}` : task;
        const queryEmbedding = await this.embedder.embed(queryText);
        // Build SQL filters
        const filters = [];
        const params = [];
        if (minReward !== undefined) {
            filters.push('e.reward >= ?');
            params.push(minReward);
        }
        if (onlyFailures) {
            filters.push('e.success = 0');
        }
        if (onlySuccesses) {
            filters.push('e.success = 1');
        }
        if (timeWindowDays) {
            // Single quotes: double-quoted tokens are identifiers in standard SQL
            // and are rejected by SQLite builds with DQS disabled.
            filters.push(`e.ts > strftime('%s', 'now') - ?`);
            params.push(timeWindowDays * 86400);
        }
        const whereClause = filters.length > 0 ? `WHERE ${filters.join(' AND ')}` : '';
        // Retrieve all candidates
        const stmt = this.db.prepare(`
      SELECT
        e.*,
        ee.embedding
      FROM episodes e
      JOIN episode_embeddings ee ON e.id = ee.episode_id
      ${whereClause}
      ORDER BY e.reward DESC
    `);
        const rows = stmt.all(...params);
        // Calculate similarities
        const episodes = rows.map(row => {
            const embedding = this.deserializeEmbedding(row.embedding);
            const similarity = this.cosineSimilarity(queryEmbedding, embedding);
            return {
                id: row.id,
                ts: row.ts,
                sessionId: row.session_id,
                task: row.task,
                input: row.input,
                output: row.output,
                critique: row.critique,
                reward: row.reward,
                success: row.success === 1,
                latencyMs: row.latency_ms,
                tokensUsed: row.tokens_used,
                tags: row.tags ? JSON.parse(row.tags) : undefined,
                metadata: row.metadata ? JSON.parse(row.metadata) : undefined,
                embedding,
                similarity
            };
        });
        // Sort by similarity and return top-k
        episodes.sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
        return episodes.slice(0, k);
    }
    /**
     * Get statistics for a task.
     *
     * @param {string} task - Task name.
     * @param {number} [timeWindowDays] - When truthy, only episodes newer than this many days count.
     * @returns {{totalAttempts: number, successRate: number, avgReward: number, avgLatency: number, improvementTrend: number}}
     *   improvementTrend is the relative change of the average reward of the
     *   last 7 days versus older episodes; 0 when either side has no data or
     *   the older average is 0.
     */
    getTaskStats(task, timeWindowDays) {
        // The window is bound as a parameter instead of being spliced into the SQL text
        const params = [task];
        let windowFilter = '';
        if (timeWindowDays) {
            windowFilter = `AND ts > strftime('%s', 'now') - ?`;
            params.push(timeWindowDays * 86400);
        }
        const stmt = this.db.prepare(`
      SELECT
        COUNT(*) as total,
        AVG(CASE WHEN success = 1 THEN 1.0 ELSE 0.0 END) as success_rate,
        AVG(reward) as avg_reward,
        AVG(latency_ms) as avg_latency
      FROM episodes
      WHERE task = ? ${windowFilter}
    `);
        const stats = stmt.get(...params);
        // Calculate improvement trend (recent vs older)
        const recentWindowSeconds = 7 * 86400;
        const trendStmt = this.db.prepare(`
      SELECT
        AVG(CASE
          WHEN ts > strftime('%s', 'now') - ${recentWindowSeconds} THEN reward
        END) as recent_reward,
        AVG(CASE
          WHEN ts <= strftime('%s', 'now') - ${recentWindowSeconds} THEN reward
        END) as older_reward
      FROM episodes
      WHERE task = ? ${windowFilter}
    `);
        const trend = trendStmt.get(...params);
        const recent = trend.recent_reward;
        const older = trend.older_reward;
        // Null check instead of truthiness: a recent average of exactly 0 is a
        // real (bad) result. Only a zero baseline is excluded, to avoid dividing by 0.
        const improvementTrend = recent != null && older != null && older !== 0
            ? (recent - older) / older
            : 0;
        return {
            totalAttempts: stats.total || 0,
            successRate: stats.success_rate || 0,
            avgReward: stats.avg_reward || 0,
            avgLatency: stats.avg_latency || 0,
            improvementTrend
        };
    }
    /**
     * Build critique summary from the three most similar failed episodes.
     *
     * @param {object} query - Same shape as for retrieveRelevant; onlyFailures and k are overridden.
     * @returns {Promise<string>} Numbered list of critiques, or a fixed message when no failures exist.
     */
    async getCritiqueSummary(query) {
        const failures = await this.retrieveRelevant({
            ...query,
            onlyFailures: true,
            k: 3
        });
        if (failures.length === 0) {
            return 'No prior failures found for this task.';
        }
        const critiques = failures
            .filter(ep => ep.critique)
            .map((ep, i) => `${i + 1}. ${ep.critique} (reward: ${ep.reward.toFixed(2)})`)
            .join('\n');
        return `Prior failures and lessons learned:\n${critiques}`;
    }
    /**
     * Get successful strategies for a task from the three most similar
     * successful episodes with reward >= 0.7.
     *
     * @param {object} query - Same shape as for retrieveRelevant; onlySuccesses, minReward and k are overridden.
     * @returns {Promise<string>} Numbered list of output excerpts (first 200 characters), or a fixed message.
     */
    async getSuccessStrategies(query) {
        const successes = await this.retrieveRelevant({
            ...query,
            onlySuccesses: true,
            minReward: 0.7,
            k: 3
        });
        if (successes.length === 0) {
            return 'No successful strategies found for this task.';
        }
        const strategies = successes
            .map((ep, i) => {
            const approach = ep.output?.substring(0, 200) || 'No output recorded';
            return `${i + 1}. Approach (reward ${ep.reward.toFixed(2)}): ${approach}...`;
        })
            .join('\n');
        return `Successful strategies:\n${strategies}`;
    }
    /**
     * Prune low-quality episodes based on TTL and quality threshold.
     *
     * Candidates are episodes with reward below `minReward` that are older
     * than `maxAgeDays`. Within each task the `keepMinPerTask` best-rewarded
     * candidates are kept and the rest are deleted.
     *
     * @param {object} [config] - { minReward = 0.3, maxAgeDays = 30, keepMinPerTask = 5 }.
     * @returns {number} Number of rows deleted.
     */
    pruneEpisodes(config = {}) {
        const { minReward = 0.3, maxAgeDays = 30, keepMinPerTask = 5 } = config;
        // NOTE(review): rows in episode_embeddings are not deleted here - presumably
        // handled by a foreign-key cascade; confirm against the schema.
        const stmt = this.db.prepare(`
      DELETE FROM episodes
      WHERE id IN (
        SELECT id FROM (
          SELECT
            id,
            reward,
            ts,
            ROW_NUMBER() OVER (PARTITION BY task ORDER BY reward DESC) as rank
          FROM episodes
          WHERE reward < ?
            AND ts < strftime('%s', 'now') - ?
        ) WHERE rank > ?
      )
    `);
        const result = stmt.run(minReward, maxAgeDays * 86400, keepMinPerTask);
        return result.changes;
    }
    // ========================================================================
    // Private Helper Methods
    // ========================================================================
    /** Join task, critique and output (when present) into the text that gets embedded. */
    buildEpisodeText(episode) {
        const parts = [episode.task];
        if (episode.critique)
            parts.push(episode.critique);
        if (episode.output)
            parts.push(episode.output);
        return parts.join('\n');
    }
    /** Insert the serialized embedding for an episode. */
    storeEmbedding(episodeId, embedding) {
        const stmt = this.db.prepare(`
      INSERT INTO episode_embeddings (episode_id, embedding)
      VALUES (?, ?)
    `);
        stmt.run(episodeId, this.serializeEmbedding(embedding));
    }
    /**
     * Wrap the bytes of a typed array in a Buffer without copying.
     *
     * Offset and length are passed explicitly so that a view created with
     * subarray() (or any array not starting at byte 0 of its ArrayBuffer)
     * serializes only its own elements, not the whole backing buffer.
     */
    serializeEmbedding(embedding) {
        return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
    }
    /**
     * Interpret stored bytes as a Float32Array.
     *
     * A Float32Array view requires a byte offset that is a multiple of 4.
     * Buffers may start at any offset of their backing ArrayBuffer, so
     * misaligned input is copied into a fresh buffer first. Trailing bytes
     * that do not form a whole float are ignored.
     */
    deserializeEmbedding(buffer) {
        const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
        const count = Math.floor(buffer.length / bytesPerFloat);
        if (buffer.byteOffset % bytesPerFloat === 0) {
            return new Float32Array(buffer.buffer, buffer.byteOffset, count);
        }
        const aligned = new Uint8Array(count * bytesPerFloat);
        aligned.set(buffer.subarray(0, count * bytesPerFloat));
        return new Float32Array(aligned.buffer);
    }
    /**
     * Cosine similarity of two vectors.
     *
     * Returns 0 (instead of NaN) when the vectors differ in length or either
     * has zero magnitude, so callers always receive a sortable number.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            return 0;
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denominator = Math.sqrt(normA) * Math.sqrt(normB);
        return denominator === 0 ? 0 : dotProduct / denominator;
    }
}