agentic-flow 1.4.5 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/CHANGELOG.md +96 -0
  2. package/README.md +36 -2
  3. package/dist/index.js +9 -0
  4. package/dist/reasoningbank/benchmark.js +333 -0
  5. package/dist/reasoningbank/config/reasoningbank-types.js +4 -0
  6. package/dist/reasoningbank/core/consolidate.js +139 -0
  7. package/dist/reasoningbank/core/database.js +250 -0
  8. package/dist/reasoningbank/core/distill.js +159 -0
  9. package/dist/reasoningbank/core/judge.js +128 -0
  10. package/dist/reasoningbank/core/matts.js +225 -0
  11. package/dist/reasoningbank/core/memory-engine.js +335 -0
  12. package/dist/reasoningbank/core/retrieve.js +86 -0
  13. package/dist/reasoningbank/db/queries.js +230 -0
  14. package/dist/reasoningbank/db/schema.js +4 -0
  15. package/dist/reasoningbank/demo-comparison.js +301 -0
  16. package/dist/reasoningbank/hooks/post-task.js +109 -0
  17. package/dist/reasoningbank/hooks/pre-task.js +68 -0
  18. package/dist/reasoningbank/index.js +91 -0
  19. package/dist/reasoningbank/test-integration.js +90 -0
  20. package/dist/reasoningbank/test-retrieval.js +176 -0
  21. package/dist/reasoningbank/test-validation.js +172 -0
  22. package/dist/reasoningbank/types/index.js +5 -0
  23. package/dist/reasoningbank/utils/config.js +76 -0
  24. package/dist/reasoningbank/utils/embeddings.js +113 -0
  25. package/dist/reasoningbank/utils/mmr.js +64 -0
  26. package/dist/reasoningbank/utils/pii-scrubber.js +98 -0
  27. package/dist/utils/cli.js +19 -0
  28. package/dist/utils/reasoningbankCommands.js +204 -0
  29. package/docs/REASONINGBANK-BENCHMARK.md +396 -0
  30. package/docs/REASONINGBANK-CLI-INTEGRATION.md +455 -0
  31. package/docs/REASONINGBANK-DEMO.md +419 -0
  32. package/docs/REASONINGBANK-VALIDATION.md +532 -0
  33. package/docs/releases/GITHUB-ISSUE-ADDENDUM-v1.4.6.md +1529 -0
  34. package/docs/releases/GITHUB-ISSUE-v1.4.6.md +1453 -0
  35. package/docs/releases/v1.4.6-reasoningbank-release.md +541 -0
  36. package/docs/releases/v1.4.7-bugfix.md +212 -0
  37. package/package.json +9 -2
@@ -0,0 +1,225 @@
1
+ /**
2
+ * MaTTS: Memory-aware Test-Time Scaling
3
+ * Algorithm 5 from ReasoningBank paper
4
+ *
5
+ * Two modes:
6
+ * - Parallel: k independent rollouts with self-contrast aggregation
7
+ * - Sequential: r iterative refinements with check-and-correct
8
+ */
9
+ import { readFileSync } from 'fs';
10
+ import { join } from 'path';
11
+ import { ulid } from 'ulid';
12
+ import { loadConfig } from '../utils/config.js';
13
+ import { retrieveMemories } from './retrieve.js';
14
+ import { judgeTrajectory } from './judge.js';
15
+ import { distillMemories } from './distill.js';
16
+ import * as db from '../db/queries.js';
17
/**
 * Run MaTTS in parallel mode.
 *
 * Executes up to k independent rollouts of `taskFn`, judges each resulting
 * trajectory, persists every judged trajectory, and finally aggregates
 * memories across the rollouts via self-contrast.
 *
 * The rollouts are awaited one after another; "parallel" describes their
 * independence (nothing is passed from one rollout to the next), not
 * concurrent execution. A rollout that throws is logged and skipped.
 *
 * @param {Function} taskFn - Async function called with no arguments; resolves to a trajectory.
 * @param {string} query - Task query handed to the judge and to the aggregator.
 * @param {object} [options]
 * @param {number} [options.k] - Number of rollouts; falls back to `config.matts.parallel_k`.
 * @param {string} [options.taskId] - Task id recorded with the run; defaults to `'matts-' + runId`.
 * @param {string} [options.agentId] - Agent id recorded per trajectory; defaults to `'matts-agent'`.
 * @returns {Promise<{runId: string, mode: string, k: number, trajectories: Array, aggregatedMemories: Array, successRate: number, duration: number}>}
 *   `successRate` is the fraction of judged trajectories labelled 'Success'
 *   (0 when no rollout completed); `duration` is in milliseconds.
 */
export async function mattsParallel(taskFn, query, options = {}) {
    const config = loadConfig();
    const k = options.k || config.matts.parallel_k;
    const runId = ulid();
    const taskId = options.taskId || 'matts-' + runId;
    const startTime = Date.now();
    console.log(`[INFO] Starting MaTTS parallel mode with k=${k}`);
    // Store MaTTS run
    db.storeMattsRun({
        run_id: runId,
        task_id: taskId,
        mode: 'parallel',
        k,
        status: 'running',
        summary: undefined
    });
    const trajectories = [];
    // Execute k independent rollouts
    for (let i = 0; i < k; i++) {
        console.log(`[INFO] MaTTS parallel rollout ${i + 1}/${k}`);
        try {
            // Capture the real execution window; stamping both fields at store
            // time would record a zero-length rollout.
            const startedAt = new Date().toISOString();
            const trajectory = await taskFn();
            const endedAt = new Date().toISOString();
            const verdict = await judgeTrajectory(trajectory, query);
            trajectories.push({
                id: ulid(),
                verdict,
                trajectory
            });
            // Store trajectory
            db.storeTrajectory({
                task_id: taskId,
                agent_id: options.agentId || 'matts-agent',
                query,
                trajectory_json: JSON.stringify(trajectory),
                started_at: startedAt,
                ended_at: endedAt,
                judge_label: verdict.label,
                judge_conf: verdict.confidence,
                judge_reasons: JSON.stringify(verdict.reasons),
                matts_run_id: runId
            });
        }
        catch (error) {
            console.error(`[ERROR] MaTTS rollout ${i + 1} failed:`, error);
        }
    }
    // Aggregate memories via self-contrast. With no completed rollout there is
    // nothing to contrast, so the aggregation call is skipped entirely.
    const aggregatedMemories = trajectories.length > 0
        ? await aggregateMemories(trajectories, query, options)
        : [];
    const successCount = trajectories.filter(t => t.verdict.label === 'Success').length;
    // Guard the division: 0 / 0 would report NaN when every rollout failed.
    const successRate = trajectories.length > 0 ? successCount / trajectories.length : 0;
    const duration = Date.now() - startTime;
    console.log(`[INFO] MaTTS parallel complete: ${trajectories.length} trajectories, ${successRate * 100}% success in ${duration}ms`);
    db.logMetric('rb.matts.parallel.duration_ms', duration);
    db.logMetric('rb.matts.parallel.success_rate', successRate);
    db.logMetric('rb.matts.parallel.memories', aggregatedMemories.length);
    return {
        runId,
        mode: 'parallel',
        k,
        trajectories,
        aggregatedMemories,
        successRate,
        duration
    };
}
84
/**
 * Run MaTTS in sequential mode.
 *
 * Performs up to r refinement iterations. Each iteration retrieves memories
 * for the query, runs `taskFn` with those memories plus the memories distilled
 * in earlier iterations, judges the trajectory and persists it. When the
 * verdict is 'Success' and `config.matts.sequential_stop_on_success` is not
 * disabled, the loop stops; otherwise new memories are distilled from the
 * trajectory and carried into the next iteration. An iteration that throws is
 * logged and skipped.
 *
 * @param {Function} taskFn - Async function called with an array of memories; resolves to a trajectory.
 * @param {string} query - Task query used for retrieval, judging and distillation.
 * @param {object} [options]
 * @param {number} [options.r] - Iteration budget; falls back to `config.matts.sequential_r`,
 *   then `config.matts.sequential_k`.
 * @param {string} [options.domain] - Domain forwarded to memory retrieval.
 * @param {string} [options.taskId] - Task id recorded with the run; defaults to `'matts-seq-' + runId`.
 * @param {string} [options.agentId] - Agent id recorded per trajectory; defaults to `'matts-agent'`.
 * @returns {Promise<{runId: string, mode: string, k: number, trajectories: Array, aggregatedMemories: Array, successRate: number, duration: number}>}
 *   `k` carries the iteration budget r; `aggregatedMemories` holds everything
 *   distilled during the run; `successRate` is 0 when no iteration completed.
 */
export async function mattsSequential(taskFn, query, options = {}) {
    const config = loadConfig();
    const r = options.r || config.matts.sequential_r || config.matts.sequential_k;
    const runId = ulid();
    const taskId = options.taskId || 'matts-seq-' + runId;
    const stopOnSuccess = config.matts.sequential_stop_on_success ?? true;
    const startTime = Date.now();
    console.log(`[INFO] Starting MaTTS sequential mode with r=${r}`);
    db.storeMattsRun({
        run_id: runId,
        task_id: taskId,
        mode: 'sequential',
        k: r,
        status: 'running',
        summary: undefined
    });
    const trajectories = [];
    let previousMemories = [];
    // Iterative refinement
    for (let i = 0; i < r; i++) {
        console.log(`[INFO] MaTTS sequential iteration ${i + 1}/${r}`);
        try {
            // Retrieve relevant memories (including from previous iterations)
            const memories = await retrieveMemories(query, {
                domain: options.domain
            });
            // Execute with memories, recording the real execution window.
            const startedAt = new Date().toISOString();
            const trajectory = await taskFn([...memories, ...previousMemories]);
            const endedAt = new Date().toISOString();
            const verdict = await judgeTrajectory(trajectory, query);
            trajectories.push({
                id: ulid(),
                verdict,
                trajectory
            });
            // Persist before the early-stop check so the successful trajectory
            // is recorded too. A persistence failure is logged on its own so it
            // cannot cancel the early stop below.
            try {
                db.storeTrajectory({
                    task_id: taskId,
                    agent_id: options.agentId || 'matts-agent',
                    query,
                    trajectory_json: JSON.stringify(trajectory),
                    started_at: startedAt,
                    ended_at: endedAt,
                    judge_label: verdict.label,
                    judge_conf: verdict.confidence,
                    judge_reasons: JSON.stringify(verdict.reasons),
                    matts_run_id: runId
                });
            }
            catch (storeError) {
                console.error(`[ERROR] MaTTS iteration ${i + 1} could not be persisted:`, storeError);
            }
            // If success and stop_on_success is true, break early
            if (verdict.label === 'Success' && stopOnSuccess) {
                console.log(`[INFO] Success achieved at iteration ${i + 1}, stopping early`);
                break;
            }
            // Distill memories from this iteration and carry them forward
            const newMemories = await distillMemories(trajectory, verdict, query, options);
            previousMemories = [...previousMemories, ...newMemories];
        }
        catch (error) {
            console.error(`[ERROR] MaTTS iteration ${i + 1} failed:`, error);
        }
    }
    const successCount = trajectories.filter(t => t.verdict.label === 'Success').length;
    // Guard the division: 0 / 0 would report NaN when every iteration failed.
    const successRate = trajectories.length > 0 ? successCount / trajectories.length : 0;
    const duration = Date.now() - startTime;
    console.log(`[INFO] MaTTS sequential complete: ${trajectories.length} iterations, ${successRate * 100}% success in ${duration}ms`);
    db.logMetric('rb.matts.sequential.duration_ms', duration);
    db.logMetric('rb.matts.sequential.success_rate', successRate);
    return {
        runId,
        mode: 'sequential',
        k: r,
        trajectories,
        aggregatedMemories: previousMemories,
        successRate,
        duration
    };
}
161
/**
 * Aggregate memories from multiple trajectories using self-contrast.
 *
 * Loads the aggregation prompt template from
 * `<cwd>/src/reasoningbank/prompts/matts-aggregate.json`, fills in the
 * `{{k}}`, `{{task_query}}` and `{{trajectories}}` placeholders, sends the
 * prompt to the Anthropic messages endpoint and, for every entry in the
 * `memories` array of the JSON object found in the reply, calls
 * `distillMemories` and collects what it returns.
 *
 * This is best-effort: without `ANTHROPIC_API_KEY` it returns `[]` right
 * away, and any failure (missing or invalid prompt file, HTTP error,
 * unparsable reply) is logged and also yields `[]`.
 *
 * @param {Array<{id: string, verdict: {label: string, confidence: number}, trajectory: object}>} trajectories
 * @param {string} query - Task query substituted into the prompt.
 * @param {object} options - Forwarded unchanged to `distillMemories`.
 * @returns {Promise<Array>} Concatenated results of the `distillMemories` calls.
 */
async function aggregateMemories(trajectories, query, options) {
    console.log('[INFO] Aggregating memories via self-contrast');
    // Check the key first: without it nothing below is needed, so a missing
    // prompt file must not turn a skipped aggregation into a crash.
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        console.warn('[WARN] No API key, skipping aggregation');
        return [];
    }
    try {
        // Load aggregation prompt (inside the try so a missing or malformed
        // file is handled like every other aggregation failure).
        const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', 'matts-aggregate.json');
        const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
        // Format trajectories for comparison
        const trajectoryTexts = trajectories.map((t) => ({
            id: t.id,
            label: t.verdict.label,
            confidence: t.verdict.confidence,
            steps: JSON.stringify(t.trajectory.steps || [], null, 2)
        }));
        const substitutions = {
            k: String(trajectories.length),
            task_query: query,
            trajectories: JSON.stringify(trajectoryTexts, null, 2)
        };
        // Single pass with a replacer function: every occurrence is filled in,
        // `$&`/`$1`-style sequences in the inserted text stay literal, and
        // inserted text is never re-scanned for placeholders.
        const prompt = promptTemplate.template.replace(/\{\{(k|task_query|trajectories)\}\}/g, (_match, key) => substitutions[key]);
        const response = await fetch('https://api.anthropic.com/v1/messages', {
            method: 'POST',
            headers: {
                'x-api-key': apiKey,
                'anthropic-version': '2023-06-01',
                'content-type': 'application/json'
            },
            body: JSON.stringify({
                model: promptTemplate.model,
                max_tokens: promptTemplate.max_tokens,
                temperature: promptTemplate.temperature,
                system: promptTemplate.system,
                messages: [{ role: 'user', content: prompt }]
            })
        });
        if (!response.ok) {
            throw new Error(`Anthropic API error: ${response.status}`);
        }
        const result = await response.json();
        const content = result.content[0].text;
        // Parse and store aggregated memories: take the widest {...} span of the reply.
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
            const parsed = JSON.parse(jsonMatch[0]);
            const memories = parsed.memories || [];
            // Store with boosted confidence
            const memoryIds = [];
            // NOTE(review): `mem` itself is never handed to distillMemories; each
            // pass distills an empty trajectory with a fixed 0.9-confidence
            // Success verdict, so the aggregated content is not what gets
            // stored. Looks unintended — confirm the intended storage call.
            for (const mem of memories) {
                const verdict = { label: 'Success', confidence: 0.9, reasons: [] };
                const ids = await distillMemories({ steps: [] }, verdict, query, options);
                memoryIds.push(...ids);
            }
            return memoryIds;
        }
    }
    catch (error) {
        console.error('[ERROR] Memory aggregation failed:', error);
    }
    return [];
}
@@ -0,0 +1,335 @@
1
+ /**
2
+ * ReasoningBank Memory Engine
3
+ * Implements the 4-phase learning loop: RETRIEVE → JUDGE → DISTILL → CONSOLIDATE
4
+ */
5
+ import { ReasoningBankDB } from './database.js';
6
+ import { createEmbeddingProvider, cosineSimilarity } from '../utils/embeddings.js';
7
+ import { piiScrubber } from '../utils/pii-scrubber.js';
8
/**
 * ReasoningBank learning-loop engine.
 *
 * Wraps a ReasoningBankDB and an embedding provider and exposes the four
 * phases (retrieve, judge, distill, consolidate) plus `runTask`, which chains
 * them around a caller-supplied execution function, and a parallel MaTTS
 * helper.
 */
export class ReasoningBankEngine {
    db;
    embeddings;
    piiEnabled;
    weights;
    defaultK;
    minConfidence;
    consolidationThreshold;
    memoriesSinceConsolidation = 0;
    /**
     * @param {object} config
     * @param {string} config.dbPath - Passed to the ReasoningBankDB constructor.
     * @param {object} [config.embeddings] - `provider` (default 'hash') and `model`.
     * @param {object} [config.piiScrub] - Scrubbing is on unless `enabled` is exactly false.
     * @param {object} [config.retrieval] - `weights` ({alpha, beta, gamma, delta}), `k`, `minConfidence`.
     * @param {object} [config.consolidation] - `scheduleEvery`: memories between automatic consolidations.
     */
    constructor(config) {
        this.db = new ReasoningBankDB(config.dbPath);
        this.embeddings = createEmbeddingProvider(config.embeddings?.provider || 'hash', {
            apiKey: process.env.OPENAI_API_KEY || process.env.ANTHROPIC_API_KEY,
            model: config.embeddings?.model
        });
        this.piiEnabled = config.piiScrub?.enabled !== false;
        const weights = config.retrieval?.weights;
        // `??` instead of `||`: an explicit 0 is a legitimate way to switch a
        // factor off and must not be replaced by the default.
        this.weights = {
            alpha: weights?.alpha ?? 0.65, // Similarity
            beta: weights?.beta ?? 0.15, // Recency
            gamma: weights?.gamma ?? 0.20, // Reliability
            delta: weights?.delta ?? 0.10 // Diversity penalty (not read by any method of this class)
        };
        this.defaultK = config.retrieval?.k || 3;
        this.minConfidence = config.retrieval?.minConfidence ?? 0.3;
        this.consolidationThreshold = config.consolidation?.scheduleEvery || 20;
    }
    /**
     * Phase 1: RETRIEVE - rank stored memories against a query.
     *
     * Memories below `minConfidence`, outside the requested domain, or without
     * a stored embedding are excluded. The rest are scored as
     * alpha*similarity + beta*recency + gamma*reliability, and up to k of them
     * are picked with MMR. The usage counter of every returned memory is
     * bumped.
     *
     * @param {object} options
     * @param {string} options.query - Text to embed and match against.
     * @param {number} [options.k] - Maximum results; defaults to the configured k.
     * @param {number} [options.lambda] - MMR relevance/diversity trade-off; defaults to 0.9.
     * @param {string} [options.domain] - Matches memories whose domain equals it or starts with `domain + '.'`.
     * @returns {Promise<Array>} Selected memories, each extended with `score`,
     *   `similarity`, `recency`, `reliability` and `diversityPenalty`.
     */
    async retrieve(options) {
        const k = options.k || this.defaultK;
        const lambda = options.lambda ?? 0.9;
        // Generate query embedding
        const queryEmbedding = await this.embeddings.generate(options.query);
        // Get all memories
        const allMemories = this.db.getAllMemories();
        const embeddings = this.db.getAllEmbeddings();
        // Drop low-confidence memories BEFORE selection, so they neither take
        // one of the k slots nor get a usage bump without being returned.
        let candidates = allMemories.filter(m => m.confidence >= this.minConfidence);
        // Filter by domain if specified
        if (options.domain) {
            candidates = candidates.filter(m => m.pattern_data?.domain === options.domain ||
                m.pattern_data?.domain?.startsWith(options.domain + '.'));
        }
        // Calculate scores for each candidate
        const scoredCandidates = [];
        for (const memory of candidates) {
            const embedding = embeddings.get(memory.id);
            if (!embedding)
                continue;
            // 1. Similarity score (cosine similarity)
            const similarity = cosineSimilarity(queryEmbedding, embedding);
            // 2. Recency score: exponential decay exp(-ageDays / 30), i.e. a
            //    30-day time constant (not a half-life).
            const ageDays = (Date.now() - new Date(memory.created_at).getTime()) / (1000 * 60 * 60 * 24);
            const recency = Math.exp(-ageDays / 30);
            // 3. Reliability score (confidence × sqrt(usage/10)), capped at 1.
            //    A missing usage_count counts as 0 instead of yielding NaN.
            const reliability = Math.min(memory.confidence * Math.sqrt((memory.usage_count ?? 0) / 10), 1.0);
            // Combined score (before diversity penalty)
            const score = this.weights.alpha * similarity +
                this.weights.beta * recency +
                this.weights.gamma * reliability;
            scoredCandidates.push({
                ...memory,
                score,
                similarity,
                recency,
                reliability,
                diversityPenalty: 0 // Placeholder; selectWithMMR does not update it
            });
        }
        // Sort by score
        scoredCandidates.sort((a, b) => b.score - a.score);
        // Apply MMR for diversity, reusing the embeddings loaded above
        const selected = this.selectWithMMR(scoredCandidates, queryEmbedding, k, lambda, embeddings);
        // Update usage counts
        for (const memory of selected) {
            this.db.updateMemoryUsage(memory.id);
        }
        return selected;
    }
    /**
     * MMR (Maximal Marginal Relevance) selection.
     *
     * Greedily picks, one at a time, the remaining candidate that maximises
     * lambda * score - (1 - lambda) * (highest cosine similarity to anything
     * already picked). Candidates without an embedding are never picked.
     *
     * @param {Array} candidates - Scored memories (`id` and `score` are read).
     * @param {*} queryEmbedding - Accepted for interface compatibility; not read.
     * @param {number} k - Maximum number of memories to pick.
     * @param {number} lambda - Weight of relevance versus diversity.
     * @param {Map} [embeddings] - Embeddings keyed by memory id; loaded from the database when omitted.
     * @returns {Array} The picked candidates in pick order.
     */
    selectWithMMR(candidates, queryEmbedding, k, lambda, embeddings = this.db.getAllEmbeddings()) {
        const selected = [];
        const remaining = [...candidates];
        while (selected.length < k && remaining.length > 0) {
            let bestScore = -Infinity;
            let bestIndex = -1;
            for (let i = 0; i < remaining.length; i++) {
                const candidate = remaining[i];
                const candidateEmbedding = embeddings.get(candidate.id);
                if (!candidateEmbedding)
                    continue;
                // Relevance to query
                const relevance = candidate.score;
                // Maximum similarity to already selected
                let maxSimilarity = 0;
                for (const selectedMemory of selected) {
                    const selectedEmbedding = embeddings.get(selectedMemory.id);
                    if (selectedEmbedding) {
                        const sim = cosineSimilarity(candidateEmbedding, selectedEmbedding);
                        maxSimilarity = Math.max(maxSimilarity, sim);
                    }
                }
                // MMR score
                const mmrScore = lambda * relevance - (1 - lambda) * maxSimilarity;
                if (mmrScore > bestScore) {
                    bestScore = mmrScore;
                    bestIndex = i;
                }
            }
            // No pickable candidate left (all remaining lack embeddings or score NaN).
            if (bestIndex < 0) {
                break;
            }
            selected.push(remaining[bestIndex]);
            remaining.splice(bestIndex, 1);
        }
        return selected;
    }
    /**
     * Phase 2: JUDGE - classify a trajectory log with keyword heuristics.
     *
     * Matching is by case-insensitive substring, so short keywords such as
     * 'ok' also match inside longer words. Success-only or error-only logs get
     * confidence 0.8; mixed signals resolve to 'Success' and no signals to
     * 'Failure', both at confidence 0.5.
     *
     * @param {string} trajectory - Execution log text.
     * @returns {Promise<{label: string, confidence: number, rationale: string}>}
     */
    async judge(trajectory) {
        // Simple heuristic judge (can be upgraded to LLM)
        const scrubbed = this.piiEnabled ? piiScrubber.scrub(trajectory) : trajectory;
        // Heuristics for success/failure
        const errorKeywords = ['error', 'failed', 'exception', 'timeout', 'unauthorized', 'forbidden'];
        const successKeywords = ['success', 'completed', 'ok', '200', 'done'];
        const lowerTrajectory = scrubbed.toLowerCase();
        const hasError = errorKeywords.some(kw => lowerTrajectory.includes(kw));
        const hasSuccess = successKeywords.some(kw => lowerTrajectory.includes(kw));
        if (hasSuccess && !hasError) {
            return { label: 'Success', confidence: 0.8, rationale: 'Success keywords found' };
        }
        else if (hasError && !hasSuccess) {
            return { label: 'Failure', confidence: 0.8, rationale: 'Error keywords found' };
        }
        else if (hasSuccess && hasError) {
            return { label: 'Success', confidence: 0.5, rationale: 'Mixed signals' };
        }
        else {
            return { label: 'Failure', confidence: 0.5, rationale: 'No clear indicators' };
        }
    }
    /**
     * Phase 3: DISTILL - store the trajectory and derive one memory from it.
     *
     * A 'Success' verdict produces a success pattern, anything else a failure
     * guardrail. The memory is stored with the verdict's confidence and an
     * embedding of its content, and the consolidation counter is incremented.
     *
     * @param {string} taskId - Task id stored with the trajectory.
     * @param {string} trajectory - Execution log text.
     * @param {{label: string, confidence: number}} verdict - Result of the judge phase.
     * @param {string} domain - Domain recorded in the memory's pattern data.
     * @returns {Promise<*>} The id returned by `db.insertMemory`.
     */
    async distill(taskId, trajectory, verdict, domain) {
        const scrubbed = this.piiEnabled ? piiScrubber.scrub(trajectory) : trajectory;
        // Store trajectory
        this.db.insertTrajectory({
            task_id: taskId,
            trajectory: scrubbed,
            verdict: verdict.label,
            confidence: verdict.confidence
        });
        // Extract pattern based on verdict
        const pattern = verdict.label === 'Success'
            ? this.extractSuccessPattern(scrubbed, domain)
            : this.extractFailureGuardrail(scrubbed, domain);
        // Store as memory
        const memoryId = this.db.insertMemory({
            title: pattern.title,
            description: pattern.description,
            content: pattern.content,
            confidence: verdict.confidence,
            usage_count: 0,
            pattern_data: {
                domain,
                success_pattern: verdict.label === 'Success',
                failure_guardrail: verdict.label === 'Failure'
            }
        });
        // Generate and store embedding
        const embedding = await this.embeddings.generate(pattern.content);
        this.db.insertEmbedding(memoryId, embedding);
        this.memoriesSinceConsolidation++;
        return memoryId;
    }
    /**
     * Build a success pattern from the first five non-blank lines of the log.
     *
     * @param {string} trajectory - Execution log text.
     * @param {string} domain - Domain used in the title.
     * @returns {{title: string, description: string, content: string}}
     */
    extractSuccessPattern(trajectory, domain) {
        // Extract key steps from successful execution
        const lines = trajectory.split('\n').filter(l => l.trim());
        const keySteps = lines.slice(0, 5).join('\n');
        return {
            title: `Success pattern for ${domain}`,
            description: `Successful execution strategy`,
            content: `Successful approach:\n${keySteps}`
        };
    }
    /**
     * Build a failure guardrail from the first non-blank log line mentioning
     * 'error' or 'failed' (case-insensitive), or 'Unknown error' if none does.
     *
     * @param {string} trajectory - Execution log text.
     * @param {string} domain - Domain used in the title.
     * @returns {{title: string, description: string, content: string}}
     */
    extractFailureGuardrail(trajectory, domain) {
        // Extract error information
        const lines = trajectory.split('\n').filter(l => l.trim());
        const errorInfo = lines.find(l => l.toLowerCase().includes('error') ||
            l.toLowerCase().includes('failed')) || 'Unknown error';
        return {
            title: `Failure guardrail for ${domain}`,
            description: `Prevention strategy for common failures`,
            content: `Avoid: ${errorInfo}\nRecommend: Check prerequisites and retry with backoff`
        };
    }
    /**
     * Phase 4: CONSOLIDATE - deduplicate and prune stored memories.
     *
     * For each duplicate pair reported by the database, the memory with the
     * higher confidence (ties: higher usage count) is kept. Afterwards a
     * memory is pruned when it is older than `maxAgeDays`, below
     * `minConfidence`, or unused and older than `unusedDays`.
     *
     * @param {object} [options]
     * @param {number} [options.dedupeThreshold] - Passed to `db.findDuplicates`; default 0.95.
     * @param {object} [options.prune] - `maxAgeDays` (90), `minConfidence` (0.3), `unusedDays` (30).
     * @returns {Promise<{processed: number, duplicates: number, contradictions: number, pruned: number, durationMs: number}>}
     *   `processed` counts the memories examined by the prune pass;
     *   `duplicates` counts reported pairs; `contradictions` is always 0.
     */
    async consolidate(options) {
        const startTime = Date.now();
        // `??` so that an explicit 0 (e.g. "never prune on confidence") is honoured.
        const dedupeThreshold = options?.dedupeThreshold ?? 0.95;
        const maxAgeDays = options?.prune?.maxAgeDays ?? 90;
        const minConfidence = options?.prune?.minConfidence ?? 0.3;
        const unusedDays = options?.prune?.unusedDays ?? 30;
        // Find and merge duplicates
        const duplicates = this.db.findDuplicates(dedupeThreshold);
        for (const [id1, id2] of duplicates) {
            const mem1 = this.db.getMemory(id1);
            const mem2 = this.db.getMemory(id2);
            // Either side may already be gone because of an earlier pair.
            if (mem1 && mem2) {
                // Keep the one with higher confidence and usage
                const keepId = mem1.confidence > mem2.confidence ||
                    (mem1.confidence === mem2.confidence && mem1.usage_count > mem2.usage_count)
                    ? id1 : id2;
                const deleteId = keepId === id1 ? id2 : id1;
                this.db.deleteMemory(deleteId);
            }
        }
        // Prune old or low-quality memories
        const allMemories = this.db.getAllMemories();
        let pruned = 0;
        for (const memory of allMemories) {
            const ageDays = (Date.now() - new Date(memory.created_at).getTime()) / (1000 * 60 * 60 * 24);
            const lastUsedDays = ageDays; // Simplified: assume last used = created
            const shouldPrune = ageDays > maxAgeDays ||
                memory.confidence < minConfidence ||
                (memory.usage_count === 0 && lastUsedDays > unusedDays);
            if (shouldPrune) {
                this.db.deleteMemory(memory.id);
                pruned++;
            }
        }
        // Detect contradictions (simplified)
        const contradictions = 0; // TODO: Implement semantic contradiction detection
        this.memoriesSinceConsolidation = 0;
        return {
            processed: allMemories.length,
            duplicates: duplicates.length,
            contradictions,
            pruned,
            durationMs: Date.now() - startTime
        };
    }
    /**
     * High-level task execution with the full learning loop: retrieve
     * memories, run `executeFn` with them, judge and distill its log, and
     * consolidate once enough memories have accumulated.
     *
     * @param {object} options
     * @param {string} options.taskId
     * @param {string} options.query
     * @param {string} [options.domain]
     * @param {Function} options.executeFn - Async; receives the retrieved
     *   memories and resolves to an object with `log` and `success`.
     * @returns {Promise<{success: *, summary: string, memories: Array, verdict: object}>}
     */
    async runTask(options) {
        // Phase 1: RETRIEVE
        const memories = await this.retrieve({
            query: options.query,
            domain: options.domain
        });
        // EXECUTE
        const result = await options.executeFn(memories);
        // Phase 2: JUDGE
        const verdict = await this.judge(result.log);
        // Phase 3: DISTILL
        await this.distill(options.taskId, result.log, verdict, options.domain);
        // Phase 4: CONSOLIDATE (if threshold reached)
        if (this.memoriesSinceConsolidation >= this.consolidationThreshold) {
            await this.consolidate();
        }
        return {
            success: result.success,
            summary: `Task ${options.taskId}: ${verdict.label} (confidence: ${verdict.confidence})`,
            memories,
            verdict
        };
    }
    /**
     * MaTTS: Memory-aware Test-Time Scaling (Parallel).
     *
     * Starts `options.k` runs concurrently; each retrieves memories, executes,
     * judges and records its result. The consensus label is 'Success' only
     * when a strict majority of runs succeeded; the consensus confidence is
     * the mean run confidence (0 when there were no runs). A rejection in any
     * run rejects the whole call.
     *
     * @param {object} options
     * @param {number} options.k - Number of runs.
     * @param {string} options.taskId
     * @param {string} options.query
     * @param {string} [options.domain]
     * @param {Function} options.executeFn - Async; receives the retrieved
     *   memories and resolves to an object with `log`.
     * @returns {Promise<{success: boolean, summary: string, memories: Array, verdict: {label: string, confidence: number}}>}
     */
    async mattsParallel(options) {
        const runs = await Promise.all(Array.from({ length: options.k }, async (_, i) => {
            const memories = await this.retrieve({
                query: options.query,
                domain: options.domain
            });
            const result = await options.executeFn(memories);
            const verdict = await this.judge(result.log);
            this.db.insertMattsRun({
                task_id: options.taskId,
                run_index: i,
                result: result.log,
                verdict: verdict.label,
                confidence: verdict.confidence
            });
            return { result, verdict };
        }));
        // Calculate consensus
        const successes = runs.filter(r => r.verdict.label === 'Success').length;
        // Guard the division: with zero runs the mean would be NaN.
        const avgConfidence = runs.length > 0
            ? runs.reduce((sum, r) => sum + r.verdict.confidence, 0) / runs.length
            : 0;
        const consensusVerdict = {
            label: successes > runs.length / 2 ? 'Success' : 'Failure',
            confidence: avgConfidence
        };
        return {
            success: consensusVerdict.label === 'Success',
            summary: `MaTTS Parallel: ${successes}/${runs.length} successes, consensus: ${consensusVerdict.label}`,
            memories: [],
            verdict: consensusVerdict
        };
    }
    /**
     * Get statistics
     *
     * @returns {*} Whatever `db.getStats()` returns.
     */
    getStats() {
        return this.db.getStats();
    }
    /**
     * Close database connection
     */
    close() {
        this.db.close();
    }
}
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Memory Retrieval with MMR diversity
3
+ * Algorithm 1 from ReasoningBank paper
4
+ */
5
+ import { computeEmbedding } from '../utils/embeddings.js';
6
+ import { mmrSelection, cosineSimilarity } from '../utils/mmr.js';
7
+ import * as db from '../db/queries.js';
8
+ import { loadConfig } from '../utils/config.js';
9
/**
 * Retrieve the top-k memories for a query, diversified with MMR.
 *
 * Each candidate fetched from the database gets a base score of
 *   alpha * similarity + beta * recency + gamma * reliability
 * where
 *   - similarity  = cosine similarity between query and candidate embeddings
 *   - recency     = exp(-age_days / recency_half_life_days)
 *   - reliability = min(confidence, 1.0)
 * and the scored candidates are then handed to `mmrSelection` together with
 * the configured delta. Usage is incremented for every selected memory and
 * the retrieval latency is logged as a metric.
 *
 * @param {string} query - Query text to embed.
 * @param {object} [options]
 * @param {number} [options.k] - Number of memories wanted; defaults to `config.retrieve.k`.
 * @param {string} [options.domain] - Forwarded to the candidate query.
 * @param {string} [options.agent] - Forwarded to the candidate query.
 * @returns {Promise<Array<{id: *, title: *, description: *, content: *, score: number, components: object}>>}
 */
export async function retrieveMemories(query, options = {}) {
    const { retrieve: settings } = loadConfig();
    const limit = options.k || settings.k;
    const begunAt = Date.now();
    console.log(`[INFO] Retrieving memories for query: ${query.substring(0, 100)}...`);
    // Step 1: embed the query text
    const queryEmbed = await computeEmbedding(query);
    // Step 2: pull the candidate pool from the database
    const pool = db.fetchMemoryCandidates({
        domain: options.domain,
        agent: options.agent,
        minConfidence: settings.min_score
    });
    if (pool.length === 0) {
        console.log('[INFO] No memory candidates found');
        return [];
    }
    console.log(`[INFO] Found ${pool.length} candidates`);
    // Step 3: attach a base score and its components to every candidate
    const withScore = (candidate) => {
        const components = {
            similarity: cosineSimilarity(queryEmbed, candidate.embedding),
            recency: Math.exp(-candidate.age_days / settings.recency_half_life_days),
            reliability: Math.min(candidate.confidence, 1.0)
        };
        const score = settings.alpha * components.similarity +
            settings.beta * components.recency +
            settings.gamma * components.reliability;
        return { ...candidate, score, components };
    };
    const ranked = pool.map((candidate) => withScore(candidate));
    // Step 4: diversity-aware selection
    const chosen = mmrSelection(ranked, queryEmbed, limit, settings.delta);
    // Step 5: record that the chosen memories were used
    chosen.forEach((memory) => {
        db.incrementUsage(memory.id);
    });
    const elapsed = Date.now() - begunAt;
    console.log(`[INFO] Retrieval complete: ${chosen.length} memories in ${elapsed}ms`);
    db.logMetric('rb.retrieve.latency_ms', elapsed);
    // Flatten each selected item into the shape callers consume
    return chosen.map(({ id, pattern_data: pattern, score, components }) => ({
        id,
        title: pattern.title,
        description: pattern.description,
        content: pattern.content,
        score,
        components
    }));
}
68
/**
 * Format memories for injection into a system prompt.
 *
 * Produces a markdown section with one numbered entry per memory (title,
 * description, strategy) followed by a metrics line. The similarity is read
 * from `mem.components.similarity` or, when that is absent, from a top-level
 * `mem.similarity`, so both the memories returned by `retrieveMemories` and
 * objects that carry `similarity` directly can be formatted. A metric that is
 * not a finite number is left out; if none is available the metrics line is
 * omitted.
 *
 * @param {Array<object>|null|undefined} memories - Memories with `title`,
 *   `description`, `content` and optionally `score` / similarity.
 * @returns {string} The formatted section, or '' when there are no memories.
 */
export function formatMemoriesForPrompt(memories) {
    // Nothing to inject for a missing or empty list.
    if (!memories?.length) {
        return '';
    }
    const toPercent = (value) => `${(value * 100).toFixed(1)}%`;
    const sections = Array.from(memories, (mem, index) => {
        const similarity = mem.components?.similarity ?? mem.similarity;
        const metrics = [];
        if (Number.isFinite(mem.score)) {
            metrics.push(`Confidence: ${toPercent(mem.score)}`);
        }
        if (Number.isFinite(similarity)) {
            metrics.push(`Similarity: ${toPercent(similarity)}`);
        }
        const metricsLine = metrics.length > 0 ? `*${metrics.join(' | ')}*\n\n` : '';
        return `### Memory ${index + 1}: ${mem.title}\n\n` +
            `${mem.description}\n\n` +
            `**Strategy:**\n${mem.content}\n\n` +
            metricsLine +
            '---\n\n';
    });
    return `\n## Relevant Memories from Past Experience\n\n${sections.join('')}`;
}