agentic-flow 1.4.5 → 1.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/README.md +36 -2
- package/dist/index.js +9 -0
- package/dist/reasoningbank/benchmark.js +333 -0
- package/dist/reasoningbank/config/reasoningbank-types.js +4 -0
- package/dist/reasoningbank/core/consolidate.js +139 -0
- package/dist/reasoningbank/core/database.js +250 -0
- package/dist/reasoningbank/core/distill.js +159 -0
- package/dist/reasoningbank/core/judge.js +128 -0
- package/dist/reasoningbank/core/matts.js +225 -0
- package/dist/reasoningbank/core/memory-engine.js +335 -0
- package/dist/reasoningbank/core/retrieve.js +86 -0
- package/dist/reasoningbank/db/queries.js +230 -0
- package/dist/reasoningbank/db/schema.js +4 -0
- package/dist/reasoningbank/demo-comparison.js +301 -0
- package/dist/reasoningbank/hooks/post-task.js +109 -0
- package/dist/reasoningbank/hooks/pre-task.js +68 -0
- package/dist/reasoningbank/index.js +91 -0
- package/dist/reasoningbank/test-integration.js +90 -0
- package/dist/reasoningbank/test-retrieval.js +176 -0
- package/dist/reasoningbank/test-validation.js +172 -0
- package/dist/reasoningbank/types/index.js +5 -0
- package/dist/reasoningbank/utils/config.js +76 -0
- package/dist/reasoningbank/utils/embeddings.js +113 -0
- package/dist/reasoningbank/utils/mmr.js +64 -0
- package/dist/reasoningbank/utils/pii-scrubber.js +98 -0
- package/dist/utils/cli.js +19 -0
- package/dist/utils/reasoningbankCommands.js +204 -0
- package/docs/REASONINGBANK-BENCHMARK.md +396 -0
- package/docs/REASONINGBANK-CLI-INTEGRATION.md +455 -0
- package/docs/REASONINGBANK-DEMO.md +419 -0
- package/docs/REASONINGBANK-VALIDATION.md +532 -0
- package/docs/releases/GITHUB-ISSUE-ADDENDUM-v1.4.6.md +1529 -0
- package/docs/releases/GITHUB-ISSUE-v1.4.6.md +1453 -0
- package/docs/releases/v1.4.6-reasoningbank-release.md +541 -0
- package/docs/releases/v1.4.7-bugfix.md +212 -0
- package/package.json +9 -2
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MaTTS: Memory-aware Test-Time Scaling
|
|
3
|
+
* Algorithm 5 from ReasoningBank paper
|
|
4
|
+
*
|
|
5
|
+
* Two modes:
|
|
6
|
+
* - Parallel: k independent rollouts with self-contrast aggregation
|
|
7
|
+
* - Sequential: r iterative refinements with check-and-correct
|
|
8
|
+
*/
|
|
9
|
+
import { readFileSync } from 'fs';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
import { ulid } from 'ulid';
|
|
12
|
+
import { loadConfig } from '../utils/config.js';
|
|
13
|
+
import { retrieveMemories } from './retrieve.js';
|
|
14
|
+
import { judgeTrajectory } from './judge.js';
|
|
15
|
+
import { distillMemories } from './distill.js';
|
|
16
|
+
import * as db from '../db/queries.js';
|
|
17
|
+
/**
 * Run MaTTS in parallel mode.
 *
 * Performs k rollouts of `taskFn` (each one awaited before the next starts),
 * judges every resulting trajectory, persists it, and finally aggregates
 * memories across all collected trajectories via self-contrast.
 *
 * @param {Function} taskFn - Async function producing one trajectory; called with no arguments.
 * @param {string} query - Task query passed to the judge and to the aggregation step.
 * @param {object} [options]
 * @param {number} [options.k] - Number of rollouts; falls back to `config.matts.parallel_k`.
 * @param {string} [options.taskId] - Task id; defaults to `'matts-' + runId`.
 * @param {string} [options.agentId] - Agent id; defaults to `'matts-agent'`.
 * @returns {Promise<object>} `{ runId, mode, k, trajectories, aggregatedMemories, successRate, duration }`.
 *   `successRate` is 0 when no rollout produced a trajectory.
 */
export async function mattsParallel(taskFn, query, options = {}) {
    const config = loadConfig();
    const k = options.k || config.matts.parallel_k;
    const runId = ulid();
    const taskId = options.taskId || 'matts-' + runId;
    const startTime = Date.now();
    console.log(`[INFO] Starting MaTTS parallel mode with k=${k}`);
    // Store MaTTS run
    // NOTE(review): the run is recorded with status 'running' and this function
    // never updates it afterwards — confirm whether a completion update is expected.
    db.storeMattsRun({
        run_id: runId,
        task_id: taskId,
        mode: 'parallel',
        k,
        status: 'running',
        summary: undefined
    });
    const trajectories = [];
    // Execute k independent rollouts
    for (let i = 0; i < k; i++) {
        console.log(`[INFO] MaTTS parallel rollout ${i + 1}/${k}`);
        try {
            // Capture the start before running the task so started_at and
            // ended_at bracket the rollout instead of both being "now".
            const startedAt = new Date().toISOString();
            const trajectory = await taskFn();
            const verdict = await judgeTrajectory(trajectory, query);
            trajectories.push({
                id: ulid(),
                verdict,
                trajectory
            });
            // Store trajectory
            db.storeTrajectory({
                task_id: taskId,
                agent_id: options.agentId || 'matts-agent',
                query,
                trajectory_json: JSON.stringify(trajectory),
                started_at: startedAt,
                ended_at: new Date().toISOString(),
                judge_label: verdict.label,
                judge_conf: verdict.confidence,
                judge_reasons: JSON.stringify(verdict.reasons),
                matts_run_id: runId
            });
        }
        catch (error) {
            // A failed rollout is logged and skipped; the remaining rollouts still run.
            console.error(`[ERROR] MaTTS rollout ${i + 1} failed:`, error);
        }
    }
    // Aggregate memories via self-contrast
    const aggregatedMemories = await aggregateMemories(trajectories, query, options);
    const successCount = trajectories.filter(t => t.verdict.label === 'Success').length;
    // Guard the division: with zero trajectories 0/0 would yield NaN.
    const successRate = trajectories.length > 0 ? successCount / trajectories.length : 0;
    const duration = Date.now() - startTime;
    console.log(`[INFO] MaTTS parallel complete: ${trajectories.length} trajectories, ${successRate * 100}% success in ${duration}ms`);
    db.logMetric('rb.matts.parallel.duration_ms', duration);
    db.logMetric('rb.matts.parallel.success_rate', successRate);
    db.logMetric('rb.matts.parallel.memories', aggregatedMemories.length);
    return {
        runId,
        mode: 'parallel',
        k,
        trajectories,
        aggregatedMemories,
        successRate,
        duration
    };
}
|
|
84
|
+
/**
 * Run MaTTS in sequential mode.
 *
 * Runs up to r iterations. Each iteration retrieves memories for the query,
 * calls `taskFn` with those memories plus everything distilled in earlier
 * iterations, judges the trajectory, persists it, and distills new memories
 * from it. By default the loop stops at the first 'Success' verdict.
 *
 * @param {Function} taskFn - Async function receiving an array of memories and producing a trajectory.
 * @param {string} query - Task query used for retrieval, judging and distillation.
 * @param {object} [options]
 * @param {number} [options.r] - Iteration count; falls back to `config.matts.sequential_r`,
 *   then `config.matts.sequential_k`.
 * @param {string} [options.domain] - Domain forwarded to memory retrieval.
 * @param {string} [options.taskId] - Task id; defaults to `'matts-seq-' + runId`.
 * @param {string} [options.agentId] - Agent id; defaults to `'matts-agent'`.
 * @returns {Promise<object>} `{ runId, mode, k, trajectories, aggregatedMemories, successRate, duration }`
 *   where `k` carries r and `aggregatedMemories` is everything distilled during the run.
 *   `successRate` is 0 when no iteration produced a trajectory.
 */
export async function mattsSequential(taskFn, query, options = {}) {
    const config = loadConfig();
    const r = options.r || config.matts.sequential_r || config.matts.sequential_k;
    const runId = ulid();
    const taskId = options.taskId || 'matts-seq-' + runId;
    const startTime = Date.now();
    console.log(`[INFO] Starting MaTTS sequential mode with r=${r}`);
    // NOTE(review): the run is recorded with status 'running' and this function
    // never updates it afterwards — confirm whether a completion update is expected.
    db.storeMattsRun({
        run_id: runId,
        task_id: taskId,
        mode: 'sequential',
        k: r,
        status: 'running',
        summary: undefined
    });
    const trajectories = [];
    let previousMemories = [];
    // Iterative refinement
    for (let i = 0; i < r; i++) {
        console.log(`[INFO] MaTTS sequential iteration ${i + 1}/${r}`);
        try {
            // Capture the start before doing any work so started_at and
            // ended_at bracket the iteration instead of both being "now".
            const startedAt = new Date().toISOString();
            // Retrieve relevant memories (including from previous iterations)
            const memories = await retrieveMemories(query, {
                domain: options.domain
            });
            // Execute with memories
            const trajectory = await taskFn([...memories, ...previousMemories]);
            const verdict = await judgeTrajectory(trajectory, query);
            trajectories.push({
                id: ulid(),
                verdict,
                trajectory
            });
            // Persist the trajectory before the early-stop check; otherwise the
            // successful trajectory that ends the run would never be stored.
            db.storeTrajectory({
                task_id: taskId,
                agent_id: options.agentId || 'matts-agent',
                query,
                trajectory_json: JSON.stringify(trajectory),
                started_at: startedAt,
                ended_at: new Date().toISOString(),
                judge_label: verdict.label,
                judge_conf: verdict.confidence,
                judge_reasons: JSON.stringify(verdict.reasons),
                matts_run_id: runId
            });
            // If success and stop_on_success is true (the default), break early
            if (verdict.label === 'Success' && (config.matts.sequential_stop_on_success ?? true)) {
                console.log(`[INFO] Success achieved at iteration ${i + 1}, stopping early`);
                break;
            }
            // Distill memories from this iteration and carry them into the next one
            // NOTE(review): the shape of distillMemories' return value (ids vs. memory
            // objects) is not visible here — confirm it matches what taskFn expects.
            const newMemories = await distillMemories(trajectory, verdict, query, options);
            previousMemories = [...previousMemories, ...newMemories];
        }
        catch (error) {
            // A failed iteration is logged and skipped; later iterations still run.
            console.error(`[ERROR] MaTTS iteration ${i + 1} failed:`, error);
        }
    }
    const successCount = trajectories.filter(t => t.verdict.label === 'Success').length;
    // Guard the division: with zero trajectories 0/0 would yield NaN.
    const successRate = trajectories.length > 0 ? successCount / trajectories.length : 0;
    const duration = Date.now() - startTime;
    console.log(`[INFO] MaTTS sequential complete: ${trajectories.length} iterations, ${successRate * 100}% success in ${duration}ms`);
    db.logMetric('rb.matts.sequential.duration_ms', duration);
    db.logMetric('rb.matts.sequential.success_rate', successRate);
    return {
        runId,
        mode: 'sequential',
        k: r,
        trajectories,
        aggregatedMemories: previousMemories,
        successRate,
        duration
    };
}
|
|
161
|
+
/**
 * Aggregate memories from multiple trajectories using self-contrast.
 *
 * Builds a comparison prompt from the judged trajectories, sends it to the
 * Anthropic Messages API, extracts the first JSON object from the reply and
 * runs one distillation per entry of its `memories` array.
 *
 * This step is best-effort: a missing API key, an unreadable prompt file, an
 * API failure or an unparsable reply all result in an empty array instead of
 * an exception.
 *
 * @param {Array<object>} trajectories - Items of the form `{ id, verdict, trajectory }`.
 * @param {string} query - Task query substituted into the prompt and passed to distillation.
 * @param {object} options - Forwarded unchanged to `distillMemories`.
 * @returns {Promise<Array>} Concatenated results of the `distillMemories` calls, or `[]`.
 */
async function aggregateMemories(trajectories, query, options) {
    console.log('[INFO] Aggregating memories via self-contrast');
    // Check the key first so no file I/O happens when aggregation is skipped anyway.
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        console.warn('[WARN] No API key, skipping aggregation');
        return [];
    }
    try {
        // Load aggregation prompt. Done inside the try block so a missing or
        // malformed prompt file degrades to "no aggregated memories" rather
        // than throwing away the rollouts the caller has already completed.
        const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', 'matts-aggregate.json');
        const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
        // Format trajectories for comparison
        const trajectoryTexts = trajectories.map((t) => ({
            id: t.id,
            label: t.verdict.label,
            confidence: t.verdict.confidence,
            steps: JSON.stringify(t.trajectory.steps || [], null, 2)
        }));
        // Replacer functions are used so that '$&', '$1', '$$' etc. occurring in
        // the query or in trajectory text are inserted literally rather than
        // being interpreted as replacement patterns.
        const prompt = promptTemplate.template
            .replace('{{k}}', () => String(trajectories.length))
            .replace('{{task_query}}', () => query)
            .replace('{{trajectories}}', () => JSON.stringify(trajectoryTexts, null, 2));
        const response = await fetch('https://api.anthropic.com/v1/messages', {
            method: 'POST',
            headers: {
                'x-api-key': apiKey,
                'anthropic-version': '2023-06-01',
                'content-type': 'application/json'
            },
            body: JSON.stringify({
                model: promptTemplate.model,
                max_tokens: promptTemplate.max_tokens,
                temperature: promptTemplate.temperature,
                system: promptTemplate.system,
                messages: [{ role: 'user', content: prompt }]
            })
        });
        if (!response.ok) {
            throw new Error(`Anthropic API error: ${response.status}`);
        }
        const result = await response.json();
        const content = result.content[0].text;
        // Parse and store aggregated memories: take everything from the first
        // '{' to the last '}' in the reply and parse it as JSON.
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
            const parsed = JSON.parse(jsonMatch[0]);
            const memories = parsed.memories || [];
            // Store with boosted confidence
            const memoryIds = [];
            // NOTE(review): the loop variable is unused — each aggregated memory
            // triggers a distillation over an empty trajectory and its own content
            // is never passed along. Confirm whether the parsed memory should be
            // stored directly instead.
            for (const mem of memories) {
                const verdict = { label: 'Success', confidence: 0.9, reasons: [] };
                const ids = await distillMemories({ steps: [] }, verdict, query, options);
                memoryIds.push(...ids);
            }
            return memoryIds;
        }
    }
    catch (error) {
        console.error('[ERROR] Memory aggregation failed:', error);
    }
    return [];
}
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ReasoningBank Memory Engine
|
|
3
|
+
* Implements the 4-phase learning loop: RETRIEVE → JUDGE → DISTILL → CONSOLIDATE
|
|
4
|
+
*/
|
|
5
|
+
import { ReasoningBankDB } from './database.js';
|
|
6
|
+
import { createEmbeddingProvider, cosineSimilarity } from '../utils/embeddings.js';
|
|
7
|
+
import { piiScrubber } from '../utils/pii-scrubber.js';
|
|
8
|
+
export class ReasoningBankEngine {
    db;
    embeddings;
    piiEnabled;
    weights;
    defaultK;
    minConfidence;
    consolidationThreshold;
    // Number of memories distilled since the last consolidate() call.
    memoriesSinceConsolidation = 0;
    /**
     * @param {object} config
     * @param {string} config.dbPath - Passed to the ReasoningBankDB constructor.
     * @param {object} [config.embeddings] - `provider` (default 'hash') and `model`.
     * @param {object} [config.piiScrub] - Scrubbing is on unless `enabled` is exactly false.
     * @param {object} [config.retrieval] - `weights` ({alpha, beta, gamma, delta}), `k`, `minConfidence`.
     * @param {object} [config.consolidation] - `scheduleEvery`: distilled memories between consolidations.
     */
    constructor(config) {
        this.db = new ReasoningBankDB(config.dbPath);
        this.embeddings = createEmbeddingProvider(config.embeddings?.provider || 'hash', {
            apiKey: process.env.OPENAI_API_KEY || process.env.ANTHROPIC_API_KEY,
            model: config.embeddings?.model
        });
        this.piiEnabled = config.piiScrub?.enabled !== false;
        // `??` rather than `||` so an explicit weight of 0 (factor switched off)
        // is honoured instead of being replaced by the default.
        const weights = config.retrieval?.weights;
        this.weights = {
            alpha: weights?.alpha ?? 0.65, // Similarity
            beta: weights?.beta ?? 0.15, // Recency
            gamma: weights?.gamma ?? 0.20, // Reliability
            delta: weights?.delta ?? 0.10 // Diversity penalty (stored; not read by this class)
        };
        this.defaultK = config.retrieval?.k || 3;
        // An explicit 0 disables the confidence filter.
        this.minConfidence = config.retrieval?.minConfidence ?? 0.3;
        this.consolidationThreshold = config.consolidation?.scheduleEvery || 20;
    }
    /**
     * Phase 1: RETRIEVE - rank stored memories for a query and return up to k.
     *
     * Memories below `minConfidence`, outside the requested domain, or without
     * a stored embedding are never candidates. The base score is
     * alpha·similarity + beta·recency + gamma·reliability; the final pick is
     * made by selectWithMMR. Usage counts are bumped only for the memories
     * that are actually returned.
     *
     * @param {object} options
     * @param {string} options.query - Text to embed and match against.
     * @param {string} [options.domain] - Exact domain or a prefix of a dotted domain.
     * @param {number} [options.k] - Result count; defaults to the configured k.
     * @param {number} [options.lambda] - MMR relevance/diversity trade-off; default 0.9.
     * @returns {Promise<Array<object>>} Selected memories with score components attached.
     */
    async retrieve(options) {
        const k = options.k || this.defaultK;
        // `??` so that lambda = 0 (pure diversity) is a usable setting.
        const lambda = options.lambda ?? 0.9;
        // Generate query embedding
        const queryEmbedding = await this.embeddings.generate(options.query);
        const embeddings = this.db.getAllEmbeddings();
        // Apply the confidence floor up front. Filtering after selection would
        // bump usage counts of memories that are then discarded and could
        // return fewer than k results even when enough good ones exist.
        let candidates = this.db.getAllMemories()
            .filter(m => m.confidence >= this.minConfidence);
        // Filter by domain if specified
        if (options.domain) {
            candidates = candidates.filter(m => m.pattern_data.domain === options.domain ||
                m.pattern_data.domain?.startsWith(options.domain + '.'));
        }
        // Calculate scores for each candidate
        const scoredCandidates = [];
        for (const memory of candidates) {
            const embedding = embeddings.get(memory.id);
            if (!embedding) {
                continue;
            }
            // 1. Similarity score (cosine similarity)
            const similarity = cosineSimilarity(queryEmbedding, embedding);
            // 2. Recency score: exp(-ageDays / 30), i.e. exponential decay with
            //    a 30-day time constant.
            const ageDays = (Date.now() - new Date(memory.created_at).getTime()) / (1000 * 60 * 60 * 24);
            const recency = Math.exp(-ageDays / 30);
            // 3. Reliability score: confidence × sqrt(usage/10), capped at 1.
            //    A memory that has never been used therefore scores 0 here.
            const reliability = Math.min(memory.confidence * Math.sqrt(memory.usage_count / 10), 1.0);
            // Combined score (before diversity penalty)
            const score = this.weights.alpha * similarity +
                this.weights.beta * recency +
                this.weights.gamma * reliability;
            scoredCandidates.push({
                ...memory,
                score,
                similarity,
                recency,
                reliability,
                diversityPenalty: 0 // Placeholder; selectWithMMR does not update it
            });
        }
        // Sort by score
        scoredCandidates.sort((a, b) => b.score - a.score);
        // Apply MMR for diversity (reusing the embeddings already loaded above)
        const selected = this.selectWithMMR(scoredCandidates, queryEmbedding, k, lambda, embeddings);
        // Update usage counts
        for (const memory of selected) {
            this.db.updateMemoryUsage(memory.id);
        }
        return selected;
    }
    /**
     * MMR (Maximal Marginal Relevance) Selection.
     * Greedily picks up to k candidates, each time taking the one that
     * maximises lambda·score − (1 − lambda)·(max similarity to already picked).
     *
     * @param {Array<object>} candidates - Scored memories (`id`, `score`).
     * @param {*} queryEmbedding - Accepted for interface compatibility; not read here.
     * @param {number} k - Maximum number of memories to select.
     * @param {number} lambda - Weight of relevance versus diversity.
     * @param {Map} [embeddings] - id → embedding map; loaded from the DB when omitted.
     * @returns {Array<object>} Selected candidates in pick order.
     */
    selectWithMMR(candidates, queryEmbedding, k, lambda, embeddings = this.db.getAllEmbeddings()) {
        const selected = [];
        const remaining = [...candidates];
        while (selected.length < k && remaining.length > 0) {
            let bestScore = -Infinity;
            let bestIndex = -1;
            for (let i = 0; i < remaining.length; i++) {
                const candidate = remaining[i];
                const candidateEmbedding = embeddings.get(candidate.id);
                if (!candidateEmbedding) {
                    continue;
                }
                // Relevance to query
                const relevance = candidate.score;
                // Maximum similarity to already selected (0 while nothing is selected)
                let maxSimilarity = 0;
                for (const selectedMemory of selected) {
                    const selectedEmbedding = embeddings.get(selectedMemory.id);
                    if (selectedEmbedding) {
                        const sim = cosineSimilarity(candidateEmbedding, selectedEmbedding);
                        maxSimilarity = Math.max(maxSimilarity, sim);
                    }
                }
                // MMR score
                const mmrScore = lambda * relevance - (1 - lambda) * maxSimilarity;
                if (mmrScore > bestScore) {
                    bestScore = mmrScore;
                    bestIndex = i;
                }
            }
            if (bestIndex < 0) {
                // Nothing left that has an embedding.
                break;
            }
            selected.push(remaining[bestIndex]);
            remaining.splice(bestIndex, 1);
        }
        return selected;
    }
    /**
     * Phase 2: JUDGE - Evaluate task outcome with a keyword heuristic.
     *
     * @param {string} trajectory - Execution log text.
     * @returns {Promise<object>} `{ label: 'Success'|'Failure', confidence, rationale }`.
     *   Confidence is 0.8 for an unambiguous signal and 0.5 for mixed or absent signals.
     */
    async judge(trajectory) {
        // Simple heuristic judge (can be upgraded to LLM)
        const scrubbed = this.piiEnabled ? piiScrubber.scrub(trajectory) : trajectory;
        // Heuristics for success/failure
        const errorKeywords = ['error', 'failed', 'exception', 'timeout', 'unauthorized', 'forbidden'];
        const successKeywords = ['success', 'completed', 'done'];
        // 'ok' and '200' are matched as whole tokens only: as plain substrings
        // they also fire on words like "token" or values like "1200ms".
        const successTokens = /\b(?:ok|200)\b/;
        const lowerTrajectory = scrubbed.toLowerCase();
        const hasError = errorKeywords.some(kw => lowerTrajectory.includes(kw));
        const hasSuccess = successKeywords.some(kw => lowerTrajectory.includes(kw)) ||
            successTokens.test(lowerTrajectory);
        if (hasSuccess && !hasError) {
            return { label: 'Success', confidence: 0.8, rationale: 'Success keywords found' };
        }
        if (hasError && !hasSuccess) {
            return { label: 'Failure', confidence: 0.8, rationale: 'Error keywords found' };
        }
        if (hasSuccess && hasError) {
            return { label: 'Success', confidence: 0.5, rationale: 'Mixed signals' };
        }
        return { label: 'Failure', confidence: 0.5, rationale: 'No clear indicators' };
    }
    /**
     * Phase 3: DISTILL - Extract a pattern from a trajectory and store it.
     *
     * Stores the (optionally scrubbed) trajectory, derives a success pattern or
     * a failure guardrail from it depending on the verdict, stores that as a
     * memory together with its embedding, and increments the consolidation counter.
     *
     * @param {string} taskId
     * @param {string} trajectory - Execution log text.
     * @param {object} verdict - `{ label, confidence }` as produced by judge().
     * @param {string} domain - Stored in the memory's pattern_data and used in its title.
     * @returns {Promise<*>} The id returned by `db.insertMemory`.
     */
    async distill(taskId, trajectory, verdict, domain) {
        const scrubbed = this.piiEnabled ? piiScrubber.scrub(trajectory) : trajectory;
        // Store trajectory
        this.db.insertTrajectory({
            task_id: taskId,
            trajectory: scrubbed,
            verdict: verdict.label,
            confidence: verdict.confidence
        });
        // Extract pattern based on verdict
        const pattern = verdict.label === 'Success'
            ? this.extractSuccessPattern(scrubbed, domain)
            : this.extractFailureGuardrail(scrubbed, domain);
        // Store as memory
        const memoryId = this.db.insertMemory({
            title: pattern.title,
            description: pattern.description,
            content: pattern.content,
            confidence: verdict.confidence,
            usage_count: 0,
            pattern_data: {
                domain,
                success_pattern: verdict.label === 'Success',
                failure_guardrail: verdict.label === 'Failure'
            }
        });
        // Generate and store embedding
        const embedding = await this.embeddings.generate(pattern.content);
        this.db.insertEmbedding(memoryId, embedding);
        this.memoriesSinceConsolidation++;
        return memoryId;
    }
    /**
     * Build a success pattern from the first five non-blank lines of a trajectory.
     *
     * @param {string} trajectory
     * @param {string} domain
     * @returns {object} `{ title, description, content }`.
     */
    extractSuccessPattern(trajectory, domain) {
        // Extract key steps from successful execution
        const lines = trajectory.split('\n').filter(l => l.trim());
        const keySteps = lines.slice(0, 5).join('\n');
        return {
            title: `Success pattern for ${domain}`,
            description: `Successful execution strategy`,
            content: `Successful approach:\n${keySteps}`
        };
    }
    /**
     * Build a failure guardrail from the first non-blank line mentioning
     * "error" or "failed" (case-insensitive), or 'Unknown error' if none does.
     *
     * @param {string} trajectory
     * @param {string} domain
     * @returns {object} `{ title, description, content }`.
     */
    extractFailureGuardrail(trajectory, domain) {
        // Extract error information
        const lines = trajectory.split('\n').filter(l => l.trim());
        const errorInfo = lines.find(l => l.toLowerCase().includes('error') ||
            l.toLowerCase().includes('failed')) || 'Unknown error';
        return {
            title: `Failure guardrail for ${domain}`,
            description: `Prevention strategy for common failures`,
            content: `Avoid: ${errorInfo}\nRecommend: Check prerequisites and retry with backoff`
        };
    }
    /**
     * Phase 4: CONSOLIDATE - Deduplicate and prune.
     *
     * @param {object} [options]
     * @param {number} [options.dedupeThreshold] - Passed to `db.findDuplicates`; default 0.95.
     * @param {object} [options.prune] - `maxAgeDays` (90), `minConfidence` (0.3), `unusedDays` (30).
     * @returns {Promise<object>} `{ processed, duplicates, contradictions, pruned, durationMs }`.
     *   `duplicates` is the number of pairs reported by the DB; `contradictions` is always 0.
     */
    async consolidate(options) {
        const startTime = Date.now();
        const dedupeThreshold = options?.dedupeThreshold || 0.95;
        const maxAgeDays = options?.prune?.maxAgeDays || 90;
        // `??` so an explicit 0 disables confidence-based pruning.
        const minConfidence = options?.prune?.minConfidence ?? 0.3;
        const unusedDays = options?.prune?.unusedDays || 30;
        // Find and merge duplicates
        const duplicates = this.db.findDuplicates(dedupeThreshold);
        for (const [id1, id2] of duplicates) {
            const mem1 = this.db.getMemory(id1);
            const mem2 = this.db.getMemory(id2);
            // Either side may already be gone (deleted via an earlier pair).
            if (mem1 && mem2) {
                // Keep the one with higher confidence; usage count breaks ties
                const keepFirst = mem1.confidence > mem2.confidence ||
                    (mem1.confidence === mem2.confidence && mem1.usage_count > mem2.usage_count);
                this.db.deleteMemory(keepFirst ? id2 : id1);
            }
        }
        // Prune old or low-quality memories
        const allMemories = this.db.getAllMemories();
        let pruned = 0;
        for (const memory of allMemories) {
            const ageDays = (Date.now() - new Date(memory.created_at).getTime()) / (1000 * 60 * 60 * 24);
            const lastUsedDays = ageDays; // Simplified: assume last used = created
            const shouldPrune = ageDays > maxAgeDays ||
                memory.confidence < minConfidence ||
                (memory.usage_count === 0 && lastUsedDays > unusedDays);
            if (shouldPrune) {
                this.db.deleteMemory(memory.id);
                pruned++;
            }
        }
        // Detect contradictions (simplified)
        const contradictions = 0; // TODO: Implement semantic contradiction detection
        this.memoriesSinceConsolidation = 0;
        return {
            processed: allMemories.length,
            duplicates: duplicates.length,
            contradictions,
            pruned,
            durationMs: Date.now() - startTime
        };
    }
    /**
     * High-level task execution with full learning loop:
     * retrieve → execute → judge → distill → (consolidate when due).
     *
     * @param {object} options
     * @param {string} options.taskId
     * @param {string} options.query
     * @param {string} [options.domain]
     * @param {Function} options.executeFn - Async; receives the retrieved memories and
     *   returns an object whose `log` is judged and distilled and whose `success` is passed through.
     * @returns {Promise<object>} `{ success, summary, memories, verdict }`.
     */
    async runTask(options) {
        // Phase 1: RETRIEVE
        const memories = await this.retrieve({
            query: options.query,
            domain: options.domain
        });
        // EXECUTE
        const result = await options.executeFn(memories);
        // Phase 2: JUDGE
        const verdict = await this.judge(result.log);
        // Phase 3: DISTILL
        await this.distill(options.taskId, result.log, verdict, options.domain);
        // Phase 4: CONSOLIDATE (if threshold reached)
        if (this.memoriesSinceConsolidation >= this.consolidationThreshold) {
            await this.consolidate();
        }
        return {
            success: result.success,
            summary: `Task ${options.taskId}: ${verdict.label} (confidence: ${verdict.confidence})`,
            memories,
            verdict
        };
    }
    /**
     * MaTTS: Memory-aware Test-Time Scaling (Parallel).
     *
     * Starts `options.k` runs concurrently; each retrieves memories, executes,
     * judges and records itself via `db.insertMattsRun`. The consensus label is
     * 'Success' only when strictly more than half of the runs succeeded.
     *
     * @param {object} options - `k`, `taskId`, `query`, `domain`, `executeFn`.
     * @returns {Promise<object>} `{ success, summary, memories: [], verdict }` where the
     *   verdict confidence is the mean over all runs (0 when there were no runs).
     */
    async mattsParallel(options) {
        const runs = await Promise.all(Array.from({ length: options.k }, async (_, i) => {
            const memories = await this.retrieve({
                query: options.query,
                domain: options.domain
            });
            const result = await options.executeFn(memories);
            const verdict = await this.judge(result.log);
            this.db.insertMattsRun({
                task_id: options.taskId,
                run_index: i,
                result: result.log,
                verdict: verdict.label,
                confidence: verdict.confidence
            });
            return { result, verdict };
        }));
        // Calculate consensus
        const successes = runs.filter(r => r.verdict.label === 'Success').length;
        const confidenceSum = runs.reduce((sum, r) => sum + r.verdict.confidence, 0);
        // Guard the division: with zero runs 0/0 would yield NaN.
        const avgConfidence = runs.length > 0 ? confidenceSum / runs.length : 0;
        const consensusVerdict = {
            label: successes > runs.length / 2 ? 'Success' : 'Failure',
            confidence: avgConfidence
        };
        return {
            success: consensusVerdict.label === 'Success',
            summary: `MaTTS Parallel: ${successes}/${runs.length} successes, consensus: ${consensusVerdict.label}`,
            memories: [],
            verdict: consensusVerdict
        };
    }
    /**
     * Get statistics (delegates to the database).
     */
    getStats() {
        return this.db.getStats();
    }
    /**
     * Close database connection
     */
    close() {
        this.db.close();
    }
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Retrieval with MMR diversity
|
|
3
|
+
* Algorithm 1 from ReasoningBank paper
|
|
4
|
+
*/
|
|
5
|
+
import { computeEmbedding } from '../utils/embeddings.js';
|
|
6
|
+
import { mmrSelection, cosineSimilarity } from '../utils/mmr.js';
|
|
7
|
+
import * as db from '../db/queries.js';
|
|
8
|
+
import { loadConfig } from '../utils/config.js';
|
|
9
|
+
/**
 * Fetch the k most useful memories for a query, diversified with MMR.
 *
 * Every candidate gets a base score
 *   score = alpha * similarity + beta * recency + gamma * reliability
 * with
 *   - similarity:  cosine similarity between the query and the candidate embedding
 *   - recency:     exp(-age_days / recency_half_life_days)
 *   - reliability: min(confidence, 1.0)
 * The scored candidates then go through MMR selection, usage is recorded for
 * each selected memory, and the retrieval latency is logged as a metric.
 *
 * @param {string} query - Task query to embed.
 * @param {object} [options]
 * @param {number} [options.k] - Result count; falls back to `config.retrieve.k`.
 * @param {string} [options.domain] - Forwarded to the candidate query.
 * @param {string} [options.agent] - Forwarded to the candidate query.
 * @returns {Promise<Array<object>>} `{ id, title, description, content, score, components }` per memory.
 */
export async function retrieveMemories(query, options = {}) {
    const settings = loadConfig().retrieve;
    const k = options.k || settings.k;
    const startedAt = Date.now();
    console.log(`[INFO] Retrieving memories for query: ${query.substring(0, 100)}...`);

    // Step 1: embed the query text.
    const queryEmbed = await computeEmbedding(query);

    // Step 2: pull the candidate set from the database.
    const candidates = db.fetchMemoryCandidates({
        domain: options.domain,
        agent: options.agent,
        minConfidence: settings.min_score
    });
    if (candidates.length === 0) {
        console.log('[INFO] No memory candidates found');
        return [];
    }
    console.log(`[INFO] Found ${candidates.length} candidates`);

    // Step 3: attach a base score and its components to every candidate.
    const scored = [];
    for (const item of candidates) {
        const components = {
            similarity: cosineSimilarity(queryEmbed, item.embedding),
            recency: Math.exp(-item.age_days / settings.recency_half_life_days),
            reliability: Math.min(item.confidence, 1.0)
        };
        const score = settings.alpha * components.similarity +
            settings.beta * components.recency +
            settings.gamma * components.reliability;
        scored.push({ ...item, score, components });
    }

    // Step 4: diversify with MMR.
    const selected = mmrSelection(scored, queryEmbed, k, settings.delta);

    // Step 5: record that each selected memory was used.
    selected.forEach((mem) => {
        db.incrementUsage(mem.id);
    });

    const elapsed = Date.now() - startedAt;
    console.log(`[INFO] Retrieval complete: ${selected.length} memories in ${elapsed}ms`);
    db.logMetric('rb.retrieve.latency_ms', elapsed);

    // Flatten to the public memory shape.
    return selected.map(({ id, pattern_data, score, components }) => ({
        id,
        title: pattern_data.title,
        description: pattern_data.description,
        content: pattern_data.content,
        score,
        components
    }));
}
|
|
68
|
+
/**
 * Render memories as a Markdown section for injection into a system prompt.
 *
 * @param {Array<object>} memories - Items with `title`, `description`, `content`,
 *   `score` and `components.similarity`.
 * @returns {string} Empty string for an empty list; otherwise a heading followed
 *   by one numbered block per memory, each terminated by a horizontal rule.
 */
export function formatMemoriesForPrompt(memories) {
    const count = memories.length;
    if (count === 0) {
        return '';
    }
    const asPercent = (value) => (value * 100).toFixed(1);
    // One rendered block per position, numbered from 1.
    const blocks = Array.from({ length: count }, (_, index) => {
        const mem = memories[index];
        return [
            `### Memory ${index + 1}: ${mem.title}\n\n`,
            `${mem.description}\n\n`,
            `**Strategy:**\n${mem.content}\n\n`,
            `*Confidence: ${asPercent(mem.score)}% | `,
            `Similarity: ${asPercent(mem.components.similarity)}%*\n\n`,
            '---\n\n'
        ].join('');
    });
    return '\n## Relevant Memories from Past Experience\n\n' + blocks.join('');
}
|