agentic-flow 1.4.4 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -2
- package/dist/index.js +9 -0
- package/dist/reasoningbank/benchmark.js +333 -0
- package/dist/reasoningbank/config/reasoningbank-types.js +4 -0
- package/dist/reasoningbank/core/consolidate.js +139 -0
- package/dist/reasoningbank/core/distill.js +159 -0
- package/dist/reasoningbank/core/judge.js +128 -0
- package/dist/reasoningbank/core/matts.js +225 -0
- package/dist/reasoningbank/core/retrieve.js +86 -0
- package/dist/reasoningbank/db/queries.js +230 -0
- package/dist/reasoningbank/db/schema.js +4 -0
- package/dist/reasoningbank/demo-comparison.js +301 -0
- package/dist/reasoningbank/hooks/post-task.js +109 -0
- package/dist/reasoningbank/hooks/pre-task.js +68 -0
- package/dist/reasoningbank/index.js +91 -0
- package/dist/reasoningbank/test-integration.js +90 -0
- package/dist/reasoningbank/test-retrieval.js +176 -0
- package/dist/reasoningbank/test-validation.js +172 -0
- package/dist/reasoningbank/utils/config.js +76 -0
- package/dist/reasoningbank/utils/embeddings.js +113 -0
- package/dist/reasoningbank/utils/mmr.js +64 -0
- package/dist/reasoningbank/utils/pii-scrubber.js +98 -0
- package/dist/utils/agentBoosterPreprocessor.js +25 -10
- package/dist/utils/cli.js +19 -0
- package/dist/utils/reasoningbankCommands.js +137 -0
- package/docs/AGENT-BOOSTER-INTEGRATION.md +143 -128
- package/docs/REASONINGBANK-BENCHMARK.md +396 -0
- package/docs/REASONINGBANK-CLI-INTEGRATION.md +455 -0
- package/docs/REASONINGBANK-DEMO.md +419 -0
- package/docs/REASONINGBANK-VALIDATION.md +532 -0
- package/package.json +9 -2
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Distillation from trajectories
|
|
3
|
+
* Algorithm 3 from ReasoningBank paper
|
|
4
|
+
*/
|
|
5
|
+
import { readFileSync } from 'fs';
|
|
6
|
+
import { join } from 'path';
|
|
7
|
+
import { ulid } from 'ulid';
|
|
8
|
+
import { loadConfig } from '../utils/config.js';
|
|
9
|
+
import { scrubMemory } from '../utils/pii-scrubber.js';
|
|
10
|
+
import { computeEmbedding } from '../utils/embeddings.js';
|
|
11
|
+
import * as db from '../db/queries.js';
|
|
12
|
+
/**
 * Distill reasoning memories from a completed trajectory (Algorithm 3).
 *
 * Selects a success- or failure-specific prompt template, asks the
 * Anthropic API to extract up to `max_items` memory items, then scrubs,
 * embeds and stores them via storeMemories(). Falls back to
 * template-based distillation when no API key is configured or the API
 * call / parsing fails.
 *
 * @param {object} trajectory - Trajectory with a `steps` array.
 * @param {{label: string, confidence: number}} verdict - Judge verdict ('Success' | 'Failure').
 * @param {string} query - Original task query.
 * @param {object} [options] - Optional taskId / agentId / domain.
 * @returns {Promise<string[]>} IDs of the stored memories.
 */
export async function distillMemories(trajectory, verdict, query, options = {}) {
    const config = loadConfig();
    const startTime = Date.now();
    console.log(`[INFO] Distilling memories from ${verdict.label} trajectory`);
    // Select the success- or failure-specific prompt template
    const templateName = verdict.label === 'Success' ? 'distill-success.json' : 'distill-failure.json';
    const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', templateName);
    const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
    const maxItems = verdict.label === 'Success'
        ? config.distill.max_items_success
        : config.distill.max_items_failure;
    const confidencePrior = verdict.label === 'Success'
        ? config.distill.confidence_prior_success
        : config.distill.confidence_prior_failure;
    // Without an API key, degrade to the template-based fallback
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        console.warn('[WARN] ANTHROPIC_API_KEY not set, using template-based distillation');
        return templateBasedDistill(trajectory, verdict, query, options);
    }
    try {
        // Format trajectory for the prompt
        const trajectoryText = JSON.stringify(trajectory.steps || [], null, 2);
        // Fill placeholders via split/join: unlike String.prototype.replace,
        // this substitutes EVERY occurrence of the placeholder and never
        // interprets special `$&`/`$'` replacement patterns that may appear
        // in the user-supplied query or serialized trajectory.
        const fill = (template, name, value) => template.split(`{{${name}}}`).join(value);
        let prompt = promptTemplate.template;
        prompt = fill(prompt, 'task_query', query);
        prompt = fill(prompt, 'trajectory', trajectoryText);
        prompt = fill(prompt, 'max_items', String(maxItems));
        // Call Anthropic API
        const response = await fetch('https://api.anthropic.com/v1/messages', {
            method: 'POST',
            headers: {
                'x-api-key': apiKey,
                'anthropic-version': '2023-06-01',
                'content-type': 'application/json'
            },
            body: JSON.stringify({
                model: config.distill.model,
                max_tokens: 2048,
                temperature: config.distill.temperature,
                system: promptTemplate.system,
                messages: [{ role: 'user', content: prompt }]
            })
        });
        if (!response.ok) {
            throw new Error(`Anthropic API error: ${response.status}`);
        }
        const result = await response.json();
        const content = result.content[0].text;
        // Parse memories from response
        const distilled = parseDistilledMemories(content);
        // Store memories in database
        const memoryIds = await storeMemories(distilled, confidencePrior, verdict, options);
        const duration = Date.now() - startTime;
        console.log(`[INFO] Distilled ${memoryIds.length} memories in ${duration}ms`);
        db.logMetric('rb.distill.latency_ms', duration);
        db.logMetric('rb.distill.yield', memoryIds.length);
        return memoryIds;
    }
    catch (error) {
        // Any API/parse/storage failure degrades to the template-based fallback.
        console.error('[ERROR] Distillation failed:', error);
        return templateBasedDistill(trajectory, verdict, query, options);
    }
}
|
|
79
|
+
/**
 * Extract the distilled-memory list from a raw LLM response.
 *
 * Looks for the first-to-last brace span in the text, parses it as JSON,
 * and returns its `memories` array. Returns an empty array when no JSON
 * object is present or parsing fails.
 *
 * @param {string} content - Raw LLM response text.
 * @returns {object[]} Parsed memory items (possibly empty).
 */
function parseDistilledMemories(content) {
    const braceSpan = content.match(/\{[\s\S]*\}/);
    if (!braceSpan) {
        return [];
    }
    try {
        const parsed = JSON.parse(braceSpan[0]);
        return parsed.memories || [];
    }
    catch (error) {
        console.warn('[WARN] Failed to parse distilled memories JSON');
        return [];
    }
}
|
|
95
|
+
/**
 * Persist distilled memories: scrub PII, embed, and upsert each memory
 * plus its embedding into the database.
 *
 * @param {object[]} memories - Distilled memory items (title/description/content/tags/domain).
 * @param {number} confidencePrior - Initial confidence assigned to each memory.
 * @param {{label: string}} verdict - Judge verdict; its label is recorded as the outcome.
 * @param {object} options - Optional taskId / agentId / domain overrides.
 * @returns {Promise<string[]>} ULIDs of the stored memories, in insertion order.
 */
async function storeMemories(memories, confidencePrior, verdict, options) {
const memoryIds = [];
for (const mem of memories) {
// Scrub PII before anything is persisted or embedded
const scrubbed = scrubMemory(mem);
// Generate embedding from the concatenated text fields
// (embeddings are computed sequentially, one memory at a time)
const embedding = await computeEmbedding(`${scrubbed.title} ${scrubbed.description} ${scrubbed.content}`);
// Create memory ID (ULID: sortable by creation time)
const id = ulid();
// Store memory
// NOTE(review): `confidence` and usage counters are recorded both inside
// pattern_data (confidence/n_uses) and at the top level
// (confidence/usage_count) — confirm which one db.upsertMemory treats
// as authoritative.
db.upsertMemory({
id,
type: 'reasoning_memory',
pattern_data: {
title: scrubbed.title,
description: scrubbed.description,
content: scrubbed.content,
source: {
task_id: options.taskId || 'unknown',
agent_id: options.agentId || 'unknown',
outcome: verdict.label,
evidence: []
},
tags: scrubbed.tags,
domain: options.domain || scrubbed.domain,
created_at: new Date().toISOString(),
confidence: confidencePrior,
n_uses: 0
},
confidence: confidencePrior,
usage_count: 0
});
// Store embedding
// NOTE(review): `model` is set to 'distill-success'/'distill-failure'
// (derived from the verdict), not the embedding model name — looks like
// it records provenance rather than the encoder; confirm this matches
// what retrieval expects in the embeddings table.
db.upsertEmbedding({
id,
model: 'distill-' + verdict.label.toLowerCase(),
dims: embedding.length,
vector: embedding,
created_at: new Date().toISOString()
});
memoryIds.push(id);
console.log(`[INFO] Stored memory: ${scrubbed.title}`);
}
return memoryIds;
}
|
|
143
|
+
/**
 * Template-based distillation (fallback).
 * Simple extraction without LLM, used when no ANTHROPIC_API_KEY is
 * available or the LLM call failed.
 *
 * Bug fix: the constructed memory was previously discarded and an empty
 * array was always returned (the "Skip storage" TODO); it is now persisted
 * through the normal storeMemories() pipeline, best-effort.
 *
 * @param {object} trajectory - Trajectory with a `steps` array.
 * @param {{label: string}} verdict - Judge verdict ('Success' | 'Failure').
 * @param {string} query - Original task query.
 * @param {object} options - Optional taskId / agentId / domain.
 * @returns {Promise<string[]>} IDs of stored memories ([] if storage fails).
 */
async function templateBasedDistill(trajectory, verdict, query, options) {
    console.log('[INFO] Using template-based distillation (no API key)');
    // Create a single generic memory from the trajectory
    const memory = {
        title: `${verdict.label}: ${query.substring(0, 50)}`,
        description: `Task outcome: ${verdict.label}`,
        content: `Query: ${query}\n\nSteps: ${trajectory.steps?.length || 0}\n\nOutcome: ${verdict.label}`,
        tags: [verdict.label.toLowerCase(), 'template'],
        domain: options.domain
    };
    try {
        // Mirror the confidence priors used by the LLM-based path.
        const config = loadConfig();
        const confidencePrior = verdict.label === 'Success'
            ? config.distill.confidence_prior_success
            : config.distill.confidence_prior_failure;
        return await storeMemories([memory], confidencePrior, verdict, options);
    }
    catch (error) {
        // Best-effort fallback: a storage/embedding failure must not
        // propagate to the caller — keep the old empty-result behavior.
        console.warn('[WARN] Template-based distillation storage failed:', error);
        return [];
    }
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-as-Judge for trajectory evaluation
|
|
3
|
+
* Algorithm 2 from ReasoningBank paper
|
|
4
|
+
*/
|
|
5
|
+
import { readFileSync } from 'fs';
|
|
6
|
+
import { join } from 'path';
|
|
7
|
+
import { loadConfig } from '../utils/config.js';
|
|
8
|
+
/**
 * Judge a task trajectory using LLM evaluation (LLM-as-Judge, Algorithm 2).
 *
 * Loads the judge prompt template, sends the formatted trajectory to the
 * Anthropic API, and parses a `{label, confidence, reasons}` verdict.
 * Falls back to heuristicJudge() when no API key is set or the call fails.
 *
 * @param {object} trajectory - Trajectory with a `steps` array.
 * @param {string} query - Original task query.
 * @param {object} [options] - Currently unused; reserved for future use.
 * @returns {Promise<{label: string, confidence: number, reasons: string[]}>}
 */
export async function judgeTrajectory(trajectory, query, options = {}) {
    const config = loadConfig();
    const startTime = Date.now();
    console.log(`[INFO] Judging trajectory for query: ${query.substring(0, 100)}...`);
    // Load judge prompt template
    const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', 'judge.json');
    const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
    // Format trajectory for judgment
    const trajectoryText = formatTrajectory(trajectory);
    // Check if we have an Anthropic API key
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        console.warn('[WARN] ANTHROPIC_API_KEY not set, using heuristic judgment');
        return heuristicJudge(trajectory, query);
    }
    try {
        // Fill placeholders via split/join: unlike String.prototype.replace,
        // this substitutes EVERY occurrence and never interprets special
        // `$&`/`$'` replacement patterns that may appear in the
        // user-supplied query or serialized trajectory.
        const fill = (template, name, value) => template.split(`{{${name}}}`).join(value);
        let prompt = promptTemplate.template;
        prompt = fill(prompt, 'task_query', query);
        prompt = fill(prompt, 'trajectory', trajectoryText);
        const response = await fetch('https://api.anthropic.com/v1/messages', {
            method: 'POST',
            headers: {
                'x-api-key': apiKey,
                'anthropic-version': '2023-06-01',
                'content-type': 'application/json'
            },
            body: JSON.stringify({
                model: config.judge.model,
                max_tokens: config.judge.max_tokens,
                temperature: config.judge.temperature,
                system: promptTemplate.system,
                messages: [{ role: 'user', content: prompt }]
            })
        });
        if (!response.ok) {
            throw new Error(`Anthropic API error: ${response.status}`);
        }
        const result = await response.json();
        const content = result.content[0].text;
        // Parse JSON response into a structured verdict
        const verdict = parseVerdict(content);
        const duration = Date.now() - startTime;
        console.log(`[INFO] Judgment complete: ${verdict.label} (${verdict.confidence}) in ${duration}ms`);
        db.logMetric('rb.judge.latency_ms', duration);
        db.logMetric('rb.judge.success_rate', verdict.label === 'Success' ? 1 : 0);
        return verdict;
    }
    catch (error) {
        // API/parse errors degrade to the rule-based heuristic.
        console.error('[ERROR] Judge failed:', error);
        console.warn('[WARN] Falling back to heuristic judgment');
        return heuristicJudge(trajectory, query);
    }
}
|
|
65
|
+
/**
 * Render a trajectory's steps as numbered, pretty-printed JSON blocks
 * suitable for inclusion in an LLM prompt.
 *
 * @param {object} trajectory - Trajectory with an optional `steps` array.
 * @returns {string} One "Step N: {...}" block per step, or a placeholder
 *                   string when there are no steps.
 */
function formatTrajectory(trajectory) {
    const steps = trajectory.steps || [];
    const rendered = steps
        .map((step, index) => `Step ${index + 1}: ${JSON.stringify(step, null, 2)}\n\n`)
        .join('');
    return rendered || 'No steps recorded';
}
|
|
76
|
+
/**
 * Parse a judge verdict out of an LLM response.
 *
 * Accepts either `{ "verdict": { label, confidence, reasons } }` or the
 * flat `{ label, confidence, reasons }` shape. Falls back to keyword
 * detection on the raw text when no parseable JSON object is found.
 *
 * @param {string} content - Raw LLM response text.
 * @returns {{label: string, confidence: number, reasons: string[]}}
 */
function parseVerdict(content) {
    try {
        // Try to extract JSON from response
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        if (jsonMatch) {
            const parsed = JSON.parse(jsonMatch[0]);
            // Use ?? (not ||) so a legitimate confidence of 0 is preserved
            // instead of silently cascading to the 0.5 default.
            return {
                label: parsed.verdict?.label ?? parsed.label ?? 'Failure',
                confidence: parsed.verdict?.confidence ?? parsed.confidence ?? 0.5,
                reasons: parsed.verdict?.reasons ?? parsed.reasons ?? []
            };
        }
    }
    catch (error) {
        console.warn('[WARN] Failed to parse verdict JSON, using text analysis');
    }
    // Fallback: text-based detection on the raw response
    const lower = content.toLowerCase();
    const isSuccess = lower.includes('success') && !lower.includes('failure');
    return {
        label: isSuccess ? 'Success' : 'Failure',
        confidence: 0.6,
        reasons: ['Parsed from text (JSON parse failed)']
    };
}
|
|
104
|
+
/**
 * Heuristic judgment used when no LLM is available.
 * Rule-based: a trajectory counts as a success when it has at least one
 * step, no step mentions "error", and some step mentions "complete".
 *
 * @param {object} trajectory - Trajectory with an optional `steps` array.
 * @param {string} query - Task query (unused by the heuristic).
 * @returns {{label: string, confidence: number, reasons: string[]}}
 */
function heuristicJudge(trajectory, query) {
    const steps = trajectory.steps || [];
    // Serialize each step once and scan for marker keywords.
    const serialized = steps.map((step) => JSON.stringify(step).toLowerCase());
    const hasErrors = serialized.some((text) => text.includes('error'));
    const hasCompletion = serialized.some((text) => text.includes('complete'));
    const isSuccess = steps.length > 0 && !hasErrors && hasCompletion;
    return {
        label: isSuccess ? 'Success' : 'Failure',
        confidence: 0.5, // Low confidence for heuristic
        reasons: [
            `Heuristic judgment (no API key)`,
            `Steps: ${steps.length}`,
            `Errors: ${hasErrors}`,
            `Completion markers: ${hasCompletion}`
        ]
    };
}
|
|
127
|
+
// NOTE: ES module imports are hoisted, so this bottom-of-file import does NOT
// defer loading or avoid a circular dependency — it behaves exactly like a
// top-of-file import (circular refs are handled by ESM live bindings).
|
|
128
|
+
import * as db from '../db/queries.js';
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MaTTS: Memory-aware Test-Time Scaling
|
|
3
|
+
* Algorithm 5 from ReasoningBank paper
|
|
4
|
+
*
|
|
5
|
+
* Two modes:
|
|
6
|
+
* - Parallel: k independent rollouts with self-contrast aggregation
|
|
7
|
+
* - Sequential: r iterative refinements with check-and-correct
|
|
8
|
+
*/
|
|
9
|
+
import { readFileSync } from 'fs';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
import { ulid } from 'ulid';
|
|
12
|
+
import { loadConfig } from '../utils/config.js';
|
|
13
|
+
import { retrieveMemories } from './retrieve.js';
|
|
14
|
+
import { judgeTrajectory } from './judge.js';
|
|
15
|
+
import { distillMemories } from './distill.js';
|
|
16
|
+
import * as db from '../db/queries.js';
|
|
17
|
+
/**
 * Run MaTTS (Memory-aware Test-Time Scaling) in parallel mode.
 * Executes k independent rollouts of `taskFn`, judges and persists each
 * trajectory, then aggregates memories across rollouts via self-contrast.
 *
 * @param {Function} taskFn - Async function producing one trajectory per call.
 * @param {string} query - Task query used for judging and aggregation.
 * @param {object} [options] - Optional k / taskId / agentId / domain.
 * @returns {Promise<object>} { runId, mode, k, trajectories, aggregatedMemories, successRate, duration }.
 */
export async function mattsParallel(taskFn, query, options = {}) {
    const config = loadConfig();
    const k = options.k || config.matts.parallel_k;
    const runId = ulid();
    const startTime = Date.now();
    console.log(`[INFO] Starting MaTTS parallel mode with k=${k}`);
    // Record the MaTTS run before any rollouts execute
    db.storeMattsRun({
        run_id: runId,
        task_id: options.taskId || 'matts-' + runId,
        mode: 'parallel',
        k,
        status: 'running',
        summary: undefined
    });
    const trajectories = [];
    // Execute k independent rollouts (sequentially; each awaits the last)
    for (let i = 0; i < k; i++) {
        console.log(`[INFO] MaTTS parallel rollout ${i + 1}/${k}`);
        try {
            const trajectory = await taskFn();
            const verdict = await judgeTrajectory(trajectory, query);
            trajectories.push({
                id: ulid(),
                verdict,
                trajectory
            });
            // Persist the trajectory with its verdict
            db.storeTrajectory({
                task_id: options.taskId || 'matts-' + runId,
                agent_id: options.agentId || 'matts-agent',
                query,
                trajectory_json: JSON.stringify(trajectory),
                started_at: new Date().toISOString(),
                ended_at: new Date().toISOString(),
                judge_label: verdict.label,
                judge_conf: verdict.confidence,
                judge_reasons: JSON.stringify(verdict.reasons),
                matts_run_id: runId
            });
        }
        catch (error) {
            // A failed rollout is skipped; the remaining rollouts still run.
            console.error(`[ERROR] MaTTS rollout ${i + 1} failed:`, error);
        }
    }
    // Aggregate memories via self-contrast
    const aggregatedMemories = await aggregateMemories(trajectories, query, options);
    // Guard against division by zero: if every rollout threw, trajectories
    // is empty and the naive division previously produced NaN.
    const successRate = trajectories.length > 0
        ? trajectories.filter((t) => t.verdict.label === 'Success').length / trajectories.length
        : 0;
    const duration = Date.now() - startTime;
    console.log(`[INFO] MaTTS parallel complete: ${trajectories.length} trajectories, ${successRate * 100}% success in ${duration}ms`);
    db.logMetric('rb.matts.parallel.duration_ms', duration);
    db.logMetric('rb.matts.parallel.success_rate', successRate);
    db.logMetric('rb.matts.parallel.memories', aggregatedMemories.length);
    return {
        runId,
        mode: 'parallel',
        k,
        trajectories,
        aggregatedMemories,
        successRate,
        duration
    };
}
|
|
84
|
+
/**
 * Run MaTTS in sequential mode: iterative refinement with check-and-correct.
 * Up to r iterations; each iteration retrieves memories (including those
 * distilled in earlier iterations), executes `taskFn`, judges the result,
 * and stops early on success when configured to do so.
 *
 * @param {Function} taskFn - Async function taking an array of memories and producing a trajectory.
 * @param {string} query - Task query used for retrieval, judging and distillation.
 * @param {object} [options] - Optional r / taskId / agentId / domain.
 * @returns {Promise<object>} { runId, mode, k, trajectories, aggregatedMemories, successRate, duration }.
 */
export async function mattsSequential(taskFn, query, options = {}) {
    const config = loadConfig();
    const r = options.r || config.matts.sequential_r || config.matts.sequential_k;
    const runId = ulid();
    const startTime = Date.now();
    console.log(`[INFO] Starting MaTTS sequential mode with r=${r}`);
    db.storeMattsRun({
        run_id: runId,
        task_id: options.taskId || 'matts-seq-' + runId,
        mode: 'sequential',
        k: r,
        status: 'running',
        summary: undefined
    });
    const trajectories = [];
    let previousMemories = [];
    // Iterative refinement
    for (let i = 0; i < r; i++) {
        console.log(`[INFO] MaTTS sequential iteration ${i + 1}/${r}`);
        try {
            // Retrieve relevant memories (stored ones; previous iterations'
            // distilled memory IDs are appended below)
            const memories = await retrieveMemories(query, {
                domain: options.domain
            });
            // Execute with memories
            const trajectory = await taskFn([...memories, ...previousMemories]);
            const verdict = await judgeTrajectory(trajectory, query);
            trajectories.push({
                id: ulid(),
                verdict,
                trajectory
            });
            // Stop early on success when configured (defaults to true).
            // NOTE(review): when breaking here, the final trajectory is NOT
            // persisted via storeTrajectory — confirm this is intended.
            if (verdict.label === 'Success' && (config.matts.sequential_stop_on_success ?? true)) {
                console.log(`[INFO] Success achieved at iteration ${i + 1}, stopping early`);
                break;
            }
            // Distill memories from this iteration for the next one
            const newMemories = await distillMemories(trajectory, verdict, query, options);
            previousMemories = [...previousMemories, ...newMemories];
            // Persist the trajectory with its verdict
            db.storeTrajectory({
                task_id: options.taskId || 'matts-seq-' + runId,
                agent_id: options.agentId || 'matts-agent',
                query,
                trajectory_json: JSON.stringify(trajectory),
                started_at: new Date().toISOString(),
                ended_at: new Date().toISOString(),
                judge_label: verdict.label,
                judge_conf: verdict.confidence,
                judge_reasons: JSON.stringify(verdict.reasons),
                matts_run_id: runId
            });
        }
        catch (error) {
            // A failed iteration is skipped; remaining iterations still run.
            console.error(`[ERROR] MaTTS iteration ${i + 1} failed:`, error);
        }
    }
    // Guard against division by zero: if every iteration threw, trajectories
    // is empty and the naive division previously produced NaN.
    const successRate = trajectories.length > 0
        ? trajectories.filter((t) => t.verdict.label === 'Success').length / trajectories.length
        : 0;
    const duration = Date.now() - startTime;
    console.log(`[INFO] MaTTS sequential complete: ${trajectories.length} iterations, ${successRate * 100}% success in ${duration}ms`);
    db.logMetric('rb.matts.sequential.duration_ms', duration);
    db.logMetric('rb.matts.sequential.success_rate', successRate);
    return {
        runId,
        mode: 'sequential',
        k: r,
        trajectories,
        aggregatedMemories: previousMemories,
        successRate,
        duration
    };
}
|
|
161
|
+
/**
 * Aggregate memories from multiple trajectories using self-contrast.
 *
 * Formats all rollout trajectories with their verdicts, sends them to the
 * Anthropic API with the matts-aggregate prompt, and stores memories for
 * each aggregated item. Returns [] when no API key is set, the call fails,
 * or no JSON object can be extracted from the response.
 *
 * @param {Array<{id: string, verdict: object, trajectory: object}>} trajectories
 * @param {string} query - Original task query.
 * @param {object} options - Passed through to distillMemories (taskId/agentId/domain).
 * @returns {Promise<string[]>} IDs of stored aggregated memories.
 */
async function aggregateMemories(trajectories, query, options) {
console.log('[INFO] Aggregating memories via self-contrast');
// Load aggregation prompt (model/max_tokens/temperature come from the
// prompt file itself, unlike distill/judge which read them from config)
const promptPath = join(process.cwd(), 'src', 'reasoningbank', 'prompts', 'matts-aggregate.json');
const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
// Format trajectories for comparison (id, verdict label/confidence, steps)
const trajectoryTexts = trajectories.map((t, i) => ({
id: t.id,
label: t.verdict.label,
confidence: t.verdict.confidence,
steps: JSON.stringify(t.trajectory.steps || [], null, 2)
}));
const apiKey = process.env.ANTHROPIC_API_KEY;
if (!apiKey) {
console.warn('[WARN] No API key, skipping aggregation');
return [];
}
try {
// Fill prompt placeholders (replace() substitutes first occurrence only)
const prompt = promptTemplate.template
.replace('{{k}}', String(trajectories.length))
.replace('{{task_query}}', query)
.replace('{{trajectories}}', JSON.stringify(trajectoryTexts, null, 2));
const response = await fetch('https://api.anthropic.com/v1/messages', {
method: 'POST',
headers: {
'x-api-key': apiKey,
'anthropic-version': '2023-06-01',
'content-type': 'application/json'
},
body: JSON.stringify({
model: promptTemplate.model,
max_tokens: promptTemplate.max_tokens,
temperature: promptTemplate.temperature,
system: promptTemplate.system,
messages: [{ role: 'user', content: prompt }]
})
});
if (!response.ok) {
throw new Error(`Anthropic API error: ${response.status}`);
}
const result = await response.json();
const content = result.content[0].text;
// Parse and store aggregated memories (first-to-last brace span)
const jsonMatch = content.match(/\{[\s\S]*\}/);
if (jsonMatch) {
const parsed = JSON.parse(jsonMatch[0]);
const memories = parsed.memories || [];
// Store with boosted confidence (hard-coded Success verdict at 0.9)
const memoryIds = [];
for (const mem of memories) {
const verdict = { label: 'Success', confidence: 0.9, reasons: [] };
// NOTE(review): `mem` is never used — each aggregated memory triggers
// distillation of an EMPTY trajectory ({ steps: [] }) instead of
// storing the aggregated item itself. This looks like a placeholder;
// confirm whether the aggregated memory content should be persisted.
const ids = await distillMemories({ steps: [] }, verdict, query, options);
memoryIds.push(...ids);
}
return memoryIds;
}
}
catch (error) {
// Aggregation is best-effort: failures fall through to the empty result.
console.error('[ERROR] Memory aggregation failed:', error);
}
return [];
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Retrieval with MMR diversity
|
|
3
|
+
* Algorithm 1 from ReasoningBank paper
|
|
4
|
+
*/
|
|
5
|
+
import { computeEmbedding } from '../utils/embeddings.js';
|
|
6
|
+
import { mmrSelection, cosineSimilarity } from '../utils/mmr.js';
|
|
7
|
+
import * as db from '../db/queries.js';
|
|
8
|
+
import { loadConfig } from '../utils/config.js';
|
|
9
|
+
/**
 * Retrieve the top-k memories for a query, diversified with MMR
 * (Algorithm 1 from the ReasoningBank paper).
 *
 * Each candidate is scored as α·similarity + β·recency + γ·reliability,
 * where similarity is cosine similarity to the query embedding, recency
 * decays exponentially with age, and reliability is confidence clamped
 * to 1.0. Usage counters are bumped for every selected memory.
 *
 * @param {string} query - Task query to embed and match against.
 * @param {object} [options] - Optional k / domain / agent filters.
 * @returns {Promise<object[]>} Selected memories with score components.
 */
export async function retrieveMemories(query, options = {}) {
    const config = loadConfig();
    const k = options.k || config.retrieve.k;
    const startTime = Date.now();
    console.log(`[INFO] Retrieving memories for query: ${query.substring(0, 100)}...`);
    // Embed the query once; reused for both scoring and MMR selection.
    const queryVector = await computeEmbedding(query);
    // Fetch candidates matching the optional domain/agent filters.
    const candidates = db.fetchMemoryCandidates({
        domain: options.domain,
        agent: options.agent,
        minConfidence: config.retrieve.min_score
    });
    if (candidates.length === 0) {
        console.log('[INFO] No memory candidates found');
        return [];
    }
    console.log(`[INFO] Found ${candidates.length} candidates`);
    // Score every candidate with the weighted similarity/recency/reliability model.
    const ranked = candidates.map((candidate) => {
        const similarity = cosineSimilarity(queryVector, candidate.embedding);
        const recency = Math.exp(-candidate.age_days / config.retrieve.recency_half_life_days);
        const reliability = Math.min(candidate.confidence, 1.0);
        const baseScore =
            config.retrieve.alpha * similarity +
            config.retrieve.beta * recency +
            config.retrieve.gamma * reliability;
        return {
            ...candidate,
            score: baseScore,
            components: { similarity, recency, reliability }
        };
    });
    // Diversify the top-k picks via Maximal Marginal Relevance.
    const selected = mmrSelection(ranked, queryVector, k, config.retrieve.delta);
    // Bump usage counters for the winners.
    selected.forEach((mem) => db.incrementUsage(mem.id));
    const duration = Date.now() - startTime;
    console.log(`[INFO] Retrieval complete: ${selected.length} memories in ${duration}ms`);
    db.logMetric('rb.retrieve.latency_ms', duration);
    // Project down to the fields callers consume.
    return selected.map((item) => ({
        id: item.id,
        title: item.pattern_data.title,
        description: item.pattern_data.description,
        content: item.pattern_data.content,
        score: item.score,
        components: item.components
    }));
}
|
|
68
|
+
/**
 * Render retrieved memories as a markdown section for injection into a
 * system prompt. Returns the empty string when there are no memories.
 *
 * @param {Array<{title: string, description: string, content: string, score: number, components: {similarity: number}}>} memories
 * @returns {string} Markdown block with one section per memory.
 */
export function formatMemoriesForPrompt(memories) {
    if (memories.length === 0) {
        return '';
    }
    const sections = memories.map((mem, index) => {
        const scorePct = (mem.score * 100).toFixed(1);
        const simPct = (mem.components.similarity * 100).toFixed(1);
        return (
            `### Memory ${index + 1}: ${mem.title}\n\n` +
            `${mem.description}\n\n` +
            `**Strategy:**\n${mem.content}\n\n` +
            `*Confidence: ${scorePct}% | ` +
            `Similarity: ${simPct}%*\n\n` +
            '---\n\n'
        );
    });
    return '\n## Relevant Memories from Past Experience\n\n' + sections.join('');
}
|