agentic-flow 1.5.4 → 1.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/reasoningbank/core/distill.js +31 -25
- package/dist/reasoningbank/core/judge.js +31 -25
- package/dist/reasoningbank/core/matts.js +31 -23
- package/dist/reasoningbank/demo-comparison.js +3 -0
- package/dist/reasoningbank/prompts/distill-failure.json +1 -1
- package/dist/reasoningbank/prompts/distill-success.json +1 -1
- package/dist/reasoningbank/prompts/judge.json +1 -1
- package/dist/reasoningbank/prompts/matts-aggregate.json +1 -1
- package/dist/reasoningbank/utils/config.js +2 -2
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,41 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.5.6] - 2025-10-11
|
|
9
|
+
|
|
10
|
+
### Changed
|
|
11
|
+
- **Enhancement:** Integrated ModelRouter into ReasoningBank for multi-provider LLM support
|
|
12
|
+
- judge.ts, distill.ts, and matts.ts now use ModelRouter for intelligent provider selection
|
|
13
|
+
- Supports OpenRouter, Anthropic, Gemini, and ONNX local models
|
|
14
|
+
- Automatic fallback chain: OpenRouter → Anthropic → Gemini → ONNX
|
|
15
|
+
- Default model changed to deepseek/deepseek-chat for cost-effectiveness
|
|
16
|
+
- Falls back to local ONNX (Phi-4) when no API keys are available
|
|
17
|
+
- Consistent with main agentic-flow proxy architecture
|
|
18
|
+
|
|
19
|
+
### Technical Details
|
|
20
|
+
- Each ReasoningBank module (judge, distill, matts) now lazily creates one ModelRouter instance and reuses it across calls for consistency
|
|
21
|
+
- Cost-optimized routing prefers OpenRouter (99% cost savings)
|
|
22
|
+
- Local ONNX inference available offline without API keys
|
|
23
|
+
- Demo successfully runs with 67% success rate using fallback models
|
|
24
|
+
|
|
25
|
+
### Benefits
|
|
26
|
+
- 🎯 **Unified Architecture**: ReasoningBank uses the same routing logic as the main agents
|
|
27
|
+
- 💰 **Cost Savings**: DeepSeek via OpenRouter offers 99% cost reduction vs Claude
|
|
28
|
+
- 🔄 **Automatic Failover**: Graceful fallback to available providers
|
|
29
|
+
- 🏠 **Offline Support**: Works with local ONNX models when the internet is unavailable
|
|
30
|
+
|
|
31
|
+
## [1.5.5] - 2025-10-11
|
|
32
|
+
|
|
33
|
+
### Fixed
|
|
34
|
+
- **Enhancement:** Added dotenv loading to ReasoningBank demo
|
|
35
|
+
- Demo now loads `.env` file automatically to pick up ANTHROPIC_API_KEY
|
|
36
|
+
- Enables full LLM-powered judgment and distillation when API key is available
|
|
37
|
+
- Falls back gracefully to template-based approach when key is missing
|
|
38
|
+
|
|
39
|
+
### Technical Details
|
|
40
|
+
- Added `import { config } from 'dotenv'; config();` to demo-comparison.ts
|
|
41
|
+
- Ensures environment variables are loaded before ReasoningBank initialization
|
|
42
|
+
|
|
8
43
|
## [1.5.4] - 2025-10-11
|
|
9
44
|
|
|
10
45
|
### Fixed
|
|
@@ -9,9 +9,18 @@ import { ulid } from 'ulid';
|
|
|
9
9
|
import { loadConfig } from '../utils/config.js';
|
|
10
10
|
import { scrubMemory } from '../utils/pii-scrubber.js';
|
|
11
11
|
import { computeEmbedding } from '../utils/embeddings.js';
|
|
12
|
+
import { ModelRouter } from '../../router/router.js';
|
|
12
13
|
import * as db from '../db/queries.js';
|
|
13
14
|
const __filename = fileURLToPath(import.meta.url);
|
|
14
15
|
const __dirname = dirname(__filename);
|
|
16
|
+
// Initialize ModelRouter once
|
|
17
|
+
let routerInstance = null;
|
|
18
|
+
function getRouter() {
|
|
19
|
+
if (!routerInstance) {
|
|
20
|
+
routerInstance = new ModelRouter();
|
|
21
|
+
}
|
|
22
|
+
return routerInstance;
|
|
23
|
+
}
|
|
15
24
|
/**
|
|
16
25
|
* Distill memories from a trajectory
|
|
17
26
|
*/
|
|
@@ -29,10 +38,12 @@ export async function distillMemories(trajectory, verdict, query, options = {})
|
|
|
29
38
|
const confidencePrior = verdict.label === 'Success'
|
|
30
39
|
? config.distill.confidence_prior_success
|
|
31
40
|
: config.distill.confidence_prior_failure;
|
|
32
|
-
// Check API key
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
|
|
41
|
+
// Check if we have any API key configured
|
|
42
|
+
const hasApiKey = process.env.OPENROUTER_API_KEY ||
|
|
43
|
+
process.env.ANTHROPIC_API_KEY ||
|
|
44
|
+
process.env.GOOGLE_GEMINI_API_KEY;
|
|
45
|
+
if (!hasApiKey) {
|
|
46
|
+
console.warn('[WARN] No API key set (OPENROUTER_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_GEMINI_API_KEY), using template-based distillation');
|
|
36
47
|
return templateBasedDistill(trajectory, verdict, query, options);
|
|
37
48
|
}
|
|
38
49
|
try {
|
|
@@ -43,27 +54,22 @@ export async function distillMemories(trajectory, verdict, query, options = {})
|
|
|
43
54
|
.replace('{{task_query}}', query)
|
|
44
55
|
.replace('{{trajectory}}', trajectoryText)
|
|
45
56
|
.replace('{{max_items}}', String(maxItems));
|
|
46
|
-
//
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
'content
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if (!response.ok) {
|
|
63
|
-
throw new Error(`Anthropic API error: ${response.status}`);
|
|
64
|
-
}
|
|
65
|
-
const result = await response.json();
|
|
66
|
-
const content = result.content[0].text;
|
|
57
|
+
// Use ModelRouter for multi-provider support
|
|
58
|
+
const router = getRouter();
|
|
59
|
+
const response = await router.chat({
|
|
60
|
+
model: config.distill.model || config.judge.model,
|
|
61
|
+
messages: [
|
|
62
|
+
{ role: 'system', content: promptTemplate.system },
|
|
63
|
+
{ role: 'user', content: prompt }
|
|
64
|
+
],
|
|
65
|
+
temperature: config.distill.temperature || 0.3,
|
|
66
|
+
maxTokens: config.distill.max_tokens || 2048
|
|
67
|
+
}, 'reasoningbank-distill');
|
|
68
|
+
// Extract content from router response
|
|
69
|
+
const content = response.content
|
|
70
|
+
.filter(block => block.type === 'text')
|
|
71
|
+
.map(block => block.text)
|
|
72
|
+
.join('\n');
|
|
67
73
|
// Parse memories from response
|
|
68
74
|
const distilled = parseDistilledMemories(content);
|
|
69
75
|
// Store memories in database
|
|
@@ -6,8 +6,17 @@ import { readFileSync } from 'fs';
|
|
|
6
6
|
import { join, dirname } from 'path';
|
|
7
7
|
import { fileURLToPath } from 'url';
|
|
8
8
|
import { loadConfig } from '../utils/config.js';
|
|
9
|
+
import { ModelRouter } from '../../router/router.js';
|
|
9
10
|
const __filename = fileURLToPath(import.meta.url);
|
|
10
11
|
const __dirname = dirname(__filename);
|
|
12
|
+
// Initialize ModelRouter once
|
|
13
|
+
let routerInstance = null;
|
|
14
|
+
function getRouter() {
|
|
15
|
+
if (!routerInstance) {
|
|
16
|
+
routerInstance = new ModelRouter();
|
|
17
|
+
}
|
|
18
|
+
return routerInstance;
|
|
19
|
+
}
|
|
11
20
|
/**
|
|
12
21
|
* Judge a task trajectory using LLM evaluation
|
|
13
22
|
*/
|
|
@@ -20,37 +29,34 @@ export async function judgeTrajectory(trajectory, query, options = {}) {
|
|
|
20
29
|
const promptTemplate = JSON.parse(readFileSync(promptPath, 'utf-8'));
|
|
21
30
|
// Format trajectory for judgment
|
|
22
31
|
const trajectoryText = formatTrajectory(trajectory);
|
|
23
|
-
// Check if we have
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
|
|
32
|
+
// Check if we have any API key configured
|
|
33
|
+
const hasApiKey = process.env.OPENROUTER_API_KEY ||
|
|
34
|
+
process.env.ANTHROPIC_API_KEY ||
|
|
35
|
+
process.env.GOOGLE_GEMINI_API_KEY;
|
|
36
|
+
if (!hasApiKey) {
|
|
37
|
+
console.warn('[WARN] No API key set (OPENROUTER_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_GEMINI_API_KEY), using heuristic judgment');
|
|
27
38
|
return heuristicJudge(trajectory, query);
|
|
28
39
|
}
|
|
29
40
|
try {
|
|
30
|
-
// Call
|
|
41
|
+
// Call LLM API with judge prompt using ModelRouter
|
|
31
42
|
const prompt = promptTemplate.template
|
|
32
43
|
.replace('{{task_query}}', query)
|
|
33
44
|
.replace('{{trajectory}}', trajectoryText);
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
'content
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if (!response.ok) {
|
|
50
|
-
throw new Error(`Anthropic API error: ${response.status}`);
|
|
51
|
-
}
|
|
52
|
-
const result = await response.json();
|
|
53
|
-
const content = result.content[0].text;
|
|
45
|
+
const router = getRouter();
|
|
46
|
+
const response = await router.chat({
|
|
47
|
+
model: config.judge.model,
|
|
48
|
+
messages: [
|
|
49
|
+
{ role: 'system', content: promptTemplate.system },
|
|
50
|
+
{ role: 'user', content: prompt }
|
|
51
|
+
],
|
|
52
|
+
temperature: config.judge.temperature,
|
|
53
|
+
maxTokens: config.judge.max_tokens
|
|
54
|
+
}, 'reasoningbank-judge');
|
|
55
|
+
// Extract content from router response
|
|
56
|
+
const content = response.content
|
|
57
|
+
.filter(block => block.type === 'text')
|
|
58
|
+
.map(block => block.text)
|
|
59
|
+
.join('\n');
|
|
54
60
|
// Parse JSON response
|
|
55
61
|
const verdict = parseVerdict(content);
|
|
56
62
|
const duration = Date.now() - startTime;
|
|
@@ -14,9 +14,18 @@ import { loadConfig } from '../utils/config.js';
|
|
|
14
14
|
import { retrieveMemories } from './retrieve.js';
|
|
15
15
|
import { judgeTrajectory } from './judge.js';
|
|
16
16
|
import { distillMemories } from './distill.js';
|
|
17
|
+
import { ModelRouter } from '../../router/router.js';
|
|
17
18
|
import * as db from '../db/queries.js';
|
|
18
19
|
const __filename = fileURLToPath(import.meta.url);
|
|
19
20
|
const __dirname = dirname(__filename);
|
|
21
|
+
// Initialize ModelRouter once
|
|
22
|
+
let routerInstance = null;
|
|
23
|
+
function getRouter() {
|
|
24
|
+
if (!routerInstance) {
|
|
25
|
+
routerInstance = new ModelRouter();
|
|
26
|
+
}
|
|
27
|
+
return routerInstance;
|
|
28
|
+
}
|
|
20
29
|
/**
|
|
21
30
|
* Run MaTTS in parallel mode
|
|
22
31
|
* Execute k independent rollouts and aggregate via self-contrast
|
|
@@ -176,9 +185,12 @@ async function aggregateMemories(trajectories, query, options) {
|
|
|
176
185
|
confidence: t.verdict.confidence,
|
|
177
186
|
steps: JSON.stringify(t.trajectory.steps || [], null, 2)
|
|
178
187
|
}));
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
188
|
+
// Check if we have any API key configured
|
|
189
|
+
const hasApiKey = process.env.OPENROUTER_API_KEY ||
|
|
190
|
+
process.env.ANTHROPIC_API_KEY ||
|
|
191
|
+
process.env.GOOGLE_GEMINI_API_KEY;
|
|
192
|
+
if (!hasApiKey) {
|
|
193
|
+
console.warn('[WARN] No API key set, skipping aggregation');
|
|
182
194
|
return [];
|
|
183
195
|
}
|
|
184
196
|
try {
|
|
@@ -186,26 +198,22 @@ async function aggregateMemories(trajectories, query, options) {
|
|
|
186
198
|
.replace('{{k}}', String(trajectories.length))
|
|
187
199
|
.replace('{{task_query}}', query)
|
|
188
200
|
.replace('{{trajectories}}', JSON.stringify(trajectoryTexts, null, 2));
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
'content
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
throw new Error(`Anthropic API error: ${response.status}`);
|
|
206
|
-
}
|
|
207
|
-
const result = await response.json();
|
|
208
|
-
const content = result.content[0].text;
|
|
201
|
+
// Use ModelRouter for multi-provider support
|
|
202
|
+
const router = getRouter();
|
|
203
|
+
const response = await router.chat({
|
|
204
|
+
model: promptTemplate.model,
|
|
205
|
+
messages: [
|
|
206
|
+
{ role: 'system', content: promptTemplate.system },
|
|
207
|
+
{ role: 'user', content: prompt }
|
|
208
|
+
],
|
|
209
|
+
temperature: promptTemplate.temperature,
|
|
210
|
+
maxTokens: promptTemplate.max_tokens
|
|
211
|
+
}, 'reasoningbank-matts-aggregate');
|
|
212
|
+
// Extract content from router response
|
|
213
|
+
const content = response.content
|
|
214
|
+
.filter(block => block.type === 'text')
|
|
215
|
+
.map(block => block.text)
|
|
216
|
+
.join('\n');
|
|
209
217
|
// Parse and store aggregated memories
|
|
210
218
|
const jsonMatch = content.match(/\{[\s\S]*\}/);
|
|
211
219
|
if (jsonMatch) {
|
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
* 1. Traditional approach: Agent starts fresh every time
|
|
7
7
|
* 2. ReasoningBank approach: Agent learns from experience
|
|
8
8
|
*/
|
|
9
|
+
// Load environment variables from .env file
|
|
10
|
+
import { config } from 'dotenv';
|
|
11
|
+
config();
|
|
9
12
|
import { initialize, runTask, retrieveMemories, db } from './index.js';
|
|
10
13
|
console.log('🎯 ReasoningBank vs Traditional Approach - Live Demo\n');
|
|
11
14
|
console.log('='.repeat(80));
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "reasoning_bank_distill_failure",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Extract failure guardrails and preventative patterns from failed trajectories. Creates counterfactual memories.",
|
|
5
|
-
"model": "
|
|
5
|
+
"model": "deepseek/deepseek-chat",
|
|
6
6
|
"temperature": 0.3,
|
|
7
7
|
"max_tokens": 2048,
|
|
8
8
|
"system": "You are a failure analysis specialist. Your role is to analyze failed task trajectories and extract guardrails, pitfalls, and recovery strategies. Focus on preventable errors and how to detect/avoid them.",
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "reasoning_bank_distill_success",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Extract reusable strategy principles from successful trajectories. Creates title/description/content memories.",
|
|
5
|
-
"model": "
|
|
5
|
+
"model": "deepseek/deepseek-chat",
|
|
6
6
|
"temperature": 0.3,
|
|
7
7
|
"max_tokens": 2048,
|
|
8
8
|
"system": "You are a knowledge extraction specialist. Your role is to analyze successful task trajectories and extract reusable, generalizable strategy principles. Each principle should be concise, actionable, and avoid task-specific details like URLs, IDs, or PII.",
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "reasoning_bank_judge",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "LLM-as-judge for trajectory evaluation. Returns Success or Failure with confidence score.",
|
|
5
|
-
"model": "
|
|
5
|
+
"model": "deepseek/deepseek-chat",
|
|
6
6
|
"temperature": 0,
|
|
7
7
|
"max_tokens": 512,
|
|
8
8
|
"system": "You are a strict evaluator for task completion. Your role is to judge whether a task trajectory achieved its goal based on the final state and outputs. Be conservative: only label Success if the acceptance criteria are clearly met. Respond with pure JSON.",
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "reasoning_bank_matts_aggregate",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Self-contrast aggregation for parallel MaTTS. Compares multiple trajectories to extract high-quality, generalizable memories.",
|
|
5
|
-
"model": "
|
|
5
|
+
"model": "deepseek/deepseek-chat",
|
|
6
6
|
"temperature": 0.2,
|
|
7
7
|
"max_tokens": 3072,
|
|
8
8
|
"system": "You are a meta-learning specialist analyzing multiple attempts at the same task. Your role is to identify patterns that distinguish successful approaches from failures, and extract robust, generalizable strategies.",
|
|
@@ -17,7 +17,7 @@ const DEFAULT_CONFIG = {
|
|
|
17
17
|
min_score: 0.3
|
|
18
18
|
},
|
|
19
19
|
judge: {
|
|
20
|
-
model: '
|
|
20
|
+
model: 'deepseek/deepseek-chat',
|
|
21
21
|
max_tokens: 512,
|
|
22
22
|
temperature: 0,
|
|
23
23
|
confidence_threshold: 0.5
|
|
@@ -108,7 +108,7 @@ export function loadConfig() {
|
|
|
108
108
|
min_score: raw.retrieve?.min_score ?? 0.3
|
|
109
109
|
},
|
|
110
110
|
judge: {
|
|
111
|
-
model: raw.judge?.model ?? '
|
|
111
|
+
model: raw.judge?.model ?? 'deepseek/deepseek-chat',
|
|
112
112
|
max_tokens: raw.judge?.max_tokens ?? 512,
|
|
113
113
|
temperature: raw.judge?.temperature ?? 0,
|
|
114
114
|
confidence_threshold: raw.judge?.fallback_confidence ?? 0.5
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentic-flow",
|
|
3
|
-
"version": "1.5.
|
|
3
|
+
"version": "1.5.6",
|
|
4
4
|
"description": "Production-ready AI agent orchestration platform with 66 specialized agents, 213 MCP tools, ReasoningBank learning memory, and autonomous multi-agent swarms. Built by @ruvnet with Claude Agent SDK, neural networks, memory persistence, GitHub integration, and distributed consensus protocols.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|