swarm-code 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +384 -0
- package/bin/swarm.mjs +45 -0
- package/dist/agents/aider.d.ts +12 -0
- package/dist/agents/aider.js +182 -0
- package/dist/agents/claude-code.d.ts +9 -0
- package/dist/agents/claude-code.js +216 -0
- package/dist/agents/codex.d.ts +14 -0
- package/dist/agents/codex.js +193 -0
- package/dist/agents/direct-llm.d.ts +9 -0
- package/dist/agents/direct-llm.js +78 -0
- package/dist/agents/mock.d.ts +9 -0
- package/dist/agents/mock.js +77 -0
- package/dist/agents/opencode.d.ts +23 -0
- package/dist/agents/opencode.js +571 -0
- package/dist/agents/provider.d.ts +11 -0
- package/dist/agents/provider.js +31 -0
- package/dist/cli.d.ts +15 -0
- package/dist/cli.js +285 -0
- package/dist/compression/compressor.d.ts +28 -0
- package/dist/compression/compressor.js +265 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.js +170 -0
- package/dist/core/repl.d.ts +69 -0
- package/dist/core/repl.js +336 -0
- package/dist/core/rlm.d.ts +63 -0
- package/dist/core/rlm.js +409 -0
- package/dist/core/runtime.py +335 -0
- package/dist/core/types.d.ts +131 -0
- package/dist/core/types.js +19 -0
- package/dist/env.d.ts +10 -0
- package/dist/env.js +75 -0
- package/dist/interactive-swarm.d.ts +20 -0
- package/dist/interactive-swarm.js +1041 -0
- package/dist/interactive.d.ts +10 -0
- package/dist/interactive.js +1765 -0
- package/dist/main.d.ts +15 -0
- package/dist/main.js +242 -0
- package/dist/mcp/server.d.ts +15 -0
- package/dist/mcp/server.js +72 -0
- package/dist/mcp/session.d.ts +73 -0
- package/dist/mcp/session.js +184 -0
- package/dist/mcp/tools.d.ts +15 -0
- package/dist/mcp/tools.js +377 -0
- package/dist/memory/episodic.d.ts +132 -0
- package/dist/memory/episodic.js +390 -0
- package/dist/prompts/orchestrator.d.ts +5 -0
- package/dist/prompts/orchestrator.js +191 -0
- package/dist/routing/model-router.d.ts +130 -0
- package/dist/routing/model-router.js +515 -0
- package/dist/swarm.d.ts +14 -0
- package/dist/swarm.js +557 -0
- package/dist/threads/cache.d.ts +58 -0
- package/dist/threads/cache.js +198 -0
- package/dist/threads/manager.d.ts +85 -0
- package/dist/threads/manager.js +659 -0
- package/dist/ui/banner.d.ts +14 -0
- package/dist/ui/banner.js +42 -0
- package/dist/ui/dashboard.d.ts +33 -0
- package/dist/ui/dashboard.js +151 -0
- package/dist/ui/index.d.ts +10 -0
- package/dist/ui/index.js +11 -0
- package/dist/ui/log.d.ts +39 -0
- package/dist/ui/log.js +126 -0
- package/dist/ui/onboarding.d.ts +14 -0
- package/dist/ui/onboarding.js +518 -0
- package/dist/ui/spinner.d.ts +25 -0
- package/dist/ui/spinner.js +113 -0
- package/dist/ui/summary.d.ts +18 -0
- package/dist/ui/summary.js +113 -0
- package/dist/ui/theme.d.ts +63 -0
- package/dist/ui/theme.js +97 -0
- package/dist/viewer.d.ts +12 -0
- package/dist/viewer.js +1284 -0
- package/dist/worktree/manager.d.ts +45 -0
- package/dist/worktree/manager.js +266 -0
- package/dist/worktree/merge.d.ts +28 -0
- package/dist/worktree/merge.js +138 -0
- package/package.json +69 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model router — auto-selects the best agent + model combination per task.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Slate's approach: the orchestrator LLM naturally selects agents
|
|
5
|
+
* and models based on task complexity. This module provides a fallback
|
|
6
|
+
* rule-based router for when auto_model_selection is enabled, ensuring
|
|
7
|
+
* cost-efficient defaults even when the orchestrator doesn't explicitly choose.
|
|
8
|
+
*
|
|
9
|
+
* Two routing dimensions:
|
|
10
|
+
* 1. Task complexity (simple/medium/complex) → picks model tier
|
|
11
|
+
* 2. Task slot (execution/search/reasoning/planning) → picks agent + model specialty
|
|
12
|
+
*
|
|
13
|
+
* Two routing modes:
|
|
14
|
+
* 1. Orchestrator-driven (default): The orchestrator prompt teaches the LLM
|
|
15
|
+
* about agent strengths, and the LLM passes agent/model in thread() calls.
|
|
16
|
+
* 2. Auto-routing (auto_model_selection=true): This router overrides the
|
|
17
|
+
* orchestrator's choice with a cost-optimal selection based on task analysis.
|
|
18
|
+
*
|
|
19
|
+
* Enhanced with:
|
|
20
|
+
* - FailureTracker: session-level failure tracking with decay weighting
|
|
21
|
+
* - Success rate weighting: penalizes agents with high failure rates
|
|
22
|
+
* - File pattern matching: boosts agents that historically handle specific file types
|
|
23
|
+
* - Aggregate episodic stats: fallback to best-performing agent per slot
|
|
24
|
+
*/
|
|
25
|
+
import type { ModelSlots } from "../config.js";
|
|
26
|
+
import type { SwarmConfig } from "../core/types.js";
|
|
27
|
+
import type { EpisodicMemory } from "../memory/episodic.js";
|
|
28
|
+
/**
 * Capability profile for a single agent backend.
 *
 * Describes how expensive and how fast the agent is, what it is good at, and
 * which model id to use at each of three price points.
 */
export interface AgentCapability {
    /** Agent name. */
    name: string;
    /** Cost tier: 1 = cheapest, 5 = most expensive */
    costTier: number;
    /** Speed tier: 1 = fastest, 5 = slowest */
    speedTier: number;
    /** What this agent excels at */
    strengths: string[];
    /** Best model for this agent (provider/model-id format) */
    defaultModel: string;
    /** Cheaper model for simple tasks */
    cheapModel: string;
    /** Premium model for complex tasks */
    premiumModel: string;
}
|
|
43
|
+
/** Capability profiles, keyed by agent name. */
export declare const AGENT_CAPABILITIES: Record<string, AgentCapability>;
|
|
44
|
+
/** Coarse difficulty level of a task, as returned by classifyTaskComplexity. */
export type TaskComplexity = "simple" | "medium" | "complex";
|
|
45
|
+
/**
 * Simple keyword-based complexity classifier.
 *
 * @param task Free-text task description.
 * @returns "complex" when a complex-task keyword is found; otherwise "simple"
 *   when a simple-task keyword is found; otherwise "medium".
 */
|
|
46
|
+
export declare function classifyTaskComplexity(task: string): TaskComplexity;
|
|
47
|
+
/**
 * Task slots — named categories that map to specialized model/agent combos.
 * Inspired by Slate's model slots (main, subagent, search, reasoning).
 *
 * A task is assigned to exactly one slot by classifyTaskSlot.
 */
|
|
51
|
+
export type TaskSlot = "execution" | "search" | "reasoning" | "planning";
|
|
52
|
+
/** Default per-slot model overrides, used when the config supplies no model_slots. */
export declare const DEFAULT_MODEL_SLOTS: ModelSlots;
|
|
53
|
+
/**
 * Classify a task into a named slot based on keyword analysis.
 *
 * Keyword groups are checked in the order search, reasoning, planning; a task
 * that matches none of them is classified as "execution".
 *
 * @param task Free-text task description.
 * @returns The slot of the first keyword group that matches.
 */
|
|
54
|
+
export declare function classifyTaskSlot(task: string): TaskSlot;
|
|
55
|
+
/** One recorded failure of an agent+model pair, as stored by FailureTracker. */
export interface FailureRecord {
    /** Agent that failed. */
    agent: string;
    /** Model the agent was using. */
    model: string;
    /** Task that was being attempted. */
    task: string;
    /** Error text reported for the failure. */
    error: string;
    /** Time the failure was recorded (Date.now() milliseconds). */
    timestamp: number;
    /** Transient errors (rate limit, timeout) decay faster than permanent ones. */
    isTransient: boolean;
}
|
|
64
|
+
/**
 * Session-scoped log of agent/model failures that feeds routing decisions.
 *
 * Each failure's weight halves every half-life, so recent failures dominate.
 * Transient errors (rate limits, timeouts) use a shorter half-life than
 * permanent ones.
 */
export declare class FailureTracker {
    private failures;
    /** Half-life for permanent failure decay (ms). Failures lose half their weight after this. */
    private readonly permanentHalfLifeMs;
    /** Half-life for transient failure decay (ms). Shorter — transient issues resolve quickly. */
    private readonly transientHalfLifeMs;
    /**
     * @param permanentHalfLifeMs Half-life for permanent failures, in ms (default 10 minutes).
     * @param transientHalfLifeMs Half-life for transient failures, in ms (default 3 minutes).
     */
    constructor(permanentHalfLifeMs?: number, transientHalfLifeMs?: number);
    /**
     * Record a failure for an agent+model pair.
     * The error text is pattern-matched to classify it as transient or permanent.
     */
    recordFailure(agent: string, model: string, task: string, error: string): void;
    /**
     * Weighted failure rate for an agent (optionally narrowed to one model), in 0-1.
     *
     * Decayed failure weights are summed and divided by 3, then capped at 1.0:
     * one fresh failure is about 0.33, three or more fresh failures saturate.
     */
    getFailureRate(agent: string, model?: string): number;
    /**
     * True when the agent's weighted failure rate (across all models) is at
     * least 0.99. Used to skip agents that are currently broken.
     */
    isFullyFailed(agent: string): boolean;
    /** Get a copy of all failure records (for debugging/inspection). */
    getFailures(): FailureRecord[];
    /** Get the number of raw (undecayed) failures for an agent. */
    getFailureCount(agent: string): number;
    /** Clear all failure records. */
    clear(): void;
}
|
|
101
|
+
/**
 * Extract file extensions mentioned in a task description.
 *
 * @param task Free-text task description.
 * @returns Unique lowercase extensions including the leading dot
 *   (e.g., [".ts", ".py"]), in order of first appearance; an empty array when
 *   none are found.
 */
|
|
105
|
+
export declare function extractFileExtensions(task: string): string[];
|
|
106
|
+
/** Outcome of routing one task: the chosen agent, model, slot and the rationale. */
export interface RouteResult {
    /** Name of the selected agent. */
    agent: string;
    /** Model id the agent should run with. */
    model: string;
    /** Slot the task was classified into. */
    slot: TaskSlot;
    /** Human-readable explanation of why this agent was chosen. */
    reason: string;
}
|
|
112
|
+
/**
 * Route a task to the best agent + model combination.
 *
 * Logic:
 * 1. Classify task complexity (simple/medium/complex)
 * 2. Classify task slot (execution/search/reasoning/planning)
 * 3. Check for slot-specific model overrides in config
 * 4. Score agents with slot preference bonus
 * 5. Apply failure rate penalty (skip fully-failed agents)
 * 6. Apply file pattern matching bonus from episodic memory
 * 7. Fallback to aggregate episodic stats when no high-confidence match
 * 8. Pick the highest-scoring capable option
 *
 * @param task Free-text task description.
 * @param config Swarm configuration (slot model overrides, default model).
 * @param memory Optional episodic memory consulted for past strategies and stats.
 * @param failureTracker Optional tracker used to penalize or skip failing agents.
 * @returns The selected agent, model and slot, plus a human-readable reason.
 */
|
|
125
|
+
export declare function routeTask(task: string, config: SwarmConfig, memory?: EpisodicMemory, failureTracker?: FailureTracker): Promise<RouteResult>;
|
|
126
|
+
/**
 * Get a description of available agents and their capabilities
 * for inclusion in the orchestrator prompt.
 *
 * @returns Newline-joined text with one bullet per available agent followed by
 *   a summary of the model slots, or a short notice when no agent backend is
 *   available.
 */
|
|
130
|
+
export declare function describeAvailableAgents(): Promise<string>;
|
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model router — auto-selects the best agent + model combination per task.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Slate's approach: the orchestrator LLM naturally selects agents
|
|
5
|
+
* and models based on task complexity. This module provides a fallback
|
|
6
|
+
* rule-based router for when auto_model_selection is enabled, ensuring
|
|
7
|
+
* cost-efficient defaults even when the orchestrator doesn't explicitly choose.
|
|
8
|
+
*
|
|
9
|
+
* Two routing dimensions:
|
|
10
|
+
* 1. Task complexity (simple/medium/complex) → picks model tier
|
|
11
|
+
* 2. Task slot (execution/search/reasoning/planning) → picks agent + model specialty
|
|
12
|
+
*
|
|
13
|
+
* Two routing modes:
|
|
14
|
+
* 1. Orchestrator-driven (default): The orchestrator prompt teaches the LLM
|
|
15
|
+
* about agent strengths, and the LLM passes agent/model in thread() calls.
|
|
16
|
+
* 2. Auto-routing (auto_model_selection=true): This router overrides the
|
|
17
|
+
* orchestrator's choice with a cost-optimal selection based on task analysis.
|
|
18
|
+
*
|
|
19
|
+
* Enhanced with:
|
|
20
|
+
* - FailureTracker: session-level failure tracking with decay weighting
|
|
21
|
+
* - Success rate weighting: penalizes agents with high failure rates
|
|
22
|
+
* - File pattern matching: boosts agents that historically handle specific file types
|
|
23
|
+
* - Aggregate episodic stats: fallback to best-performing agent per slot
|
|
24
|
+
*/
|
|
25
|
+
import { getAvailableAgents } from "../agents/provider.js";
|
|
26
|
+
export const AGENT_CAPABILITIES = {
|
|
27
|
+
opencode: {
|
|
28
|
+
name: "opencode",
|
|
29
|
+
costTier: 2,
|
|
30
|
+
speedTier: 2,
|
|
31
|
+
strengths: ["general-purpose", "multi-language", "fast", "tool-use", "testing"],
|
|
32
|
+
defaultModel: "anthropic/claude-sonnet-4-6",
|
|
33
|
+
cheapModel: "anthropic/claude-haiku-4-5",
|
|
34
|
+
premiumModel: "anthropic/claude-opus-4-6",
|
|
35
|
+
},
|
|
36
|
+
"claude-code": {
|
|
37
|
+
name: "claude-code",
|
|
38
|
+
costTier: 3,
|
|
39
|
+
speedTier: 3,
|
|
40
|
+
strengths: ["deep-analysis", "refactoring", "architecture", "complex-reasoning", "large-codebase"],
|
|
41
|
+
defaultModel: "claude-sonnet-4-6",
|
|
42
|
+
cheapModel: "claude-haiku-4-5",
|
|
43
|
+
premiumModel: "claude-opus-4-6",
|
|
44
|
+
},
|
|
45
|
+
codex: {
|
|
46
|
+
name: "codex",
|
|
47
|
+
costTier: 2,
|
|
48
|
+
speedTier: 2,
|
|
49
|
+
strengths: ["code-execution", "shell-commands", "testing", "openai-models", "tool-use"],
|
|
50
|
+
defaultModel: "o3-mini",
|
|
51
|
+
cheapModel: "gpt-4o-mini",
|
|
52
|
+
premiumModel: "o3",
|
|
53
|
+
},
|
|
54
|
+
aider: {
|
|
55
|
+
name: "aider",
|
|
56
|
+
costTier: 1,
|
|
57
|
+
speedTier: 1,
|
|
58
|
+
strengths: ["targeted-edits", "minimal-changes", "git-aware", "cost-efficient", "linting", "formatting"],
|
|
59
|
+
defaultModel: "anthropic/claude-sonnet-4-6",
|
|
60
|
+
cheapModel: "anthropic/claude-haiku-4-5",
|
|
61
|
+
premiumModel: "anthropic/claude-opus-4-6",
|
|
62
|
+
},
|
|
63
|
+
"direct-llm": {
|
|
64
|
+
name: "direct-llm",
|
|
65
|
+
costTier: 1,
|
|
66
|
+
speedTier: 1,
|
|
67
|
+
strengths: ["analysis", "planning", "no-file-changes", "classification", "lightweight"],
|
|
68
|
+
defaultModel: "anthropic/claude-sonnet-4-6",
|
|
69
|
+
cheapModel: "anthropic/claude-haiku-4-5",
|
|
70
|
+
premiumModel: "anthropic/claude-opus-4-6",
|
|
71
|
+
},
|
|
72
|
+
};
|
|
73
|
+
/**
 * Compile a keyword list into a single regex that matches any keyword at the
 * start of a word. Keywords are escaped, so they are always matched literally.
 */
function compileComplexityMatcher(keywords) {
    const alternatives = keywords.map((kw) => kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    return new RegExp(`\\b(?:${alternatives.join("|")})`);
}
/** Keywords that mark a task as complex (matched against the lowercased task). */
const COMPLEX_TASK_MATCHER = compileComplexityMatcher([
    "refactor",
    "architect",
    "redesign",
    "migrate",
    "rewrite",
    "overhaul",
    "restructure",
    "security audit",
    "performance optim",
    "complex",
    "multiple files",
    "across the codebase",
    "entire",
    "all files",
    "comprehensive",
]);
/** Keywords that mark a task as simple (matched against the lowercased task). */
const SIMPLE_TASK_MATCHER = compileComplexityMatcher([
    "add comment",
    "fix typo",
    "rename",
    "format",
    // "reformat" no longer matches via "format" now that keywords are
    // anchored at a word start, so it is listed explicitly.
    "reformat",
    "lint",
    "add import",
    "remove unused",
    "update version",
    "bump",
    "simple",
    "add docstring",
    "fix indent",
    "whitespace",
]);
/**
 * Simple keyword-based complexity classifier.
 *
 * Keywords are matched case-insensitively and only at the start of a word, so
 * "refactoring" still matches "refactor" but "information" no longer matches
 * "format". Complex keywords win over simple ones when both are present.
 *
 * @param task Free-text task description.
 * @returns "complex", "simple", or "medium" when no keyword matches.
 */
export function classifyTaskComplexity(task) {
    const lower = task.toLowerCase();
    if (COMPLEX_TASK_MATCHER.test(lower))
        return "complex";
    if (SIMPLE_TASK_MATCHER.test(lower))
        return "simple";
    // Default to medium
    return "medium";
}
|
|
117
|
+
/**
 * Per-slot model overrides used when the config provides none.
 *
 * An empty string means "no override": the router then uses the chosen
 * agent's default model based on task complexity.
 */
export const DEFAULT_MODEL_SLOTS = {
    execution: "",
    search: "",
    reasoning: "",
    planning: "",
};
|
|
123
|
+
/**
 * Preferred agents per slot when auto-routing, most preferred first.
 * An agent's position in its slot's list determines the size of its slot bonus.
 */
const SLOT_AGENT_PREFERENCES = {
    execution: ["opencode", "codex", "claude-code", "aider"],
    search: ["direct-llm", "opencode", "codex"],
    reasoning: ["claude-code", "direct-llm", "opencode"],
    planning: ["direct-llm", "claude-code"],
};
|
|
130
|
+
/**
 * Compile a keyword list into a single regex that matches any keyword at the
 * start of a word. Keywords are escaped, so they are always matched literally.
 */
function compileSlotMatcher(keywords) {
    const alternatives = keywords.map((kw) => kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    return new RegExp(`\\b(?:${alternatives.join("|")})`);
}
/** Search patterns — retrieving information, finding things. */
const SEARCH_SLOT_MATCHER = compileSlotMatcher([
    "search",
    "find",
    "look up",
    "locate",
    "grep",
    "what is",
    "where is",
    "which file",
    "list all",
    "documentation",
    "docs",
    "research",
    "investigate",
]);
/** Reasoning patterns — analysis, review, understanding. */
const REASONING_SLOT_MATCHER = compileSlotMatcher([
    "analyze",
    "analysis",
    "review",
    "explain",
    "understand",
    "why does",
    "how does",
    "debug",
    "diagnose",
    "trace",
    "reason",
    "evaluate",
    "assess",
    "compare",
]);
/** Planning patterns — design, architecture, strategy. */
const PLANNING_SLOT_MATCHER = compileSlotMatcher([
    "plan",
    "design",
    // "redesign" no longer matches via "design" now that keywords are
    // anchored at a word start, so it is listed explicitly.
    "redesign",
    "architect",
    "propose",
    "strategy",
    "roadmap",
    "outline",
    "spec",
    "specification",
    "rfc",
    "how should",
    "what approach",
    "break down",
]);
/**
 * Classify a task into a named slot based on keyword analysis.
 *
 * Keywords are matched case-insensitively and only at the start of a word, so
 * "debugging" still matches "debug" but "especially" no longer matches "spec"
 * and "airplane" no longer matches "plan". Groups are checked in the order
 * search, reasoning, planning; the first group with a hit wins.
 *
 * @param task Free-text task description.
 * @returns "search", "reasoning", "planning", or "execution" when nothing matches.
 */
export function classifyTaskSlot(task) {
    const lower = task.toLowerCase();
    if (SEARCH_SLOT_MATCHER.test(lower))
        return "search";
    if (REASONING_SLOT_MATCHER.test(lower))
        return "reasoning";
    if (PLANNING_SLOT_MATCHER.test(lower))
        return "planning";
    // Default: execution (coding, fixing, building)
    return "execution";
}
|
|
191
|
+
// ── Failure Tracker ────────────────────────────────────────────────────────
|
|
192
|
+
/** Patterns that indicate transient errors (rate limits, timeouts, server errors). */
|
|
193
|
+
const TRANSIENT_ERROR_PATTERNS = [
|
|
194
|
+
/timeout/i,
|
|
195
|
+
/timed?\s*out/i,
|
|
196
|
+
/rate limit/i,
|
|
197
|
+
/429/,
|
|
198
|
+
/503/,
|
|
199
|
+
/502/,
|
|
200
|
+
/500/,
|
|
201
|
+
/too many requests/i,
|
|
202
|
+
/temporarily unavailable/i,
|
|
203
|
+
/server error/i,
|
|
204
|
+
/overloaded/i,
|
|
205
|
+
/capacity/i,
|
|
206
|
+
/ECONNRESET/i,
|
|
207
|
+
/ECONNREFUSED/i,
|
|
208
|
+
/EPIPE/i,
|
|
209
|
+
];
|
|
210
|
+
/**
|
|
211
|
+
* Tracks agent/model failures within a session to inform routing decisions.
|
|
212
|
+
*
|
|
213
|
+
* Failure records decay over time — recent failures weigh more heavily.
|
|
214
|
+
* Transient errors (rate limits, timeouts) decay faster than permanent ones.
|
|
215
|
+
*/
|
|
216
|
+
export class FailureTracker {
|
|
217
|
+
failures = [];
|
|
218
|
+
/** Half-life for permanent failure decay (ms). Failures lose half their weight after this. */
|
|
219
|
+
permanentHalfLifeMs;
|
|
220
|
+
/** Half-life for transient failure decay (ms). Shorter — transient issues resolve quickly. */
|
|
221
|
+
transientHalfLifeMs;
|
|
222
|
+
constructor(permanentHalfLifeMs = 10 * 60 * 1000, transientHalfLifeMs = 3 * 60 * 1000) {
|
|
223
|
+
this.permanentHalfLifeMs = permanentHalfLifeMs;
|
|
224
|
+
this.transientHalfLifeMs = transientHalfLifeMs;
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Record a failure for an agent+model pair.
|
|
228
|
+
* Classifies the error as transient or permanent based on pattern matching.
|
|
229
|
+
*/
|
|
230
|
+
recordFailure(agent, model, task, error) {
|
|
231
|
+
const isTransient = TRANSIENT_ERROR_PATTERNS.some((p) => p.test(error));
|
|
232
|
+
this.failures.push({
|
|
233
|
+
agent,
|
|
234
|
+
model,
|
|
235
|
+
task,
|
|
236
|
+
error,
|
|
237
|
+
timestamp: Date.now(),
|
|
238
|
+
isTransient,
|
|
239
|
+
});
|
|
240
|
+
}
|
|
241
|
+
/**
|
|
242
|
+
* Get the weighted failure rate for an agent+model pair (0-1).
|
|
243
|
+
*
|
|
244
|
+
* Uses exponential decay so recent failures count more than old ones.
|
|
245
|
+
* The rate is capped at 1.0 (effectively: agent is completely unreliable).
|
|
246
|
+
*/
|
|
247
|
+
getFailureRate(agent, model) {
|
|
248
|
+
const now = Date.now();
|
|
249
|
+
let weightedFailures = 0;
|
|
250
|
+
for (const f of this.failures) {
|
|
251
|
+
if (f.agent !== agent)
|
|
252
|
+
continue;
|
|
253
|
+
if (model && f.model !== model)
|
|
254
|
+
continue;
|
|
255
|
+
const age = now - f.timestamp;
|
|
256
|
+
const halfLife = f.isTransient ? this.transientHalfLifeMs : this.permanentHalfLifeMs;
|
|
257
|
+
// Exponential decay: weight = 2^(-age/halfLife)
|
|
258
|
+
const weight = 2 ** (-age / halfLife);
|
|
259
|
+
weightedFailures += weight;
|
|
260
|
+
}
|
|
261
|
+
// Normalize: 3 weighted failures = rate of 1.0
|
|
262
|
+
// This means a single recent failure gives ~0.33, two give ~0.67, three+ saturate at 1.0
|
|
263
|
+
return Math.min(1, weightedFailures / 3);
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Check if an agent has a 100% failure rate (all recent attempts failed,
|
|
267
|
+
* with no significant decay). Used to skip completely broken agents.
|
|
268
|
+
*/
|
|
269
|
+
isFullyFailed(agent) {
|
|
270
|
+
return this.getFailureRate(agent) >= 0.99;
|
|
271
|
+
}
|
|
272
|
+
/** Get all failure records (for debugging/inspection). */
|
|
273
|
+
getFailures() {
|
|
274
|
+
return [...this.failures];
|
|
275
|
+
}
|
|
276
|
+
/** Get the number of raw (undecayed) failures for an agent. */
|
|
277
|
+
getFailureCount(agent) {
|
|
278
|
+
return this.failures.filter((f) => f.agent === agent).length;
|
|
279
|
+
}
|
|
280
|
+
/** Clear all failure records. */
|
|
281
|
+
clear() {
|
|
282
|
+
this.failures = [];
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
// ── File pattern extraction ────────────────────────────────────────────────
|
|
286
|
+
/** Matches a dot followed by one of a fixed list of common file extensions, case-insensitively. */
const FILE_EXTENSION_PATTERN = /\.(ts|tsx|js|jsx|py|rs|go|java|rb|cpp|c|h|css|scss|html|json|yaml|yml|toml|md|sql|sh|bash|zsh|vue|svelte|swift|kt|cs|php)\b/gi;
/**
 * Collect the file extensions that appear in a task description.
 *
 * @param task Free-text task description.
 * @returns Each distinct extension, lowercased and with its leading dot
 *   (e.g., [".ts", ".py"]), in order of first appearance. Empty when the task
 *   mentions none.
 */
export function extractFileExtensions(task) {
    const seen = new Set();
    for (const hit of task.match(FILE_EXTENSION_PATTERN) ?? []) {
        seen.add(hit.toLowerCase());
    }
    return Array.from(seen);
}
|
|
299
|
+
/** Pick an agent's model for a complexity level: simple → cheap, complex → premium, otherwise default. */
function selectModelForComplexity(cap, complexity) {
    switch (complexity) {
        case "simple":
            return cap.cheapModel;
        case "complex":
            return cap.premiumModel;
        default:
            return cap.defaultModel;
    }
}
/**
 * Score +2 for every strength keyword longer than 3 characters that appears
 * as a whole word in the lowercased task. Strengths are split on "-"; each
 * keyword is regex-escaped so it is matched literally.
 */
function scoreStrengthKeywords(cap, lowerTask) {
    let score = 0;
    for (const strength of cap.strengths) {
        for (const kw of strength.split("-")) {
            if (kw.length <= 3)
                continue;
            const literal = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            // Word-boundary match to avoid substring false positives
            if (new RegExp(`\\b${literal}\\b`).test(lowerTask))
                score += 2;
        }
    }
    return score;
}
/**
 * Route a task to the best agent + model combination.
 *
 * Logic:
 * 1. Classify task complexity (simple/medium/complex)
 * 2. Classify task slot (execution/search/reasoning/planning)
 * 3. Check for slot-specific model overrides in config
 * 4. Score agents with slot preference bonus
 * 5. Apply failure rate penalty (skip fully-failed agents)
 * 6. Apply file pattern matching bonus from episodic memory
 * 7. Fallback to aggregate episodic stats when no high-confidence match
 * 8. Pick the highest-scoring capable option
 *
 * Model precedence everywhere: slot override, then the model suggested by
 * episodic memory for that agent, then the agent's complexity-based model
 * (or `config.default_model` for agents without capability metadata).
 *
 * @param task Free-text task description.
 * @param config Swarm config; `model_slots` and `default_model` are read.
 * @param memory Optional episodic memory. `recommendStrategy(task)` is read
 *   for `agent`, `model` and `confidence`; the optional `getAggregateStats()`
 *   is read for `fileExtensions` and `perAgent`.
 * @param failureTracker Optional tracker used to penalize or skip failing agents.
 * @returns The chosen agent, model and slot plus a human-readable reason.
 *   Resolves to "direct-llm" when no agent is available or every available
 *   agent is fully failed.
 */
export async function routeTask(task, config, memory, failureTracker) {
    const complexity = classifyTaskComplexity(task);
    const slot = classifyTaskSlot(task);
    const available = await getAvailableAgents();
    const lower = task.toLowerCase();
    // Check for slot-specific model override from config
    const slotOverrides = config.model_slots || DEFAULT_MODEL_SLOTS;
    const slotModel = slotOverrides[slot];
    // Check episodic memory for past successful strategies
    const memoryRecommendation = memory?.recommendStrategy(task);
    // Extract file extensions for file-pattern matching
    const taskExtensions = extractFileExtensions(task);
    // Get aggregate stats from episodic memory (if available)
    const aggregateStats = memory?.getAggregateStats?.();
    // If no agents available, fall back to direct-llm
    if (available.length === 0) {
        return {
            agent: "direct-llm",
            model: slotModel || config.default_model,
            slot,
            reason: "no agent backends available, falling back to direct LLM",
        };
    }
    // If episodic memory has a recommendation at confidence >= 0.3, consider it —
    // but weight by (1 - failureRate) of the recommended agent
    if (memoryRecommendation &&
        memoryRecommendation.confidence >= 0.3 &&
        available.includes(memoryRecommendation.agent)) {
        const failureRate = failureTracker?.getFailureRate(memoryRecommendation.agent) ?? 0;
        const adjustedConfidence = memoryRecommendation.confidence * (1 - failureRate);
        // Only take the fast path if adjusted confidence is still strong (>= 0.5)
        if (adjustedConfidence >= 0.5 && !failureTracker?.isFullyFailed(memoryRecommendation.agent)) {
            // The recommendation may carry no model; never return an undefined
            // model — fall back the same way the scoring path does.
            const recommendedCap = AGENT_CAPABILITIES[memoryRecommendation.agent];
            const fallbackModel = recommendedCap
                ? selectModelForComplexity(recommendedCap, complexity)
                : config.default_model;
            return {
                agent: memoryRecommendation.agent,
                model: slotModel || memoryRecommendation.model || fallbackModel,
                slot,
                reason: `${slot}/${complexity} → ${memoryRecommendation.agent} (episodic memory, ` +
                    `${(memoryRecommendation.confidence * 100).toFixed(0)}% confidence` +
                    `${failureRate > 0 ? `, adjusted: ${(adjustedConfidence * 100).toFixed(0)}%` : ""})`,
            };
        }
    }
    // Build file-extension success map from episodic memory
    const fileExtensionBonus = new Map();
    if (taskExtensions.length > 0 && aggregateStats?.fileExtensions) {
        for (const ext of taskExtensions) {
            const agentsForExt = aggregateStats.fileExtensions.get(ext);
            if (!agentsForExt)
                continue;
            for (const [agent, count] of agentsForExt) {
                const current = fileExtensionBonus.get(agent) || 0;
                // Bonus scales with number of successful episodes for this extension,
                // capped at 2 points per extension
                fileExtensionBonus.set(agent, current + Math.min(2, count * 0.5));
            }
        }
    }
    // Get preferred agents for this slot
    const slotPrefs = SLOT_AGENT_PREFERENCES[slot];
    // Score each available agent for this task
    const scored = available
        // Skip agents that are fully failed
        .filter((name) => !failureTracker?.isFullyFailed(name))
        .map((name) => {
        const cap = AGENT_CAPABILITIES[name];
        // Agents without capability metadata get a neutral score, but still
        // honor the slot-specific model override.
        if (!cap)
            return { name, score: 0, model: slotModel || config.default_model };
        // Strength matching
        let score = scoreStrengthKeywords(cap, lower);
        // Slot preference bonus — agents preferred for this slot get a boost
        const slotRank = slotPrefs.indexOf(name);
        if (slotRank !== -1) {
            score += (slotPrefs.length - slotRank) * 2;
        }
        // Episodic memory boost — agents that worked well for similar tasks
        if (memoryRecommendation && memoryRecommendation.agent === name) {
            const failureRate = failureTracker?.getFailureRate(name) ?? 0;
            // Weight the memory recommendation by (1 - failureRate)
            score += memoryRecommendation.confidence * 5 * (1 - failureRate);
        }
        // Aggregate stats boost — if no strong episodic match, use historical performance
        if (aggregateStats && (!memoryRecommendation || memoryRecommendation.confidence < 0.3)) {
            const agentStats = aggregateStats.perAgent.get(name);
            if (agentStats) {
                // Small boost proportional to past successes in this slot (capped at 3)
                const slotSuccesses = agentStats.slotCounts.get(slot);
                if (slotSuccesses) {
                    score += Math.min(3, slotSuccesses * 0.5);
                }
                // Slight efficiency bonus for agents with low average cost
                if (agentStats.avgCostUsd < 0.05) {
                    score += 1;
                }
            }
        }
        // File pattern matching bonus — agents that succeeded with these file types
        const extBonus = fileExtensionBonus.get(name) || 0;
        if (extBonus > 0) {
            score += extBonus;
        }
        // Complexity-cost alignment
        if (complexity === "simple") {
            // Prefer cheap + fast agents
            score += 5 - cap.costTier + (5 - cap.speedTier);
        }
        else if (complexity === "medium") {
            // Balanced — moderate cost/capability, favor speed
            score += 3 - Math.abs(cap.costTier - 2) + (4 - cap.speedTier);
        }
        else if (complexity === "complex") {
            // Prefer capable agents, cost is less important
            score += cap.costTier; // Higher cost often = more capable
        }
        // Failure rate penalty — penalize agents that have been failing recently
        if (failureTracker) {
            score -= failureTracker.getFailureRate(name) * 10;
        }
        // Select model: slot override > memory suggestion > complexity-based default
        let model;
        if (slotModel) {
            model = slotModel;
        }
        else if (memoryRecommendation && memoryRecommendation.agent === name && memoryRecommendation.model) {
            model = memoryRecommendation.model;
        }
        else {
            model = selectModelForComplexity(cap, complexity);
        }
        return { name, score, model };
    })
        .sort((a, b) => b.score - a.score);
    // If all agents were filtered out (all fully failed), fall back to direct-llm
    if (scored.length === 0) {
        return {
            agent: "direct-llm",
            model: slotModel || config.default_model,
            slot,
            reason: `${slot}/${complexity} → direct-llm (all agents have 100% failure rate, fallback)`,
        };
    }
    const best = scored[0];
    const memNote = memoryRecommendation ? `, memory: ${memoryRecommendation.agent}` : "";
    const bestFailRate = failureTracker?.getFailureRate(best.name) ?? 0;
    const failNote = bestFailRate > 0 ? `, failures: ${(bestFailRate * 100).toFixed(0)}%` : "";
    return {
        agent: best.name,
        model: best.model,
        slot,
        reason: `${slot}/${complexity} → ${best.name} (score: ${best.score.toFixed(1)}${memNote}${failNote})`,
    };
}
|
|
486
|
+
/**
 * Build the text that tells the orchestrator prompt which agents exist and
 * what each is good at.
 *
 * Emits one bullet per available agent (cost and speed labels, strengths and
 * its three models, or a short note when the agent has no capability
 * metadata), followed by a fixed summary of the four model slots.
 *
 * @returns The newline-joined description, or "No agent backends available."
 *   when no agent is available.
 */
export async function describeAvailableAgents() {
    const agentNames = await getAvailableAgents();
    if (agentNames.length === 0) {
        return "No agent backends available.";
    }
    // Labels are indexed by tier - 1.
    const costLabels = ["$", "$$", "$$$", "$$$$", "$$$$$"];
    const speedLabels = ["fast", "fast", "medium", "slow", "very slow"];
    const agentLines = agentNames.map((name) => {
        const cap = AGENT_CAPABILITIES[name];
        if (!cap) {
            return `- **${name}**: Available (no capability metadata)`;
        }
        const cost = costLabels[cap.costTier - 1];
        const speed = speedLabels[cap.speedTier - 1];
        return (`- **${name}** (${cost}, ${speed}): ${cap.strengths.join(", ")}` +
            `\n Default model: \`${cap.defaultModel}\` | Cheap: \`${cap.cheapModel}\` | Premium: \`${cap.premiumModel}\``);
    });
    // Slot routing info
    const slotLines = [
        "",
        "**Model slots** (auto-routing selects the best agent per task type):",
        "- `execution` — coding, fixing, building → prefers opencode, codex",
        "- `search` — finding files, researching docs → prefers direct-llm",
        "- `reasoning` — analysis, debugging, review → prefers claude-code, direct-llm",
        "- `planning` — design, architecture, strategy → prefers direct-llm, claude-code",
    ];
    return [...agentLines, ...slotLines].join("\n");
}
|
|
515
|
+
//# sourceMappingURL=model-router.js.map
|
package/dist/swarm.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Swarm mode — orchestrates coding agents in parallel via the RLM loop.
|
|
3
|
+
*
|
|
4
|
+
* Usage: swarm --dir ./my-project "add error handling to all API routes"
|
|
5
|
+
*
|
|
6
|
+
* This module:
|
|
7
|
+
* 1. Parses swarm-specific CLI args
|
|
8
|
+
* 2. Scans the target directory to build a codebase context
|
|
9
|
+
* 3. Sets up ThreadManager + WorktreeManager
|
|
10
|
+
* 4. Runs the RLM loop with the swarm orchestrator prompt
|
|
11
|
+
* 5. Cleans up worktrees on exit
|
|
12
|
+
*/
|
|
13
|
+
import "./env.js";
|
|
14
|
+
/**
 * Entry point for swarm mode.
 *
 * NOTE(review): the implementation is not visible here; presumably it performs
 * the steps listed in the module header above — confirm against swarm.js.
 *
 * @param rawArgs Raw argument strings (presumably the CLI arguments after the
 *   command name — confirm against the caller).
 * @returns A promise that resolves with no value.
 */
export declare function runSwarmMode(rawArgs: string[]): Promise<void>;
|