swarm-code 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +384 -0
- package/bin/swarm.mjs +45 -0
- package/dist/agents/aider.d.ts +12 -0
- package/dist/agents/aider.js +182 -0
- package/dist/agents/claude-code.d.ts +9 -0
- package/dist/agents/claude-code.js +216 -0
- package/dist/agents/codex.d.ts +14 -0
- package/dist/agents/codex.js +193 -0
- package/dist/agents/direct-llm.d.ts +9 -0
- package/dist/agents/direct-llm.js +78 -0
- package/dist/agents/mock.d.ts +9 -0
- package/dist/agents/mock.js +77 -0
- package/dist/agents/opencode.d.ts +23 -0
- package/dist/agents/opencode.js +571 -0
- package/dist/agents/provider.d.ts +11 -0
- package/dist/agents/provider.js +31 -0
- package/dist/cli.d.ts +15 -0
- package/dist/cli.js +285 -0
- package/dist/compression/compressor.d.ts +28 -0
- package/dist/compression/compressor.js +265 -0
- package/dist/config.d.ts +42 -0
- package/dist/config.js +170 -0
- package/dist/core/repl.d.ts +69 -0
- package/dist/core/repl.js +336 -0
- package/dist/core/rlm.d.ts +63 -0
- package/dist/core/rlm.js +409 -0
- package/dist/core/runtime.py +335 -0
- package/dist/core/types.d.ts +131 -0
- package/dist/core/types.js +19 -0
- package/dist/env.d.ts +10 -0
- package/dist/env.js +75 -0
- package/dist/interactive-swarm.d.ts +20 -0
- package/dist/interactive-swarm.js +1041 -0
- package/dist/interactive.d.ts +10 -0
- package/dist/interactive.js +1765 -0
- package/dist/main.d.ts +15 -0
- package/dist/main.js +242 -0
- package/dist/mcp/server.d.ts +15 -0
- package/dist/mcp/server.js +72 -0
- package/dist/mcp/session.d.ts +73 -0
- package/dist/mcp/session.js +184 -0
- package/dist/mcp/tools.d.ts +15 -0
- package/dist/mcp/tools.js +377 -0
- package/dist/memory/episodic.d.ts +132 -0
- package/dist/memory/episodic.js +390 -0
- package/dist/prompts/orchestrator.d.ts +5 -0
- package/dist/prompts/orchestrator.js +191 -0
- package/dist/routing/model-router.d.ts +130 -0
- package/dist/routing/model-router.js +515 -0
- package/dist/swarm.d.ts +14 -0
- package/dist/swarm.js +557 -0
- package/dist/threads/cache.d.ts +58 -0
- package/dist/threads/cache.js +198 -0
- package/dist/threads/manager.d.ts +85 -0
- package/dist/threads/manager.js +659 -0
- package/dist/ui/banner.d.ts +14 -0
- package/dist/ui/banner.js +42 -0
- package/dist/ui/dashboard.d.ts +33 -0
- package/dist/ui/dashboard.js +151 -0
- package/dist/ui/index.d.ts +10 -0
- package/dist/ui/index.js +11 -0
- package/dist/ui/log.d.ts +39 -0
- package/dist/ui/log.js +126 -0
- package/dist/ui/onboarding.d.ts +14 -0
- package/dist/ui/onboarding.js +518 -0
- package/dist/ui/spinner.d.ts +25 -0
- package/dist/ui/spinner.js +113 -0
- package/dist/ui/summary.d.ts +18 -0
- package/dist/ui/summary.js +113 -0
- package/dist/ui/theme.d.ts +63 -0
- package/dist/ui/theme.js +97 -0
- package/dist/viewer.d.ts +12 -0
- package/dist/viewer.js +1284 -0
- package/dist/worktree/manager.d.ts +45 -0
- package/dist/worktree/manager.js +266 -0
- package/dist/worktree/merge.d.ts +28 -0
- package/dist/worktree/merge.js +138 -0
- package/package.json +69 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Episodic memory — persists successful thread strategies to disk.
|
|
3
|
+
*
|
|
4
|
+
* Records episodes after successful thread completions:
|
|
5
|
+
* - Task pattern (normalized keywords)
|
|
6
|
+
* - Agent + model used
|
|
7
|
+
* - Result quality (success, duration, cost, files changed)
|
|
8
|
+
* - Task slot + complexity classification
|
|
9
|
+
*
|
|
10
|
+
* Recall: given a new task, find similar past episodes and return
|
|
11
|
+
* the strategies that worked. Used to inform agent/model selection
|
|
12
|
+
* and provide context hints to the orchestrator.
|
|
13
|
+
*
|
|
14
|
+
* Storage: JSON files in ~/.swarm/memory/episodes/ indexed by
|
|
15
|
+
* task similarity hash (trigram-based).
|
|
16
|
+
*/
|
|
17
|
+
import { createHash } from "node:crypto";
|
|
18
|
+
import * as fs from "node:fs";
|
|
19
|
+
import * as path from "node:path";
|
|
20
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
21
|
+
/** Common English function words excluded from task keyword extraction. */
const STOP_WORDS = new Set(("the a an is are was were be been being have has had do does did " +
    "will would could should may might shall can to of in for on with at by " +
    "from as into through during before after above below between and but or " +
    "not no all each every both few more most other some such than too very " +
    "just about this that these those it its my your our").split(" "));
|
|
95
|
+
/** Normalize a task string into a sorted list of significant keywords. */
function extractKeywords(task) {
    const cleaned = task.toLowerCase().replace(/[^a-z0-9\s-_./]/g, " ");
    const words = cleaned.split(/\s+/);
    return words
        .filter((word) => word.length > 2 && !STOP_WORDS.has(word))
        .sort();
}
|
|
104
|
+
/** Build the set of character trigrams of a string (space-padded, lowercased) for fuzzy matching. */
function trigrams(s) {
    const text = ` ${s.toLowerCase()} `;
    const grams = new Set();
    // Slide a 3-char window across the padded string, left to right.
    for (let start = 0; start + 3 <= text.length; start++) {
        grams.add(text.slice(start, start + 3));
    }
    return grams;
}
|
|
113
|
+
/** Jaccard similarity (|A ∩ B| / |A ∪ B|) between two trigram sets, in [0, 1]. */
function trigramSimilarity(a, b) {
    // Two empty sets are defined as identical.
    if (a.size === 0 && b.size === 0) {
        return 1;
    }
    let shared = 0;
    for (const gram of a) {
        if (b.has(gram)) {
            shared += 1;
        }
    }
    const combined = a.size + b.size - shared;
    return combined === 0 ? 0 : shared / combined;
}
|
|
125
|
+
/** Derive a stable 16-hex-char episode ID from the task text and timestamp. */
function episodeId(task, timestamp) {
    const digest = createHash("sha256").update(`${task}:${timestamp}`).digest("hex");
    return digest.slice(0, 16);
}
|
|
129
|
+
// ── Episodic Memory Store ──────────────────────────────────────────────────
|
|
130
|
+
/**
 * Persistent store of past thread "episodes", one JSON file per episode
 * under `<memoryDir>/episodes/`. Only successful episodes are retained;
 * capacity is bounded by `maxEpisodes` with oldest-first eviction.
 * All disk I/O is best-effort — failures are swallowed, never thrown.
 */
export class EpisodicMemory {
    // Absolute path of the episodes directory (constructor joins "episodes" onto memoryDir).
    memoryDir;
    // In-memory episode list, kept ordered oldest-first so eviction can shift() the head.
    episodes = [];
    // Guards loadAll() so the disk is scanned at most once.
    loaded = false;
    // Upper bound on stored episodes; oldest entries are evicted beyond this.
    maxEpisodes;
    constructor(memoryDir, maxEpisodes = 500) {
        this.memoryDir = path.join(memoryDir, "episodes");
        this.maxEpisodes = maxEpisodes;
    }
    /** Initialize — create directory and load existing episodes. */
    async init() {
        fs.mkdirSync(this.memoryDir, { recursive: true });
        await this.loadAll();
    }
    /**
     * Record a new episode from a completed thread.
     *
     * Reads from `params`: task, agent, model, slot, complexity, success,
     * durationMs, estimatedCostUsd, filesChanged (array), summary.
     * Failed threads are NOT stored or persisted — a stub episode object
     * (empty id, no keywords) is returned so callers get a uniform shape.
     * Successful episodes are appended, may evict the oldest entry, and are
     * written to disk. Returns the recorded (or stub) episode.
     */
    async record(params) {
        // Only record successful episodes — failures don't teach useful strategies
        if (!params.success) {
            return {
                id: "",
                taskKeywords: [],
                filesChangedCount: params.filesChanged.length,
                timestamp: Date.now(),
                ...params,
            };
        }
        const timestamp = Date.now();
        const episode = {
            id: episodeId(params.task, timestamp),
            task: params.task,
            taskKeywords: extractKeywords(params.task),
            agent: params.agent,
            model: params.model,
            slot: params.slot,
            complexity: params.complexity,
            success: params.success,
            durationMs: params.durationMs,
            estimatedCostUsd: params.estimatedCostUsd,
            filesChangedCount: params.filesChanged.length,
            filesChanged: params.filesChanged,
            summary: params.summary.slice(0, 2000), // Cap stored summary
            timestamp,
        };
        this.episodes.push(episode);
        // Evict oldest if over capacity
        if (this.episodes.length > this.maxEpisodes) {
            const removed = this.episodes.shift();
            this.deleteFile(removed.id);
        }
        // Persist to disk
        await this.saveEpisode(episode);
        return episode;
    }
    /**
     * Recall similar past episodes for a given task.
     * Returns episodes sorted by similarity (highest first).
     *
     * Similarity per episode = 0.6 * trigram Jaccard on the full task string
     * + 0.4 * keyword-overlap ratio; episodes below `minSimilarity` and
     * non-successful episodes are skipped. Returns `{ episode, similarity }`
     * pairs, at most `maxResults` of them.
     */
    recall(task, maxResults = 5, minSimilarity = 0.15) {
        if (this.episodes.length === 0)
            return [];
        const taskTrigrams = trigrams(task);
        const taskKeywords = new Set(extractKeywords(task));
        const scored = [];
        for (const episode of this.episodes) {
            if (!episode.success)
                continue;
            // Trigram similarity on full task string
            const triSim = trigramSimilarity(taskTrigrams, trigrams(episode.task));
            // Keyword overlap bonus
            const epKeywords = new Set(episode.taskKeywords);
            let keywordOverlap = 0;
            for (const kw of taskKeywords) {
                if (epKeywords.has(kw))
                    keywordOverlap++;
            }
            const maxKeywords = Math.max(taskKeywords.size, epKeywords.size);
            const kwSim = maxKeywords > 0 ? keywordOverlap / maxKeywords : 0;
            // Combined similarity (weighted: trigrams 60%, keywords 40%)
            const similarity = triSim * 0.6 + kwSim * 0.4;
            if (similarity >= minSimilarity) {
                scored.push({ episode, similarity });
            }
        }
        return scored.sort((a, b) => b.similarity - a.similarity).slice(0, maxResults);
    }
    /**
     * Get strategy recommendations based on past episodes.
     * Returns a formatted string for inclusion in orchestrator context,
     * or null when no episode reaches the 0.2 similarity floor.
     */
    getStrategyHints(task) {
        const recalls = this.recall(task, 3, 0.2);
        if (recalls.length === 0)
            return null;
        const lines = ["Past successful strategies for similar tasks:"];
        for (const { episode, similarity } of recalls) {
            const sim = (similarity * 100).toFixed(0);
            const cost = episode.estimatedCostUsd.toFixed(4);
            const duration = (episode.durationMs / 1000).toFixed(1);
            lines.push(` - [${sim}% match] "${episode.task.slice(0, 80)}" → ` +
                `${episode.agent}/${episode.model} (${episode.slot}, ${duration}s, $${cost}, ` +
                `${episode.filesChangedCount} files)`);
        }
        return lines.join("\n");
    }
    /**
     * Get the best agent/model recommendation for a task based on past episodes.
     * Returns null if no relevant episodes found.
     *
     * Each recalled episode contributes similarity * quality, where quality
     * penalizes cost (x10 weight per USD) and duration (per minute); scores
     * are summed per agent+model pair and the top pair wins. Confidence is
     * the best recall's similarity, clamped to 1.
     */
    recommendStrategy(task) {
        const recalls = this.recall(task, 5, 0.25);
        if (recalls.length === 0)
            return null;
        // Score agent+model pairs as composite keys to avoid mismatched combos
        const pairScores = new Map();
        for (const { episode, similarity } of recalls) {
            const quality = 1 / (1 + episode.estimatedCostUsd * 10 + episode.durationMs / 60000);
            const score = similarity * quality;
            const pairKey = `${episode.agent}::${episode.model}`;
            const existing = pairScores.get(pairKey);
            if (existing) {
                existing.score += score;
            }
            else {
                pairScores.set(pairKey, { agent: episode.agent, model: episode.model, score });
            }
        }
        // Pick highest-scoring agent+model pair
        let bestPair = null;
        for (const pair of pairScores.values()) {
            if (!bestPair || pair.score > bestPair.score) {
                bestPair = pair;
            }
        }
        if (!bestPair)
            return null;
        return {
            agent: bestPair.agent,
            model: bestPair.model,
            confidence: Math.min(1, recalls[0].similarity),
        };
    }
    /**
     * Get aggregate statistics across all episodes, grouped by agent.
     *
     * Returns per-agent success rates, average costs/durations, slot distributions,
     * and a file-extension-to-agent mapping. Used by the model router as a fallback
     * when no high-confidence episodic match exists.
     *
     * Returns null if no episodes are loaded (graceful degradation).
     */
    getAggregateStats() {
        if (this.episodes.length === 0)
            return null;
        const perAgent = new Map();
        const fileExtensions = new Map();
        for (const episode of this.episodes) {
            // Only successful episodes feed the stats (failures are normally
            // never stored, but guard anyway for files edited by hand).
            if (!episode.success)
                continue;
            // Initialize agent stats if needed
            let stats = perAgent.get(episode.agent);
            if (!stats) {
                stats = {
                    totalEpisodes: 0,
                    avgDurationMs: 0,
                    avgCostUsd: 0,
                    slotCounts: new Map(),
                    fileExtensions: new Set(),
                };
                perAgent.set(episode.agent, stats);
            }
            // Running average: update incrementally
            // (n is read BEFORE totalEpisodes is bumped, so the weighting is correct)
            const n = stats.totalEpisodes;
            stats.avgDurationMs = (stats.avgDurationMs * n + episode.durationMs) / (n + 1);
            stats.avgCostUsd = (stats.avgCostUsd * n + episode.estimatedCostUsd) / (n + 1);
            stats.totalEpisodes++;
            // Slot counts
            if (episode.slot) {
                stats.slotCounts.set(episode.slot, (stats.slotCounts.get(episode.slot) || 0) + 1);
            }
            // Extract file extensions from changed files
            for (const filePath of episode.filesChanged) {
                const dotIdx = filePath.lastIndexOf(".");
                if (dotIdx !== -1 && dotIdx < filePath.length - 1) {
                    const ext = filePath.slice(dotIdx).toLowerCase();
                    stats.fileExtensions.add(ext);
                    // Update global file extension → agent mapping
                    let agentMap = fileExtensions.get(ext);
                    if (!agentMap) {
                        agentMap = new Map();
                        fileExtensions.set(ext, agentMap);
                    }
                    agentMap.set(episode.agent, (agentMap.get(episode.agent) || 0) + 1);
                }
            }
        }
        return { perAgent, fileExtensions };
    }
    /** Get total episode count. */
    get size() {
        return this.episodes.length;
    }
    /** Get all episodes (for viewer). Returns a shallow copy — callers may not mutate the store. */
    getAll() {
        return [...this.episodes];
    }
    // ── Persistence ──────────────────────────────────────────────────────────
    /**
     * Load every `*.json` episode file from disk into memory (idempotent).
     * Corrupt or unreadable files are skipped silently; afterwards the list
     * is sorted oldest-first and trimmed to `maxEpisodes` (excess files are
     * also deleted from disk).
     */
    async loadAll() {
        if (this.loaded)
            return;
        this.loaded = true;
        if (!fs.existsSync(this.memoryDir))
            return;
        let files;
        try {
            files = fs.readdirSync(this.memoryDir).filter((f) => f.endsWith(".json"));
        }
        catch {
            return;
        }
        for (const file of files) {
            try {
                const raw = fs.readFileSync(path.join(this.memoryDir, file), "utf-8");
                const episode = JSON.parse(raw);
                // Minimal schema check: only keep records that at least carry an id and task.
                if (episode.id && episode.task) {
                    this.episodes.push(episode);
                }
            }
            catch {
                // Skip corrupt files
            }
        }
        // Sort by timestamp (oldest first for consistent eviction)
        this.episodes.sort((a, b) => a.timestamp - b.timestamp);
        // Trim to max
        while (this.episodes.length > this.maxEpisodes) {
            const removed = this.episodes.shift();
            this.deleteFile(removed.id);
        }
    }
    /** Write one episode as pretty-printed JSON to `<memoryDir>/<id>.json`. */
    async saveEpisode(episode) {
        try {
            const filePath = path.join(this.memoryDir, `${episode.id}.json`);
            fs.writeFileSync(filePath, JSON.stringify(episode, null, 2), "utf-8");
        }
        catch {
            // Non-fatal — memory is best-effort
        }
    }
    /** Delete the on-disk JSON file for an evicted episode, if it exists. */
    deleteFile(id) {
        try {
            const filePath = path.join(this.memoryDir, `${id}.json`);
            if (fs.existsSync(filePath)) {
                fs.unlinkSync(filePath);
            }
        }
        catch {
            // Non-fatal
        }
    }
}
|
|
390
|
+
//# sourceMappingURL=episodic.js.map
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
 * Orchestrator system prompt — teaches the LLM how to use swarm primitives.
 *
 * Builds one large markdown prompt describing the available primitives
 * (context, llm_query, thread/async_thread, merge_threads, FINAL),
 * strategy guidance, rules, and worked Python examples.
 *
 * @param config Swarm configuration; reads `default_agent`, `default_model`,
 *   `max_threads`, `max_total_threads`, and `thread_timeout_ms` (milliseconds,
 *   rendered as seconds in the prompt).
 * @param agentDescriptions Optional pre-formatted agent listing; when falsy
 *   the entire "## Available Agents" section is omitted.
 * @returns The complete system-prompt string.
 */
export function buildSwarmSystemPrompt(config, agentDescriptions) {
    return `You are a Swarm Orchestrator — a Recursive Language Model (RLM) agent enhanced with the ability to spawn coding agent threads in isolated git worktrees.

## Available Primitives

### 1. \`context\` variable
The full input text or codebase listing (may be very large). Check \`len(context)\` first.

### 2. \`llm_query(sub_context, instruction)\` — Lightweight analysis
Send text to an LLM for summarization, extraction, classification. No file changes.
For parallel queries: \`async_llm_query()\` with \`asyncio.gather()\`.

### 3. \`thread(task, context="", agent="${config.default_agent}", model="${config.default_model}", files=None)\` — Coding agent thread
Spawns a coding agent in an isolated git worktree. The agent can read/write files, run commands, etc.
Returns a compressed result string (status, files changed, diff, output summary).

Parameters:
- \`task\`: What the agent should do (be specific and self-contained)
- \`context\`: Additional context to include (extracted code, requirements, etc.)
- \`agent\`: Agent backend — choose based on task (see Available Agents below)
- \`model\`: Model in provider/model-id format (e.g., "anthropic/claude-sonnet-4-6", "openai/gpt-4o", "google/gemini-2.5-flash")
- \`files\`: List of relevant file paths (hint for the agent)

### 4. \`async_thread(task, context="", agent="${config.default_agent}", model="${config.default_model}", files=None)\` — Parallel threads
Same as thread() but async. Use with \`asyncio.gather()\` for parallel execution.

### 5. \`merge_threads()\` — Merge all completed thread branches
Merges thread branches back into the main branch sequentially. Returns merge status.

### 6. \`FINAL(answer)\` / \`FINAL_VAR(variable)\` — Return answer
Call when you have a complete answer or summary of work done.

${agentDescriptions
        ? `## Available Agents

${agentDescriptions}

**Agent selection by task slot:**
- **Execution** (coding, fixing, building): Use \`opencode\` or \`codex\` — fast, tool-capable
- **Search** (finding files, researching docs): Use \`direct-llm\` — lightweight, no agent overhead
- **Reasoning** (analysis, debugging, review): Use \`claude-code\` or \`direct-llm\` — deep analysis
- **Planning** (design, architecture, strategy): Use \`direct-llm\` or \`claude-code\` — structured thinking

**Model tier by complexity:**
- **Simple** (rename, lint, format): Use cheap models (haiku, gpt-4o-mini)
- **Standard** (bug fixes, features, tests): Use default models (sonnet, o3-mini)
- **Complex** (refactoring, migrations): Use premium models (opus, o3)
- **OpenAI-specific**: Use \`codex\` for best o3/gpt-4o compatibility
- When in doubt, use \`${config.default_agent}\` with the default model

`
        : ""}## Strategy

1. **Analyze first**: Use \`llm_query()\` or direct Python to understand the codebase/task
2. **Decompose**: Break the task into independent, parallelizable units
3. **Extract context**: For each thread, extract ONLY the relevant code/context — don't send everything
4. **Spawn threads**: Use \`async_thread()\` + \`asyncio.gather()\` for parallel work
5. **Inspect results**: Check each thread's result for success/failure
6. **Merge**: Call \`merge_threads()\` to integrate changes
7. **Verify**: Optionally spawn a test thread to verify the merged result
8. **Report**: Call \`FINAL()\` with a summary

## Episode Quality & Caching

- **Thread results are episodes**: Each thread returns a compressed summary of only the successful operations and conclusions — failed attempts, stack traces, and retries are filtered out automatically.
- **Subthread caching**: Identical threads (same task + files + agent + model) are cached. If you spawn the same thread twice, the second call returns instantly from cache. Design your tasks to be deterministic and reusable where possible.
- **Cost optimization**: Prefer spawning many small, focused threads over few large ones. Small threads cache better and fail more gracefully.

## Rules

1. Write valid Python 3 code in \`\`\`python blocks
2. Be specific in thread tasks — each thread should be self-contained
3. Pass relevant context to threads — they run in clean worktrees and don't see other threads' changes
4. Use \`print()\` for intermediate output visible in the next iteration
5. Max ${config.max_threads} concurrent threads, ${config.max_total_threads} total per session
6. Thread timeout: ${config.thread_timeout_ms / 1000}s per thread
7. Don't call FINAL prematurely — verify thread results first
8. The REPL persists state — variables survive across iterations

## Examples

**Single thread:**
\`\`\`python
result = thread("Fix the authentication bug in src/auth.ts — the JWT token validation is not checking expiry",
                context="The auth module uses jsonwebtoken library...",
                files=["src/auth.ts", "src/middleware/auth.ts"])
print(result)
\`\`\`

**Parallel threads:**
\`\`\`python
import asyncio

results = await asyncio.gather(
    async_thread("Add input validation to the POST /users endpoint", files=["src/routes/users.ts"]),
    async_thread("Add input validation to the POST /orders endpoint", files=["src/routes/orders.ts"]),
    async_thread("Add input validation to the POST /products endpoint", files=["src/routes/products.ts"]),
)

for i, r in enumerate(results):
    print(f"Thread {i+1}: {r[:200]}")
\`\`\`

**Analyze then act:**
\`\`\`python
# First, understand the codebase
analysis = llm_query(context[:5000], "List all API route files and their endpoints")
print(analysis)
\`\`\`

Then in the next iteration:
\`\`\`python
# Now spawn threads based on analysis
import asyncio
tasks = []
for route_file in route_files:
    tasks.append(async_thread(f"Add error handling to {route_file}", files=[route_file]))
results = await asyncio.gather(*tasks)
\`\`\`

**Merge and verify:**
\`\`\`python
merge_result = merge_threads()
print(merge_result)

# Verify
test_result = thread("Run the test suite and fix any failures", files=["package.json"])
print(test_result)

FINAL(f"Completed: added error handling to {len(route_files)} route files. All threads merged successfully.")
\`\`\`

**Thread DAG composition (T1+T2 → T3):**
\`\`\`python
import asyncio

# Stage 1: Research in parallel (variables persist across iterations)
analysis, test_gaps = await asyncio.gather(
    async_thread("Analyze the auth module and list all endpoints without rate limiting",
                 files=["src/auth/"], agent="direct-llm"),
    async_thread("Run the test suite and identify files with <50% coverage",
                 files=["package.json"]),
)
print("Analysis:", analysis[:300])
print("Test gaps:", test_gaps[:300])
\`\`\`

Then compose results into downstream threads:
\`\`\`python
import asyncio

# Stage 2: Act on Stage 1 results (compose thread outputs as input context)
impl_results = await asyncio.gather(
    async_thread("Add rate limiting to these endpoints", context=analysis,
                 files=["src/auth/middleware.ts"]),
    async_thread("Add tests for the files with low coverage", context=test_gaps,
                 files=["src/__tests__/"]),
)

# Stage 3: Merge and validate
merge_threads()
final_check = thread("Run full test suite, verify rate limiting works, fix failures",
                     context=f"Rate limiting: {impl_results[0][:200]}\\nNew tests: {impl_results[1][:200]}")
print(final_check)
\`\`\`

**Multi-stage pipeline with re-routing on failure:**
\`\`\`python
# Attempt with cheap model first
result = thread("Fix the flaky test in test_auth.py", agent="aider",
                model="anthropic/claude-haiku-4-5", files=["tests/test_auth.py"])
if "FAILED" in result:
    # Escalate to premium model with the failure context
    result = thread("Fix this test — previous attempt failed", context=result,
                    agent="claude-code", model="claude-opus-4-6",
                    files=["tests/test_auth.py", "src/auth.py"])
print(result)
\`\`\`

## Output format

Respond with ONLY a Python code block. No explanation before or after.

\`\`\`python
# Your code here
\`\`\``;
}
|
|
191
|
+
//# sourceMappingURL=orchestrator.js.map
|