claude-octopus 1.0.5 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +29 -0
- package/.github/workflows/sdk-watch.yml +86 -0
- package/README.md +325 -92
- package/dist/cli-prompts.d.ts +22 -0
- package/dist/cli-prompts.js +121 -0
- package/dist/cli.d.ts +8 -0
- package/dist/cli.js +70 -0
- package/dist/config.d.ts +3 -1
- package/dist/config.js +19 -0
- package/dist/constants.js +7 -0
- package/dist/dashboard-template.d.ts +10 -0
- package/dist/dashboard-template.js +184 -0
- package/dist/dashboard.d.ts +10 -0
- package/dist/dashboard.js +201 -0
- package/dist/index.d.ts +3 -5
- package/dist/index.js +63 -39
- package/dist/init.d.ts +9 -0
- package/dist/init.js +145 -0
- package/dist/lib.d.ts +4 -19
- package/dist/lib.js +32 -18
- package/dist/query-helpers.d.ts +30 -0
- package/dist/query-helpers.js +142 -0
- package/dist/report-renderers.d.ts +16 -0
- package/dist/report-renderers.js +154 -0
- package/dist/report-styles.d.ts +4 -0
- package/dist/report-styles.js +80 -0
- package/dist/report.d.ts +16 -0
- package/dist/report.js +82 -0
- package/dist/templates.d.ts +31 -0
- package/dist/templates.js +179 -0
- package/dist/timeline.d.ts +47 -0
- package/dist/timeline.js +107 -0
- package/dist/tools/factory.js +3 -2
- package/dist/tools/query.d.ts +2 -2
- package/dist/tools/query.js +48 -116
- package/dist/tools/report.d.ts +3 -0
- package/dist/tools/report.js +39 -0
- package/dist/tools/timeline.d.ts +3 -0
- package/dist/tools/timeline.js +122 -0
- package/dist/types.d.ts +8 -0
- package/package.json +11 -11
- package/src/cli-prompts.ts +160 -0
- package/src/cli.ts +70 -0
- package/src/config.ts +21 -1
- package/src/constants.ts +7 -0
- package/src/dashboard-template.ts +207 -0
- package/src/dashboard.test.ts +79 -0
- package/src/dashboard.ts +226 -0
- package/src/index.ts +80 -42
- package/src/init.test.ts +123 -0
- package/src/init.ts +186 -0
- package/src/lib.test.ts +22 -12
- package/src/lib.ts +34 -38
- package/src/query-helpers.ts +209 -0
- package/src/report-renderers.ts +185 -0
- package/src/report-styles.ts +81 -0
- package/src/report.test.ts +145 -0
- package/src/report.ts +118 -0
- package/src/templates.test.ts +149 -0
- package/src/templates.ts +203 -0
- package/src/timeline.test.ts +152 -0
- package/src/timeline.ts +152 -0
- package/src/tools/factory.ts +3 -2
- package/src/tools/query.ts +53 -143
- package/src/tools/report.ts +46 -0
- package/src/tools/timeline.ts +137 -0
- package/src/types.ts +10 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build-and-test:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- uses: pnpm/action-setup@v4
|
|
19
|
+
with:
|
|
20
|
+
version: 9
|
|
21
|
+
|
|
22
|
+
- uses: actions/setup-node@v4
|
|
23
|
+
with:
|
|
24
|
+
node-version: "22"
|
|
25
|
+
cache: pnpm
|
|
26
|
+
|
|
27
|
+
- run: pnpm install --frozen-lockfile
|
|
28
|
+
- run: pnpm build
|
|
29
|
+
- run: pnpm test
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
name: SDK watch — auto-bump Agent SDK
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
# Every 6 hours
|
|
6
|
+
- cron: "0 */6 * * *"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
check-and-bump:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- uses: pnpm/action-setup@v4
|
|
19
|
+
with:
|
|
20
|
+
version: 9
|
|
21
|
+
|
|
22
|
+
- uses: actions/setup-node@v4
|
|
23
|
+
with:
|
|
24
|
+
node-version: "22"
|
|
25
|
+
registry-url: "https://registry.npmjs.org"
|
|
26
|
+
cache: pnpm
|
|
27
|
+
|
|
28
|
+
- run: pnpm install --frozen-lockfile
|
|
29
|
+
|
|
30
|
+
# ── Detect new SDK version ─────────────────────────────────
|
|
31
|
+
- name: Check for SDK update
|
|
32
|
+
id: check
|
|
33
|
+
run: |
|
|
34
|
+
CURRENT=$(node -e "console.log(require('./node_modules/@anthropic-ai/claude-agent-sdk/package.json').version)")
|
|
35
|
+
LATEST=$(npm view @anthropic-ai/claude-agent-sdk version)
|
|
36
|
+
echo "current=$CURRENT" >> "$GITHUB_OUTPUT"
|
|
37
|
+
echo "latest=$LATEST" >> "$GITHUB_OUTPUT"
|
|
38
|
+
if [ "$CURRENT" = "$LATEST" ]; then
|
|
39
|
+
echo "skip=true" >> "$GITHUB_OUTPUT"
|
|
40
|
+
else
|
|
41
|
+
echo "skip=false" >> "$GITHUB_OUTPUT"
|
|
42
|
+
fi
|
|
43
|
+
|
|
44
|
+
# ── Bump SDK ───────────────────────────────────────────────
|
|
45
|
+
- name: Update SDK dependency
|
|
46
|
+
if: steps.check.outputs.skip == 'false'
|
|
47
|
+
run: pnpm add @anthropic-ai/claude-agent-sdk@latest
|
|
48
|
+
|
|
49
|
+
# ── Build + test ───────────────────────────────────────────
|
|
50
|
+
- name: Build
|
|
51
|
+
if: steps.check.outputs.skip == 'false'
|
|
52
|
+
run: pnpm build
|
|
53
|
+
|
|
54
|
+
- name: Test
|
|
55
|
+
if: steps.check.outputs.skip == 'false'
|
|
56
|
+
run: pnpm test
|
|
57
|
+
|
|
58
|
+
# ── Bump octopus patch version ─────────────────────────────
|
|
59
|
+
- name: Bump patch version
|
|
60
|
+
if: steps.check.outputs.skip == 'false'
|
|
61
|
+
id: version
|
|
62
|
+
run: |
|
|
63
|
+
NEW_VERSION=$(node -e "
|
|
64
|
+
const pkg = require('./package.json');
|
|
65
|
+
const [maj, min, pat] = pkg.version.split('.').map(Number);
|
|
66
|
+
console.log(maj + '.' + min + '.' + (pat + 1));
|
|
67
|
+
")
|
|
68
|
+
pnpm pkg set version="$NEW_VERSION"
|
|
69
|
+
echo "version=$NEW_VERSION" >> "$GITHUB_OUTPUT"
|
|
70
|
+
|
|
71
|
+
# ── Commit + push ──────────────────────────────────────────
|
|
72
|
+
- name: Commit and push
|
|
73
|
+
if: steps.check.outputs.skip == 'false'
|
|
74
|
+
run: |
|
|
75
|
+
git config user.name "github-actions[bot]"
|
|
76
|
+
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
77
|
+
git add package.json pnpm-lock.yaml
|
|
78
|
+
git commit -m "chore: bump agent-sdk ${{ steps.check.outputs.current }} -> ${{ steps.check.outputs.latest }}, release v${{ steps.version.outputs.version }}"
|
|
79
|
+
git push
|
|
80
|
+
|
|
81
|
+
# ── Publish to npm ─────────────────────────────────────────
|
|
82
|
+
- name: Publish
|
|
83
|
+
if: steps.check.outputs.skip == 'false'
|
|
84
|
+
run: pnpm publish --no-git-checks --access public
|
|
85
|
+
env:
|
|
86
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/README.md
CHANGED
|
@@ -30,14 +30,22 @@ Or skip the install entirely — use `npx` directly in your `.mcp.json` (see Qui
|
|
|
30
30
|
|
|
31
31
|
## Quick Start
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
The fastest way to get started:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
npx claude-octopus init
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
This interactive wizard lets you pick a template, detects your MCP client, and writes the config for you.
|
|
40
|
+
|
|
41
|
+
Or add to your `.mcp.json` manually:
|
|
34
42
|
|
|
35
43
|
```json
|
|
36
44
|
{
|
|
37
45
|
"mcpServers": {
|
|
38
46
|
"claude": {
|
|
39
47
|
"command": "npx",
|
|
40
|
-
"args": ["claude-octopus"],
|
|
48
|
+
"args": ["claude-octopus@latest"],
|
|
41
49
|
"env": {
|
|
42
50
|
"CLAUDE_PERMISSION_MODE": "bypassPermissions"
|
|
43
51
|
}
|
|
@@ -46,7 +54,17 @@ Add to your `.mcp.json`:
|
|
|
46
54
|
}
|
|
47
55
|
```
|
|
48
56
|
|
|
49
|
-
This gives you
|
|
57
|
+
This gives you five tools:
|
|
58
|
+
|
|
59
|
+
| Tool | Purpose |
|
|
60
|
+
|------|---------|
|
|
61
|
+
| `claude_code` | Send a task, get a result |
|
|
62
|
+
| `claude_code_reply` | Continue a conversation |
|
|
63
|
+
| `claude_code_timeline` | Query the workflow timeline |
|
|
64
|
+
| `claude_code_transcript` | Read full session transcripts |
|
|
65
|
+
| `claude_code_report` | Generate HTML reports |
|
|
66
|
+
|
|
67
|
+
That's it — you have Claude Code as a tool, with full workflow observability built in.
|
|
50
68
|
|
|
51
69
|
## Multiple Agents
|
|
52
70
|
|
|
@@ -57,7 +75,7 @@ The real power is running several instances with different configurations:
|
|
|
57
75
|
"mcpServers": {
|
|
58
76
|
"code-reviewer": {
|
|
59
77
|
"command": "npx",
|
|
60
|
-
"args": ["claude-octopus"],
|
|
78
|
+
"args": ["claude-octopus@latest"],
|
|
61
79
|
"env": {
|
|
62
80
|
"CLAUDE_TOOL_NAME": "code_reviewer",
|
|
63
81
|
"CLAUDE_SERVER_NAME": "code-reviewer",
|
|
@@ -70,7 +88,7 @@ The real power is running several instances with different configurations:
|
|
|
70
88
|
},
|
|
71
89
|
"test-writer": {
|
|
72
90
|
"command": "npx",
|
|
73
|
-
"args": ["claude-octopus"],
|
|
91
|
+
"args": ["claude-octopus@latest"],
|
|
74
92
|
"env": {
|
|
75
93
|
"CLAUDE_TOOL_NAME": "test_writer",
|
|
76
94
|
"CLAUDE_SERVER_NAME": "test-writer",
|
|
@@ -81,7 +99,7 @@ The real power is running several instances with different configurations:
|
|
|
81
99
|
},
|
|
82
100
|
"quick-qa": {
|
|
83
101
|
"command": "npx",
|
|
84
|
-
"args": ["claude-octopus"],
|
|
102
|
+
"args": ["claude-octopus@latest"],
|
|
85
103
|
"env": {
|
|
86
104
|
"CLAUDE_TOOL_NAME": "quick_qa",
|
|
87
105
|
"CLAUDE_SERVER_NAME": "quick-qa",
|
|
@@ -95,7 +113,32 @@ The real power is running several instances with different configurations:
|
|
|
95
113
|
}
|
|
96
114
|
```
|
|
97
115
|
|
|
98
|
-
Your MCP client now sees
|
|
116
|
+
Your MCP client now sees distinct tools for each agent — `code_reviewer`, `test_writer`, `quick_qa` — each purpose-built.
|
|
117
|
+
|
|
118
|
+
## Multi-Agent Orchestration
|
|
119
|
+
|
|
120
|
+
Agents can coordinate through a **coordinator pattern**: one agent has the others as inner MCP tools via `CLAUDE_MCP_SERVERS`, and its system prompt drives the pipeline.
|
|
121
|
+
|
|
122
|
+
```json
|
|
123
|
+
{
|
|
124
|
+
"mcpServers": {
|
|
125
|
+
"publishing-house": {
|
|
126
|
+
"command": "npx",
|
|
127
|
+
"args": ["claude-octopus@latest"],
|
|
128
|
+
"env": {
|
|
129
|
+
"CLAUDE_TOOL_NAME": "publishing_house",
|
|
130
|
+
"CLAUDE_SERVER_NAME": "publishing-house",
|
|
131
|
+
"CLAUDE_MODEL": "opus",
|
|
132
|
+
"CLAUDE_PERMISSION_MODE": "bypassPermissions",
|
|
133
|
+
"CLAUDE_APPEND_PROMPT": "You are a publishing house coordinator. Dispatch tasks to your specialist agents and drive the pipeline to completion.",
|
|
134
|
+
"CLAUDE_MCP_SERVERS": "{\"researcher\":{\"command\":\"npx\",\"args\":[\"claude-octopus@latest\"],\"env\":{\"CLAUDE_TOOL_NAME\":\"researcher\",\"CLAUDE_SERVER_NAME\":\"researcher\",\"CLAUDE_MODEL\":\"sonnet\",\"CLAUDE_PERMISSION_MODE\":\"bypassPermissions\"}},\"architect\":{\"command\":\"npx\",\"args\":[\"claude-octopus@latest\"],\"env\":{\"CLAUDE_TOOL_NAME\":\"architect\",\"CLAUDE_SERVER_NAME\":\"architect\",\"CLAUDE_MODEL\":\"opus\",\"CLAUDE_PERMISSION_MODE\":\"bypassPermissions\"}}}"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The coordinator agent calls `researcher`, `architect`, etc. as MCP tools on its own — no human in the loop until it finishes. Every invocation is tracked in the shared timeline.
|
|
99
142
|
|
|
100
143
|
## Agent Factory
|
|
101
144
|
|
|
@@ -106,7 +149,7 @@ Don't want to write configs by hand? Add a factory instance:
|
|
|
106
149
|
"mcpServers": {
|
|
107
150
|
"agent-factory": {
|
|
108
151
|
"command": "npx",
|
|
109
|
-
"args": ["claude-octopus"],
|
|
152
|
+
"args": ["claude-octopus@latest"],
|
|
110
153
|
"env": {
|
|
111
154
|
"CLAUDE_FACTORY_ONLY": "true",
|
|
112
155
|
"CLAUDE_SERVER_NAME": "agent-factory"
|
|
@@ -120,31 +163,209 @@ This exposes a single `create_claude_code_mcp` tool — an interactive wizard. T
|
|
|
120
163
|
|
|
121
164
|
In factory-only mode, no query tools are registered — just the wizard. This keeps routing clean: the factory creates agents, the agents do work.
|
|
122
165
|
|
|
166
|
+
## Init Wizard
|
|
167
|
+
|
|
168
|
+
Don't want to edit JSON by hand? The init wizard takes you from zero to a working setup in about 30 seconds:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
npx claude-octopus init
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
Claude Octopus — init wizard
|
|
176
|
+
|
|
177
|
+
One brain, many arms. Let's set up your agents.
|
|
178
|
+
|
|
179
|
+
Pick a template (or build your own):
|
|
180
|
+
|
|
181
|
+
1. Code Review Team — Reviewer + test writer + security auditor
|
|
182
|
+
2. Publishing House — Researcher + architect + editor + proofreader
|
|
183
|
+
3. Tiered Models — Haiku for quick Q&A, Sonnet for coding, Opus for hard problems
|
|
184
|
+
4. Solo Agent — Single Claude Code agent with sensible defaults
|
|
185
|
+
5. Agent Factory — Interactive wizard that generates agent configs on demand
|
|
186
|
+
6. Custom — describe your own agent(s)
|
|
187
|
+
|
|
188
|
+
Choice [1-6]:
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
It auto-detects installed MCP clients (Claude Desktop, Claude Code, Cursor, Windsurf), merges with existing config, and warns before overwriting.
|
|
192
|
+
|
|
193
|
+
### Skip the menu
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
npx claude-octopus init --template code-review-team
|
|
197
|
+
npx claude-octopus init --template tiered-models
|
|
198
|
+
npx claude-octopus init --template publishing-house
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Templates
|
|
202
|
+
|
|
203
|
+
Five built-in templates, battle-tested and ready to use:
|
|
204
|
+
|
|
205
|
+
| Template | Agents | Purpose |
|
|
206
|
+
|----------|--------|---------|
|
|
207
|
+
| `code-review-team` | code-reviewer (opus), test-writer (sonnet), security-auditor (opus) | Thorough code review pipeline |
|
|
208
|
+
| `publishing-house` | researcher (sonnet), architect (opus), editor (sonnet), proofreader (haiku) | Multi-stage content/code pipeline |
|
|
209
|
+
| `tiered-models` | quick-qa (haiku), coder (sonnet), deep-thinker (opus) | Right model for the job |
|
|
210
|
+
| `solo-agent` | claude (default) | Single agent, quick setup |
|
|
211
|
+
| `factory` | agent-factory | Generates configs on demand |
|
|
212
|
+
|
|
213
|
+
Each agent comes pre-tuned with appropriate model, tools, effort level, and system prompt.
|
|
214
|
+
|
|
215
|
+
## Dashboard
|
|
216
|
+
|
|
217
|
+
Monitor your agents in real time:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
npx claude-octopus dashboard
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Opens a local web dashboard at `http://localhost:3456` with:
|
|
224
|
+
|
|
225
|
+
- **Live stats** — total runs, invocations, cost, turns, errors
|
|
226
|
+
- **Recent activity** — agent cards for the latest run
|
|
227
|
+
- **Run table** — all runs with cost, duration, and status
|
|
228
|
+
- **Auto-refresh** — SSE connection pushes updates as agents run
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
# Custom port
|
|
232
|
+
npx claude-octopus dashboard --port 8080
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
The dashboard reads the same timeline index used by the `<name>_timeline` and `<name>_report` tools. No additional configuration is needed.
|
|
236
|
+
|
|
123
237
|
## Tools
|
|
124
238
|
|
|
125
239
|
Each non-factory instance exposes:
|
|
126
240
|
|
|
127
|
-
| Tool
|
|
128
|
-
|
|
129
|
-
| `<name>`
|
|
130
|
-
| `<name>_reply` | Continue a previous conversation by `session_id`
|
|
241
|
+
| Tool | Purpose |
|
|
242
|
+
|------|---------|
|
|
243
|
+
| `<name>` | Send a task to the agent, get a response + `session_id` + `run_id` |
|
|
244
|
+
| `<name>_reply` | Continue a previous conversation by `session_id` |
|
|
245
|
+
| `<name>_timeline` | Query the cross-agent workflow timeline |
|
|
246
|
+
| `<name>_transcript` | Retrieve full session transcript from Claude Code's storage |
|
|
247
|
+
| `<name>_report` | Generate a self-contained HTML report for a run or all runs |
|
|
248
|
+
|
|
249
|
+
### Query and reply parameters
|
|
250
|
+
|
|
251
|
+
| Parameter | Description |
|
|
252
|
+
|-----------|-------------|
|
|
253
|
+
| `prompt` | The task or question (required) |
|
|
254
|
+
| `run_id` | Workflow run ID — groups related agent calls into one timeline. Auto-generated if omitted; returned in every response for propagation. |
|
|
255
|
+
| `cwd` | Working directory override |
|
|
256
|
+
| `model` | Model override (`sonnet`, `opus`, `haiku`, or full ID) |
|
|
257
|
+
| `tools` | Restrict available tools (intersects with server restriction) |
|
|
258
|
+
| `disallowedTools` | Block additional tools (unions with server blacklist) |
|
|
259
|
+
| `additionalDirs` | Extra directories the agent can access |
|
|
260
|
+
| `plugins` | Additional plugin paths to load |
|
|
261
|
+
| `effort` | Thinking effort (`low`, `medium`, `high`, `max`) |
|
|
262
|
+
| `permissionMode` | Permission mode (can only tighten, never loosen) |
|
|
263
|
+
| `maxTurns` | Max conversation turns |
|
|
264
|
+
| `maxBudgetUsd` | Max spend in USD |
|
|
265
|
+
| `systemPrompt` | Additional prompt (appended to server default) |
|
|
266
|
+
|
|
267
|
+
## Timeline
|
|
268
|
+
|
|
269
|
+
Every agent invocation is recorded in a lightweight JSONL index at `~/.claude-octopus/timelines/timeline.jsonl`. This solves the multi-agent correlation problem: when several agents participate in a workflow, the timeline tracks which sessions belong to the same run, in what order they executed, and what role each played.
|
|
270
|
+
|
|
271
|
+
Full session transcripts stay in Claude Code's own storage (`~/.claude/projects/`). The timeline is just the table of contents — ~200 bytes per entry — and cross-references those transcripts via `session_id`.
|
|
272
|
+
|
|
273
|
+
```mermaid
|
|
274
|
+
graph TB
|
|
275
|
+
subgraph "Timeline Index (~200 bytes/entry)"
|
|
276
|
+
TL["~/.claude-octopus/timelines/timeline.jsonl"]
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
subgraph "Claude Code Session Storage (full transcripts)"
|
|
280
|
+
S1["~/.claude/projects/.../ses-aaa.jsonl"]
|
|
281
|
+
S2["~/.claude/projects/.../ses-bbb.jsonl"]
|
|
282
|
+
S3["~/.claude/projects/.../ses-ccc.jsonl"]
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
TL -->|"session_id cross-ref"| S1
|
|
286
|
+
TL -->|"session_id cross-ref"| S2
|
|
287
|
+
TL -->|"session_id cross-ref"| S3
|
|
288
|
+
```
|
|
131
289
|
|
|
132
|
-
|
|
290
|
+
### How it works
|
|
133
291
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
292
|
+
1. Every `<name>` and `<name>_reply` call appends one line to the timeline
|
|
293
|
+
2. If you pass `run_id`, all agents sharing the same `run_id` are grouped into one run
|
|
294
|
+
3. If you omit `run_id`, one is auto-generated and returned in the response — pass it to subsequent agents to keep them grouped
|
|
295
|
+
|
|
296
|
+
### Querying the timeline
|
|
297
|
+
|
|
298
|
+
```
|
|
299
|
+
# List all runs
|
|
300
|
+
<name>_timeline({})
|
|
301
|
+
|
|
302
|
+
# Show one run's agent sequence
|
|
303
|
+
<name>_timeline({ run_id: "abc-123" })
|
|
304
|
+
|
|
305
|
+
# Look up a specific session
|
|
306
|
+
<name>_timeline({ session_id: "ses-xyz" })
|
|
307
|
+
|
|
308
|
+
# Retrieve full transcript (separate tool)
|
|
309
|
+
<name>_transcript({ session_id: "ses-xyz" })
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
### Multi-agent workflow example
|
|
313
|
+
|
|
314
|
+
```
|
|
315
|
+
Host: researcher({ prompt: "Research X", run_id: "pub-001" })
|
|
316
|
+
→ { run_id: "pub-001", session_id: "ses-aaa", result: "..." }
|
|
317
|
+
|
|
318
|
+
Host: architect({ prompt: "Structure based on...", run_id: "pub-001" })
|
|
319
|
+
→ { run_id: "pub-001", session_id: "ses-bbb", result: "..." }
|
|
320
|
+
|
|
321
|
+
Host: verifier({ prompt: "Check this plan", run_id: "pub-001" })
|
|
322
|
+
→ { run_id: "pub-001", session_id: "ses-ccc", result: "..." }
|
|
323
|
+
|
|
324
|
+
Later: researcher_timeline({ run_id: "pub-001" })
|
|
325
|
+
→ [
|
|
326
|
+
{ agent: "researcher", session_id: "ses-aaa", cost: 0.05, turns: 4 },
|
|
327
|
+
{ agent: "architect", session_id: "ses-bbb", cost: 0.08, turns: 6 },
|
|
328
|
+
{ agent: "verifier", session_id: "ses-ccc", cost: 0.03, turns: 3 },
|
|
329
|
+
]
|
|
330
|
+
|
|
331
|
+
Later: researcher_transcript({ session_id: "ses-aaa" })
|
|
332
|
+
→ full conversation transcript from Claude Code's storage
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
## HTML Reports
|
|
336
|
+
|
|
337
|
+
Generate self-contained HTML reports with agent sequence visualization, cost breakdown, and collapsible transcripts. Dark theme, no external dependencies — one file, open in any browser.
|
|
338
|
+
|
|
339
|
+
### Via MCP tool
|
|
340
|
+
|
|
341
|
+
```
|
|
342
|
+
<name>_report({}) # index of all runs
|
|
343
|
+
<name>_report({ run_id: "pub-001" }) # detailed report for one run
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### Via CLI
|
|
347
|
+
|
|
348
|
+
```bash
|
|
349
|
+
# Index of all runs
|
|
350
|
+
npx claude-octopus report --out index.html
|
|
351
|
+
|
|
352
|
+
# Detailed report for one run
|
|
353
|
+
npx claude-octopus report pub-001 --out report.html
|
|
354
|
+
open report.html
|
|
355
|
+
|
|
356
|
+
# Without transcripts (faster, smaller file)
|
|
357
|
+
npx claude-octopus report pub-001 --no-transcripts --out report.html
|
|
358
|
+
|
|
359
|
+
# To stdout (pipe-friendly)
|
|
360
|
+
npx claude-octopus report pub-001 > report.html
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
### What's in the report
|
|
364
|
+
|
|
365
|
+
- **Run summary** — agent count, total cost, duration, total turns
|
|
366
|
+
- **Timeline bar** — numbered dots for each agent (green = success, red = error)
|
|
367
|
+
- **Agent cards** — timing, cost, turns, session ID, prompt excerpt
|
|
368
|
+
- **Collapsible transcripts** — full tool calls, reasoning, and results per agent
|
|
148
369
|
|
|
149
370
|
## Configuration
|
|
150
371
|
|
|
@@ -152,50 +373,56 @@ All configuration is via environment variables in `.mcp.json`. Every env var is
|
|
|
152
373
|
|
|
153
374
|
### Identity
|
|
154
375
|
|
|
155
|
-
| Env Var
|
|
156
|
-
|
|
157
|
-
| `CLAUDE_TOOL_NAME`
|
|
158
|
-
| `CLAUDE_DESCRIPTION`
|
|
159
|
-
| `CLAUDE_SERVER_NAME`
|
|
160
|
-
| `CLAUDE_FACTORY_ONLY` | Only expose the factory wizard tool
|
|
376
|
+
| Env Var | Description | Default |
|
|
377
|
+
|---------|-------------|---------|
|
|
378
|
+
| `CLAUDE_TOOL_NAME` | Tool name prefix (generates `<name>`, `<name>_reply`, `<name>_timeline`, `<name>_transcript`, `<name>_report`) | `claude_code` |
|
|
379
|
+
| `CLAUDE_DESCRIPTION` | Tool description shown to the host AI | generic |
|
|
380
|
+
| `CLAUDE_SERVER_NAME` | MCP server name in protocol handshake | `claude-octopus` |
|
|
381
|
+
| `CLAUDE_FACTORY_ONLY` | Only expose the factory wizard tool | `false` |
|
|
161
382
|
|
|
162
383
|
### Agent
|
|
163
384
|
|
|
164
|
-
| Env Var
|
|
165
|
-
|
|
166
|
-
| `CLAUDE_MODEL`
|
|
167
|
-
| `CLAUDE_CWD`
|
|
168
|
-
| `CLAUDE_PERMISSION_MODE`
|
|
169
|
-
| `CLAUDE_ALLOWED_TOOLS`
|
|
170
|
-
| `CLAUDE_DISALLOWED_TOOLS` | Comma-separated tool blacklist
|
|
171
|
-
| `CLAUDE_MAX_TURNS`
|
|
172
|
-
| `CLAUDE_MAX_BUDGET_USD`
|
|
173
|
-
| `CLAUDE_EFFORT`
|
|
385
|
+
| Env Var | Description | Default |
|
|
386
|
+
|---------|-------------|---------|
|
|
387
|
+
| `CLAUDE_MODEL` | Model (`sonnet`, `opus`, `haiku`, or full ID) | SDK default |
|
|
388
|
+
| `CLAUDE_CWD` | Working directory | `process.cwd()` |
|
|
389
|
+
| `CLAUDE_PERMISSION_MODE` | `default`, `acceptEdits`, `bypassPermissions`, `plan` | `default` |
|
|
390
|
+
| `CLAUDE_ALLOWED_TOOLS` | Comma-separated tool allowlist (restricts which tools are available) | all |
|
|
391
|
+
| `CLAUDE_DISALLOWED_TOOLS` | Comma-separated tool blacklist | none |
|
|
392
|
+
| `CLAUDE_MAX_TURNS` | Max conversation turns | unlimited |
|
|
393
|
+
| `CLAUDE_MAX_BUDGET_USD` | Max spend per invocation | unlimited |
|
|
394
|
+
| `CLAUDE_EFFORT` | `low`, `medium`, `high`, `max` | SDK default |
|
|
174
395
|
|
|
175
396
|
### Prompts
|
|
176
397
|
|
|
177
|
-
| Env Var
|
|
178
|
-
|
|
179
|
-
| `CLAUDE_SYSTEM_PROMPT` | Replaces the default Claude Code system prompt
|
|
398
|
+
| Env Var | Description |
|
|
399
|
+
|---------|-------------|
|
|
400
|
+
| `CLAUDE_SYSTEM_PROMPT` | Replaces the default Claude Code system prompt |
|
|
180
401
|
| `CLAUDE_APPEND_PROMPT` | Appended to the default prompt (usually what you want) |
|
|
181
402
|
|
|
182
403
|
### Advanced
|
|
183
404
|
|
|
184
|
-
| Env Var
|
|
185
|
-
|
|
186
|
-
| `CLAUDE_ADDITIONAL_DIRS` | Extra directories to grant access (comma-separated)
|
|
187
|
-
| `CLAUDE_PLUGINS`
|
|
188
|
-
| `CLAUDE_MCP_SERVERS`
|
|
405
|
+
| Env Var | Description |
|
|
406
|
+
|---------|-------------|
|
|
407
|
+
| `CLAUDE_ADDITIONAL_DIRS` | Extra directories to grant access (comma-separated) |
|
|
408
|
+
| `CLAUDE_PLUGINS` | Local plugin paths (comma-separated) |
|
|
409
|
+
| `CLAUDE_MCP_SERVERS` | MCP servers for the inner agent (JSON) |
|
|
189
410
|
| `CLAUDE_PERSIST_SESSION` | `true`/`false` — enable session resume (default: `true`) |
|
|
190
|
-
| `CLAUDE_SETTING_SOURCES` | Settings to load: `user`, `project`, `local`
|
|
191
|
-
| `CLAUDE_SETTINGS`
|
|
192
|
-
| `CLAUDE_BETAS`
|
|
411
|
+
| `CLAUDE_SETTING_SOURCES` | Settings to load: `user`, `project`, `local` |
|
|
412
|
+
| `CLAUDE_SETTINGS` | Path to settings JSON or inline JSON |
|
|
413
|
+
| `CLAUDE_BETAS` | Beta features (comma-separated) |
|
|
414
|
+
|
|
415
|
+
### Timeline
|
|
416
|
+
|
|
417
|
+
| Env Var | Description | Default |
|
|
418
|
+
|---------|-------------|---------|
|
|
419
|
+
| `CLAUDE_TIMELINE_DIR` | Directory for the cross-agent timeline index | `~/.claude-octopus/timelines` |
|
|
193
420
|
|
|
194
421
|
### Authentication
|
|
195
422
|
|
|
196
|
-
| Env Var
|
|
197
|
-
|
|
198
|
-
| `ANTHROPIC_API_KEY`
|
|
423
|
+
| Env Var | Description | Default |
|
|
424
|
+
|---------|-------------|---------|
|
|
425
|
+
| `ANTHROPIC_API_KEY` | Anthropic API key for this agent | inherited from parent |
|
|
199
426
|
| `CLAUDE_CODE_OAUTH_TOKEN` | Claude Code OAuth token for this agent | inherited from parent |
|
|
200
427
|
|
|
201
428
|
Leave both unset to inherit auth from the parent process. Set one per agent to use a different account or billing source.
|
|
@@ -204,50 +431,56 @@ Lists accept JSON arrays when values contain commas: `["path,with,comma", "/norm
|
|
|
204
431
|
|
|
205
432
|
## Security
|
|
206
433
|
|
|
207
|
-
- **Permission mode defaults to
|
|
434
|
+
- **Permission mode defaults to `default`** — tool executions prompt for approval unless you explicitly set `bypassPermissions`.
|
|
208
435
|
- **`cwd` overrides preserve agent knowledge** — when the host overrides `cwd`, the agent's configured base directory is automatically added to `additionalDirectories` so it retains access to its own context.
|
|
209
436
|
- **Tool restrictions narrow, never widen** — per-invocation `tools` intersects with the server restriction (can only remove tools, not add). `disallowedTools` unions (can only block more).
|
|
210
|
-
- **`_reply
|
|
437
|
+
- **`_reply` and `_transcript` tools respect persistence** — not registered when `CLAUDE_PERSIST_SESSION=false`.
|
|
438
|
+
- **Timeline writes are best-effort** — a failed timeline append never blocks or fails the primary query.
|
|
211
439
|
|
|
212
440
|
## Architecture
|
|
213
441
|
|
|
214
|
-
```
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
442
|
+
```mermaid
|
|
443
|
+
graph TB
|
|
444
|
+
subgraph "MCP Client (Claude Desktop, Cursor, etc.)"
|
|
445
|
+
C["Sees: code_reviewer, test_writer, quick_qa"]
|
|
446
|
+
end
|
|
447
|
+
|
|
448
|
+
C -->|"JSON-RPC / stdio"| O1
|
|
449
|
+
C -->|"JSON-RPC / stdio"| O2
|
|
450
|
+
C -->|"JSON-RPC / stdio"| O3
|
|
451
|
+
|
|
452
|
+
subgraph "Claude Octopus Instances"
|
|
453
|
+
O1["code-reviewer<br/>model=opus, tools=Read,Grep,Glob"]
|
|
454
|
+
O2["test-writer<br/>model=sonnet"]
|
|
455
|
+
O3["quick-qa<br/>model=haiku, budget=$0.02"]
|
|
456
|
+
end
|
|
457
|
+
|
|
458
|
+
O1 -->|"Agent SDK query()"| SDK["Claude Agent SDK"]
|
|
459
|
+
O2 -->|"Agent SDK query()"| SDK
|
|
460
|
+
O3 -->|"Agent SDK query()"| SDK
|
|
461
|
+
|
|
462
|
+
O1 -->|"append"| TL["Timeline Index<br/>~/.claude-octopus/timelines/"]
|
|
463
|
+
O2 -->|"append"| TL
|
|
464
|
+
O3 -->|"append"| TL
|
|
465
|
+
|
|
466
|
+
SDK -->|"persist"| SS["Session Storage<br/>~/.claude/projects/"]
|
|
467
|
+
TL -.->|"cross-ref"| SS
|
|
239
468
|
```
|
|
240
469
|
|
|
241
470
|
## How It Compares
|
|
242
471
|
|
|
243
|
-
| Feature
|
|
244
|
-
|
|
245
|
-
| Approach
|
|
246
|
-
|
|
|
247
|
-
| Multi-instance
|
|
248
|
-
| Per-instance config | No
|
|
249
|
-
|
|
|
250
|
-
|
|
|
472
|
+
| Feature | Built-in `claude` | [claude-code-mcp](https://github.com/steipete/claude-code-mcp) | **Claude Octopus** |
|
|
473
|
+
|---------|-------------------|----------------------------------------------------------------|--------------------|
|
|
474
|
+
| Approach | Built-in | CLI wrapping | Agent SDK |
|
|
475
|
+
| Tools per instance | 16 raw tools | 1 prompt tool | 5 (prompt, reply, timeline, transcript, report) |
|
|
476
|
+
| Multi-instance | No | No | Yes |
|
|
477
|
+
| Per-instance config | No | No | Yes (20+ env vars) |
|
|
478
|
+
| Init wizard | No | No | Yes (`init` + 5 templates) |
|
|
479
|
+
| Factory wizard | No | No | Yes |
|
|
480
|
+
| Session continuity | No | No | Yes |
|
|
481
|
+
| Cross-agent timeline | No | No | Yes |
|
|
482
|
+
| Web dashboard | No | No | Yes (live, SSE) |
|
|
483
|
+
| HTML reports | No | No | Yes |
|
|
251
484
|
|
|
252
485
|
## Development
|
|
253
486
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI prompt helpers and MCP client detection — extracted from init.ts.
|
|
3
|
+
*/
|
|
4
|
+
import { createInterface } from "node:readline/promises";
|
|
5
|
+
import type { AgentConfig } from "./templates.js";
|
|
6
|
+
export interface McpClient {
|
|
7
|
+
name: string;
|
|
8
|
+
configPath: string;
|
|
9
|
+
}
|
|
10
|
+
export interface McpConfig {
|
|
11
|
+
mcpServers?: Record<string, unknown>;
|
|
12
|
+
[key: string]: unknown;
|
|
13
|
+
}
|
|
14
|
+
export declare function detectMcpClients(): Promise<McpClient[]>;
|
|
15
|
+
export declare function readMcpConfig(path: string): Promise<McpConfig>;
|
|
16
|
+
export declare function writeMcpConfig(path: string, config: McpConfig): Promise<void>;
|
|
17
|
+
export declare function choose(rl: ReturnType<typeof createInterface>, prompt: string, options: {
|
|
18
|
+
label: string;
|
|
19
|
+
value: string;
|
|
20
|
+
}[]): Promise<string>;
|
|
21
|
+
export declare function confirm(rl: ReturnType<typeof createInterface>, prompt: string): Promise<boolean>;
|
|
22
|
+
export declare function buildCustomAgent(rl: ReturnType<typeof createInterface>): Promise<AgentConfig>;
|