agent-dispatch 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/.github/workflows/ci.yml +4 -0
  2. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/.github/workflows/publish.yml +5 -1
  3. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/CHANGELOG.md +121 -1
  4. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/PKG-INFO +44 -11
  5. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/README.md +43 -10
  6. agent_dispatch-0.6.0/SECURITY.md +77 -0
  7. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/agents.example.yaml +1 -0
  8. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/pyproject.toml +23 -1
  9. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/__init__.py +1 -1
  10. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/cache.py +15 -1
  11. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/cli.py +19 -6
  12. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/config.py +12 -2
  13. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/jobs.py +113 -3
  14. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/models.py +8 -0
  15. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/runner.py +229 -27
  16. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/src/agent_dispatch/server.py +240 -31
  17. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/test_cache.py +33 -2
  18. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/test_cli.py +35 -0
  19. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/test_config.py +12 -0
  20. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/test_jobs.py +176 -1
  21. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/test_models.py +5 -4
  22. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/test_runner.py +414 -0
  23. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/test_server.py +518 -22
  24. agent_dispatch-0.4.0/SECURITY.md +0 -22
  25. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/.github/dependabot.yml +0 -0
  26. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/.gitignore +0 -0
  27. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/LICENSE +0 -0
  28. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/assets/mascot.png +0 -0
  29. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/__init__.py +0 -0
  30. {agent_dispatch-0.4.0 → agent_dispatch-0.6.0}/tests/conftest.py +0 -0
@@ -6,6 +6,10 @@ on:
6
6
  pull_request:
7
7
  branches: [main]
8
8
 
9
+ # Least privilege: CI only needs to read the repo.
10
+ permissions:
11
+ contents: read
12
+
9
13
  jobs:
10
14
  test:
11
15
  runs-on: ubuntu-latest
@@ -4,12 +4,16 @@ on:
4
4
  release:
5
5
  types: [published]
6
6
 
7
+ # Default to no privileges; the publish job opts into exactly what it needs.
8
+ permissions: {}
9
+
7
10
  jobs:
8
11
  publish:
9
12
  runs-on: ubuntu-latest
10
13
  environment: pypi
11
14
  permissions:
12
- id-token: write
15
+ id-token: write # OIDC token for PyPI Trusted Publisher
16
+ contents: read # checkout the tagged source
13
17
  steps:
14
18
  - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
15
19
 
@@ -7,6 +7,124 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.6.0] - 2026-06-04
11
+
12
+ Reliability release: timeouts stop being fatal, permission-blocked "successes"
13
+ become visible, async jobs show live progress.
14
+
15
+ ### Fixed
16
+ - **`dispatch_stream` was broken on current claude CLIs** — they reject
17
+ `--print --output-format stream-json` without `--verbose` ("requires
18
+ --verbose"), so every stream dispatch (and CLI `test --stream`) failed
19
+ immediately. The runner now passes `--verbose`. Caught by live verification
20
+ against the real CLI before this release; without it the async-worker
21
+ switch to streaming (below) would have broken all `dispatch_async` jobs.
22
+
23
+ ### Added
24
+ - **Per-call timeout override.** `dispatch`, `dispatch_session`,
25
+ `dispatch_stream`, and `dispatch_async` accept `timeout_seconds` (0 = agent
26
+ default; any other value is clamped to 10–7200); `dispatch_parallel` accepts it per item. Use
27
+ it for known-long tasks instead of editing the agent config. CLI:
28
+ `agent-dispatch test <name> --timeout N`.
29
+ - **Resumable timeouts.** Fresh dispatches pre-assign a session UUID via
30
+ `--session-id`, so a timed-out dispatch still returns a `session_id` — the
31
+ partial transcript survives the kill. The timeout error now spells out the
32
+ recovery options: resume via `dispatch_session(..., session_id=...)`, retry
33
+ with `timeout_seconds`, or go async.
34
+ - **Denied-tools visibility.** The claude CLI's `permission_denials` output is
35
+ parsed into `DispatchResult.denied_tools`. A dispatch that "succeeds" while
36
+ tools were blocked (the agent answers "I need permission for X") now carries
37
+ `denied_tools` + a `hint` that the result may be incomplete and how to grant
38
+ access. On `is_error` results, non-empty denials force
39
+ `error_type="permission"` even when the error text has no permission
40
+ keywords. CLI `test` prints the hint as a yellow note.
41
+ - **Async job progress.** Async workers now run with streaming: the job file
42
+ keeps a rolling tail (last 20 lines, throttled to ~1 write/sec) of assistant
43
+ text and tool-use events. `dispatch_status` returns it as `progress` while
44
+ running (kept afterwards as a post-mortem trace); `dispatch_jobs` shows
45
+ `last_progress` for running jobs. New `JobStore.update_progress` (refuses
46
+ terminal jobs, so a trailing write can't resurrect a finished job).
47
+
48
+ ### Changed
49
+ - Timeout error messages are actionable (mention `timeout_seconds`,
50
+ `dispatch_async`, `agent-dispatch update --timeout`, and the resumable
51
+ session) instead of just "increase timeout in agents.yaml".
52
+ - Plain-text fallback successes now carry the generated `session_id`; the
53
+ stream "no result line" fallback does too (a crash mid-stream stays
54
+ resumable).
55
+ - **Old-CLI self-healing**: if the installed claude CLI predates
56
+ `--session-id`, dispatch detects the "unknown option" rejection and retries
57
+ once without the flag (logged warning; timed-out dispatches lose
58
+ resumability) instead of failing every dispatch.
59
+ - `dispatch_parallel` validates per-item `timeout_seconds` / `summary_chars`
60
+ numerically **up front** — a bad value rejects the whole call before any
61
+ dispatch runs, consistent with the structural validation contract.
62
+ - `denied_tools` parsing is bounded (10 entries, 100 chars per name) — the
63
+ field comes from the dispatched subprocess's output, which is untrusted;
64
+ unbounded lists could inflate job files and `return_ref` payloads.
65
+
66
+ ## [0.5.0] - 2026-06-01
67
+
68
+ Security-hardening release. A multi-agent audit of the codebase surfaced
69
+ several issues; the confirmed ones are fixed here, plus job cancellation,
70
+ cache bounding, and stale-job recovery.
71
+
72
+ ### Security
73
+ - **Path traversal in async jobs (fixed).** `dispatch_status`, `dispatch_wait`,
74
+ and `fetch_result` accept a caller-supplied `job_id`/`ref` that previously flowed
75
+ straight into `JobStore`'s file-path construction. A crafted value such as
76
+ `../../secret` could read any Job-shaped `.json` file outside the jobs
77
+ directory. Job ids are now validated against `^[0-9a-f]{32}$` at the tool
78
+ boundary (`_validate_ref`), in `JobStore.get`, and in `JobStore._path`
79
+ (defense in depth). Malformed ids are rejected without touching the
80
+ filesystem. New helper `jobs.is_valid_job_id`.
81
+ - **Argument/flag injection via structured CLI fields (fixed).** A
82
+ `session_id` (caller-controlled in `dispatch_session`) — or a misconfigured
83
+ `model`, `permission_mode`, or tool name — that started with `-` was placed
84
+ in the argument position after a flag (e.g. `--resume <session_id>`) and the
85
+ `claude` CLI parsed it as a *new* flag, allowing options like
86
+ `--permission-mode bypassPermissions` to be smuggled in. `_build_command`
87
+ now rejects any such value via `_reject_flaglike` (raising
88
+ `runner.ArgInjectionError`); `dispatch`/`dispatch_stream` surface it as a
89
+ clean failed result, never spawning a subprocess.
90
+ - **Tightened file permissions.** Job files are written `0o600` and the jobs
91
+ directory is created `0o700` (they hold full task/context/result payloads
92
+ that may contain secrets). `save_config` now writes `agents.yaml` `0o600`
93
+ and its parent directory `0o700`. All `chmod`s are best-effort (skipped on
94
+ platforms without POSIX modes).
95
+
96
+ ### Added
97
+ - `dispatch_cancel(job_id)` MCP tool — cancel a *pending* async job before it
98
+ starts. Running jobs are left to finish (their subprocess can't be safely
99
+ interrupted); the tool reports an `outcome` of `cancelled`, `running`,
100
+ `already_terminal`, or `not_found`. Makes the previously-unreachable
101
+ `cancelled` job status real. Backed by `JobStore.cancel`, and the
102
+ cancel/start race is closed by `mark_running` refusing a cancelled job.
103
+ - Cache size bound — `CacheSettings.max_size` (default 1000) caps the
104
+ in-memory dispatch cache, evicting the oldest entry first (FIFO by insertion
105
+ time; read access does not refresh, since the timestamp also drives TTL),
106
+ preventing unbounded memory growth from many unique requests. `cache_stats`
107
+ now reports `max_size` and `evictions`.
108
+ - Stale-job recovery — on startup the server marks jobs left stuck in
109
+ `running` (older than 1h, e.g. from a crashed prior run) as `failed` so
110
+ callers don't poll them forever (`JobStore.recover_stale`).
111
+
112
+ ### Changed
113
+ - Input bounds hardened across MCP tools: `dispatch_jobs(limit)` clamped to
114
+ `[1, 1000]`; `dispatch_gc(max_age_days)` rejects non-finite values;
115
+ `summary_chars` (in `dispatch` and per-item `dispatch_parallel`) clamped to
116
+ `[0, 100000]`; `dispatch_parallel` rejects more than
117
+ `max(100, max_concurrency * 20)` items to bound subprocess fan-out.
118
+ - Async job worker now logs lifecycle transitions (running / finished) with
119
+ the job id for easier production debugging.
120
+ - Type hints filled in (`_ref_payload`, `_run_job`, `_run_one`).
121
+ - Lint surface expanded — ruff now enforces bugbear (`B`), bandit security
122
+ (`S`), import order (`I`), and pyupgrade (`UP`) in addition to the defaults,
123
+ with documented ignores for the trusted `claude` subprocess calls.
124
+ - `SECURITY.md` rewritten: accurate supported-versions table and an expanded
125
+ threat model (bypassPermissions, on-disk job files, env inheritance,
126
+ best-effort recursion depth, argument-injection mitigation).
127
+
10
128
  ## [0.4.0] - 2026-05-15
11
129
 
12
130
  ### Added
@@ -152,7 +270,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
152
270
  - Dependabot for `pip` + `github-actions`, GitHub Actions pinned to
153
271
  commit SHAs for supply-chain integrity.
154
272
 
155
- [Unreleased]: https://github.com/ginkida/agent-dispatch/compare/v0.4.0...HEAD
273
+ [Unreleased]: https://github.com/ginkida/agent-dispatch/compare/v0.6.0...HEAD
274
+ [0.6.0]: https://github.com/ginkida/agent-dispatch/compare/v0.5.0...v0.6.0
275
+ [0.5.0]: https://github.com/ginkida/agent-dispatch/compare/v0.4.0...v0.5.0
156
276
  [0.4.0]: https://github.com/ginkida/agent-dispatch/compare/v0.3.0...v0.4.0
157
277
  [0.3.0]: https://github.com/ginkida/agent-dispatch/compare/v0.2.2...v0.3.0
158
278
  [0.2.2]: https://github.com/ginkida/agent-dispatch/compare/v0.2.1...v0.2.2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agent-dispatch
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: MCP server that lets Claude Code agents delegate tasks to agents in other project directories
5
5
  Project-URL: Homepage, https://github.com/ginkida/agent-dispatch
6
6
  Project-URL: Repository, https://github.com/ginkida/agent-dispatch
@@ -130,6 +130,7 @@ One-shot task delegation. Results are cached — identical requests within TTL r
130
130
  | `response_format` | string | no | `"json"` to request a single JSON value; the parsed result lands in `parsed_result`. Empty = free-form text. |
131
131
  | `return_ref` | bool | no | When `true`, returns just a `ref` + summary preview instead of the full result text. Use `fetch_result(ref)` to load the full text on demand. |
132
132
  | `summary_chars` | int | no | Max chars of result text to include in the ref response (default 500). |
133
+ | `timeout_seconds` | int | no | One-off timeout override for this call (0 = agent's configured timeout; any other value is clamped to 10–7200). No config edit needed for known-long tasks. |
133
134
 
134
135
  ```json
135
136
  // Response (success)
@@ -155,6 +156,21 @@ One-shot task delegation. Results are cached — identical requests within TTL r
155
156
 
156
157
  **`error_type` values:** `permission` (tool/action denied), `timeout`, `recursion` (dispatch depth exceeded), `not_found` (missing directory or CLI), `cli_error` (other failures). Permission errors include an actionable hint.
157
158
 
159
+ **Resumable timeouts:** every fresh dispatch pre-assigns a session UUID (`--session-id`), so a timed-out dispatch still returns a `session_id` — the partial transcript survives the kill. The timeout error spells out the recovery: resume with `dispatch_session(agent, "Continue where you left off", session_id=...)`, retry with a bigger `timeout_seconds`, or use `dispatch_async`.
160
+
161
+ **Denied-tools visibility:** in non-interactive mode the claude CLI auto-denies tools the agent isn't allowed to use — the agent then often "succeeds" with an answer like *"I need your permission for one read-only query"*. When that happens the response carries the deterministic signal: `denied_tools` (parsed from the CLI's `permission_denials`) plus a `hint` explaining the result may be incomplete and how to grant access. `success` stays `true` — it's a soft signal, not a failure.
162
+
163
+ ```json
164
+ // Response (success, but a tool was blocked)
165
+ {
166
+ "agent": "analysis",
167
+ "success": true,
168
+ "result": "Here is the offline mapping. To finish I'd need to run one read-only query...",
169
+ "denied_tools": ["Bash"],
170
+ "hint": "1 tool call(s) were denied by permissions: Bash. The result may be incomplete..."
171
+ }
172
+ ```
173
+
158
174
  **Structured JSON output:** pass `response_format="json"` to ask the agent for a single JSON value. The runner appends an instruction footer ("respond with a single valid JSON value, no fences, no prose") and on success parses the response — the parsed value lands in `parsed_result`. The raw text is always in `result`. Parse failures leave `parsed_result=None` but don't fail the dispatch (soft mode).
159
175
 
160
176
  ```json
@@ -195,6 +211,9 @@ Multi-turn: continue a conversation with an agent. First call starts a session,
195
211
  | `context` | string | no | Extra context |
196
212
  | `caller` | string | no | Who is dispatching |
197
213
  | `goal` | string | no | Broader objective |
214
+ | `timeout_seconds` | int | no | One-off timeout override (0 = agent default; any other value is clamped to 10–7200) |
215
+
216
+ `dispatch_session` is also the **timeout recovery path**: a timed-out `dispatch` returns a `session_id` — pass it here with `task="Continue where you left off"` to salvage the partial work instead of restarting.
198
217
 
199
218
  ```
200
219
  Turn 1: dispatch_session("infra", "List running containers")
@@ -210,7 +229,7 @@ Run multiple tasks concurrently. Much faster than sequential `dispatch` calls.
210
229
 
211
230
  | Parameter | Type | Required | Description |
212
231
  |-----------|------|----------|-------------|
213
- | `dispatches` | string (JSON) | yes | JSON array of `{"agent", "task", "context?", "caller?", "goal?"}` |
232
+ | `dispatches` | string (JSON) | yes | JSON array of `{"agent", "task", "context?", "caller?", "goal?", "response_format?", "return_ref?", "summary_chars?", "timeout_seconds?"}` |
214
233
  | `aggregate` | string | no | Agent name to synthesize all results into one answer |
215
234
 
216
235
  **Important:** `dispatches` is a JSON string, not a list.
@@ -250,7 +269,7 @@ Run multiple tasks concurrently. Much faster than sequential `dispatch` calls.
250
269
 
251
270
  Same as `dispatch` but shows live progress while the agent works. Use for long-running tasks. Not cached.
252
271
 
253
- Parameters are identical to `dispatch`.
272
+ Parameters are the same as `dispatch`, except that `return_ref`/`summary_chars` are not supported (streaming is incompatible with ref-mode).
254
273
 
255
274
  ### `dispatch_dialogue`
256
275
 
@@ -339,18 +358,20 @@ fetch_result(ref="8f3a...e1", max_chars=2000) -> truncated, plus {"truncated":
339
358
 
340
359
  Refs reuse the same storage as `dispatch_async` jobs (under `~/.config/agent-dispatch/jobs/`), so any `job_id` returned by `dispatch_async` is also a valid `ref` for `fetch_result`. `parsed_result` (when `response_format="json"` is set) is small and is always inlined directly in the ref response — no second fetch needed.
341
360
 
342
- ### Async dispatch — `dispatch_async`, `dispatch_status`, `dispatch_wait`, `dispatch_jobs`, `dispatch_gc`
361
+ ### Async dispatch — `dispatch_async`, `dispatch_status`, `dispatch_wait`, `dispatch_cancel`, `dispatch_jobs`, `dispatch_gc`
343
362
 
344
363
  When a dispatched task is going to take a while, you don't want to block your own tool slot for minutes. Async dispatch returns a `job_id` immediately and lets you check back when you're ready.
345
364
 
346
365
  ```
347
- // 1. fire and forget
366
+ // 1. fire and forget (timeout_seconds= works here too for known-long tasks)
348
367
  dispatch_async(agent="infra", task="audit every container log for OOM kills today")
349
368
  -> {"job_id": "8f3a...e1", "status": "pending", "agent": "infra"}
350
369
 
351
370
  // 2. do other work, then check progress (non-blocking)
371
+ // `progress` is a rolling tail of what the agent is doing right now
352
372
  dispatch_status(job_id="8f3a...e1")
353
- -> {"id": "8f3a...e1", "status": "running", "started_at": 1730000123.4, ...}
373
+ -> {"id": "8f3a...e1", "status": "running", "started_at": 1730000123.4,
374
+ "progress": ["Using tool: Bash", "Scanning container logs for OOM events..."], ...}
354
375
 
355
376
  // 3. or block until done (with a timeout cap)
356
377
  dispatch_wait(job_id="8f3a...e1", timeout_seconds=120)
@@ -360,9 +381,13 @@ dispatch_wait(job_id="8f3a...e1", timeout_seconds=120)
360
381
  -> {"id": "...", "status": "running", "timed_out_waiting": true}
361
382
  ```
362
383
 
384
+ `dispatch_cancel(job_id)` cancels a job that is still **pending** (before its subprocess starts) — a running job is left to finish, since its `claude` subprocess can't be safely interrupted. The response carries an `outcome` of `cancelled`, `running`, `already_terminal`, or `not_found`.
385
+
386
+ Async workers run with streaming under the hood: the job file keeps a rolling tail (last 20 lines, ~1 write/sec) of assistant text and tool-use events. `dispatch_status` shows it as `progress` while the job runs and keeps it afterwards as a post-mortem trace; `dispatch_jobs` shows `last_progress` for running jobs.
387
+
363
388
  `dispatch_jobs(status?)` lists recent jobs as summaries (filter by `pending` / `running` / `done` / `failed` / `cancelled`). `dispatch_gc(max_age_days=7)` purges terminal jobs older than the threshold — pending and running jobs are never deleted.
364
389
 
365
- Job state persists to disk at `~/.config/agent-dispatch/jobs/` (override with `AGENT_DISPATCH_JOBS_DIR`). One JSON file per job, atomic writes — safe to read or `ls` while jobs are in flight.
390
+ Job state persists to disk at `~/.config/agent-dispatch/jobs/` (override with `AGENT_DISPATCH_JOBS_DIR`). One JSON file per job, written owner-only (`0o600`) with atomic writes — safe to read or `ls` while jobs are in flight. Caller-supplied `job_id`s are validated as 32-char hex before any file access (no path traversal). On startup the server marks jobs left in `running` by a crashed prior instance as `failed`.
366
391
 
367
392
  | When to use async | When to use `dispatch` |
368
393
  |-------------------|------------------------|
@@ -390,6 +415,8 @@ All tools return errors as:
390
415
  | Need a combined summary from multiple agents | `dispatch_parallel` with `aggregate` |
391
416
  | Long task — don't block your tool slot | `dispatch_async` + `dispatch_wait` |
392
417
  | Check progress without blocking | `dispatch_status` |
418
+ | Known-long task, one-off | any dispatch tool with `timeout_seconds=...` |
419
+ | A dispatch timed out | `dispatch_session` with the `session_id` from the error |
393
420
 
394
421
  ## Configuration
395
422
 
@@ -418,10 +445,11 @@ settings:
418
445
  # - Read
419
446
  # - Edit
420
447
  max_dispatch_depth: 3 # recursion protection
421
- max_concurrency: 5 # max parallel claude -p processes
448
+ max_concurrency: 5 # max parallel claude -p processes (per dispatch path)
422
449
  cache:
423
450
  enabled: true
424
451
  ttl: 300 # seconds
452
+ max_size: 1000 # max cached entries; oldest evicted first (FIFO)
425
453
  ```
426
454
 
427
455
  Config is reloaded on every tool call — add agents without restarting.
@@ -459,11 +487,16 @@ agent-dispatch MCP server
459
487
 
460
488
  ## Safety
461
489
 
462
- - **Recursion protection** — `AGENT_DISPATCH_DEPTH` env var tracks nesting. Default limit: 3.
490
+ - **Recursion protection** — `AGENT_DISPATCH_DEPTH` env var tracks nesting. Default limit: 3. Best-effort across the subprocess boundary (see [SECURITY.md](SECURITY.md)).
491
+ - **Argument-injection guard** — structured CLI fields (`session_id`, `model`, `permission_mode`, tool names) that start with `-` are rejected so they can't smuggle extra `claude` flags.
492
+ - **Path-traversal guard** — caller-supplied `job_id`/`ref` values are validated as 32-char hex before any filesystem access.
493
+ - **Owner-only state** — job files (`0o600`) and `agents.yaml` (`0o600`) are written for the owner only; their directories are `0o700`.
463
494
  - **Cost control** — `max_budget_usd` per agent or globally.
464
- - **Concurrency** — `max_concurrency` (default: 5) limits parallel `claude -p` processes.
495
+ - **Concurrency** — `max_concurrency` (default: 5) caps parallel `claude -p` processes. Note: the sync and async dispatch paths use separate semaphores, so the worst-case total is `2 × max_concurrency`.
465
496
  - **Timeout** — per-agent or global (default: 300s). Orphaned processes are cleaned up.
466
- - **Caching** — identical `(agent, task, context)` requests return cached results. Only successes are cached. Sessions and dialogues are never cached.
497
+ - **Caching** — identical `(agent, task, context, caller, goal, response_format)` requests return cached results, bounded by `cache.max_size` (oldest entry evicted first). Only successes are cached. Sessions and dialogues are never cached.
498
+
499
+ See [SECURITY.md](SECURITY.md) for the full threat model (including the `bypassPermissions` escalation risk and on-disk job files).
467
500
 
468
501
  ## CLI
469
502
 
@@ -100,6 +100,7 @@ One-shot task delegation. Results are cached — identical requests within TTL r
100
100
  | `response_format` | string | no | `"json"` to request a single JSON value; the parsed result lands in `parsed_result`. Empty = free-form text. |
101
101
  | `return_ref` | bool | no | When `true`, returns just a `ref` + summary preview instead of the full result text. Use `fetch_result(ref)` to load the full text on demand. |
102
102
  | `summary_chars` | int | no | Max chars of result text to include in the ref response (default 500). |
103
+ | `timeout_seconds` | int | no | One-off timeout override for this call (0 = agent's configured timeout; any other value is clamped to 10–7200). No config edit needed for known-long tasks. |
103
104
 
104
105
  ```json
105
106
  // Response (success)
@@ -125,6 +126,21 @@ One-shot task delegation. Results are cached — identical requests within TTL r
125
126
 
126
127
  **`error_type` values:** `permission` (tool/action denied), `timeout`, `recursion` (dispatch depth exceeded), `not_found` (missing directory or CLI), `cli_error` (other failures). Permission errors include an actionable hint.
127
128
 
129
+ **Resumable timeouts:** every fresh dispatch pre-assigns a session UUID (`--session-id`), so a timed-out dispatch still returns a `session_id` — the partial transcript survives the kill. The timeout error spells out the recovery: resume with `dispatch_session(agent, "Continue where you left off", session_id=...)`, retry with a bigger `timeout_seconds`, or use `dispatch_async`.
130
+
131
+ **Denied-tools visibility:** in non-interactive mode the claude CLI auto-denies tools the agent isn't allowed to use — the agent then often "succeeds" with an answer like *"I need your permission for one read-only query"*. When that happens the response carries the deterministic signal: `denied_tools` (parsed from the CLI's `permission_denials`) plus a `hint` explaining the result may be incomplete and how to grant access. `success` stays `true` — it's a soft signal, not a failure.
132
+
133
+ ```json
134
+ // Response (success, but a tool was blocked)
135
+ {
136
+ "agent": "analysis",
137
+ "success": true,
138
+ "result": "Here is the offline mapping. To finish I'd need to run one read-only query...",
139
+ "denied_tools": ["Bash"],
140
+ "hint": "1 tool call(s) were denied by permissions: Bash. The result may be incomplete..."
141
+ }
142
+ ```
143
+
128
144
  **Structured JSON output:** pass `response_format="json"` to ask the agent for a single JSON value. The runner appends an instruction footer ("respond with a single valid JSON value, no fences, no prose") and on success parses the response — the parsed value lands in `parsed_result`. The raw text is always in `result`. Parse failures leave `parsed_result=None` but don't fail the dispatch (soft mode).
129
145
 
130
146
  ```json
@@ -165,6 +181,9 @@ Multi-turn: continue a conversation with an agent. First call starts a session,
165
181
  | `context` | string | no | Extra context |
166
182
  | `caller` | string | no | Who is dispatching |
167
183
  | `goal` | string | no | Broader objective |
184
+ | `timeout_seconds` | int | no | One-off timeout override (0 = agent default; any other value is clamped to 10–7200) |
185
+
186
+ `dispatch_session` is also the **timeout recovery path**: a timed-out `dispatch` returns a `session_id` — pass it here with `task="Continue where you left off"` to salvage the partial work instead of restarting.
168
187
 
169
188
  ```
170
189
  Turn 1: dispatch_session("infra", "List running containers")
@@ -180,7 +199,7 @@ Run multiple tasks concurrently. Much faster than sequential `dispatch` calls.
180
199
 
181
200
  | Parameter | Type | Required | Description |
182
201
  |-----------|------|----------|-------------|
183
- | `dispatches` | string (JSON) | yes | JSON array of `{"agent", "task", "context?", "caller?", "goal?"}` |
202
+ | `dispatches` | string (JSON) | yes | JSON array of `{"agent", "task", "context?", "caller?", "goal?", "response_format?", "return_ref?", "summary_chars?", "timeout_seconds?"}` |
184
203
  | `aggregate` | string | no | Agent name to synthesize all results into one answer |
185
204
 
186
205
  **Important:** `dispatches` is a JSON string, not a list.
@@ -220,7 +239,7 @@ Run multiple tasks concurrently. Much faster than sequential `dispatch` calls.
220
239
 
221
240
  Same as `dispatch` but shows live progress while the agent works. Use for long-running tasks. Not cached.
222
241
 
223
- Parameters are identical to `dispatch`.
242
+ Parameters are the same as `dispatch`, except that `return_ref`/`summary_chars` are not supported (streaming is incompatible with ref-mode).
224
243
 
225
244
  ### `dispatch_dialogue`
226
245
 
@@ -309,18 +328,20 @@ fetch_result(ref="8f3a...e1", max_chars=2000) -> truncated, plus {"truncated":
309
328
 
310
329
  Refs reuse the same storage as `dispatch_async` jobs (under `~/.config/agent-dispatch/jobs/`), so any `job_id` returned by `dispatch_async` is also a valid `ref` for `fetch_result`. `parsed_result` (when `response_format="json"` is set) is small and is always inlined directly in the ref response — no second fetch needed.
311
330
 
312
- ### Async dispatch — `dispatch_async`, `dispatch_status`, `dispatch_wait`, `dispatch_jobs`, `dispatch_gc`
331
+ ### Async dispatch — `dispatch_async`, `dispatch_status`, `dispatch_wait`, `dispatch_cancel`, `dispatch_jobs`, `dispatch_gc`
313
332
 
314
333
  When a dispatched task is going to take a while, you don't want to block your own tool slot for minutes. Async dispatch returns a `job_id` immediately and lets you check back when you're ready.
315
334
 
316
335
  ```
317
- // 1. fire and forget
336
+ // 1. fire and forget (timeout_seconds= works here too for known-long tasks)
318
337
  dispatch_async(agent="infra", task="audit every container log for OOM kills today")
319
338
  -> {"job_id": "8f3a...e1", "status": "pending", "agent": "infra"}
320
339
 
321
340
  // 2. do other work, then check progress (non-blocking)
341
+ // `progress` is a rolling tail of what the agent is doing right now
322
342
  dispatch_status(job_id="8f3a...e1")
323
- -> {"id": "8f3a...e1", "status": "running", "started_at": 1730000123.4, ...}
343
+ -> {"id": "8f3a...e1", "status": "running", "started_at": 1730000123.4,
344
+ "progress": ["Using tool: Bash", "Scanning container logs for OOM events..."], ...}
324
345
 
325
346
  // 3. or block until done (with a timeout cap)
326
347
  dispatch_wait(job_id="8f3a...e1", timeout_seconds=120)
@@ -330,9 +351,13 @@ dispatch_wait(job_id="8f3a...e1", timeout_seconds=120)
330
351
  -> {"id": "...", "status": "running", "timed_out_waiting": true}
331
352
  ```
332
353
 
354
+ `dispatch_cancel(job_id)` cancels a job that is still **pending** (before its subprocess starts) — a running job is left to finish, since its `claude` subprocess can't be safely interrupted. The response carries an `outcome` of `cancelled`, `running`, `already_terminal`, or `not_found`.
355
+
356
+ Async workers run with streaming under the hood: the job file keeps a rolling tail (last 20 lines, ~1 write/sec) of assistant text and tool-use events. `dispatch_status` shows it as `progress` while the job runs and keeps it afterwards as a post-mortem trace; `dispatch_jobs` shows `last_progress` for running jobs.
357
+
333
358
  `dispatch_jobs(status?)` lists recent jobs as summaries (filter by `pending` / `running` / `done` / `failed` / `cancelled`). `dispatch_gc(max_age_days=7)` purges terminal jobs older than the threshold — pending and running jobs are never deleted.
334
359
 
335
- Job state persists to disk at `~/.config/agent-dispatch/jobs/` (override with `AGENT_DISPATCH_JOBS_DIR`). One JSON file per job, atomic writes — safe to read or `ls` while jobs are in flight.
360
+ Job state persists to disk at `~/.config/agent-dispatch/jobs/` (override with `AGENT_DISPATCH_JOBS_DIR`). One JSON file per job, written owner-only (`0o600`) with atomic writes — safe to read or `ls` while jobs are in flight. Caller-supplied `job_id`s are validated as 32-char hex before any file access (no path traversal). On startup the server marks jobs left in `running` by a crashed prior instance as `failed`.
336
361
 
337
362
  | When to use async | When to use `dispatch` |
338
363
  |-------------------|------------------------|
@@ -360,6 +385,8 @@ All tools return errors as:
360
385
  | Need a combined summary from multiple agents | `dispatch_parallel` with `aggregate` |
361
386
  | Long task — don't block your tool slot | `dispatch_async` + `dispatch_wait` |
362
387
  | Check progress without blocking | `dispatch_status` |
388
+ | Known-long task, one-off | any dispatch tool with `timeout_seconds=...` |
389
+ | A dispatch timed out | `dispatch_session` with the `session_id` from the error |
363
390
 
364
391
  ## Configuration
365
392
 
@@ -388,10 +415,11 @@ settings:
388
415
  # - Read
389
416
  # - Edit
390
417
  max_dispatch_depth: 3 # recursion protection
391
- max_concurrency: 5 # max parallel claude -p processes
418
+ max_concurrency: 5 # max parallel claude -p processes (per dispatch path)
392
419
  cache:
393
420
  enabled: true
394
421
  ttl: 300 # seconds
422
+ max_size: 1000 # max cached entries; oldest evicted first (FIFO)
395
423
  ```
396
424
 
397
425
  Config is reloaded on every tool call — add agents without restarting.
@@ -429,11 +457,16 @@ agent-dispatch MCP server
429
457
 
430
458
  ## Safety
431
459
 
432
- - **Recursion protection** — `AGENT_DISPATCH_DEPTH` env var tracks nesting. Default limit: 3.
460
+ - **Recursion protection** — `AGENT_DISPATCH_DEPTH` env var tracks nesting. Default limit: 3. Best-effort across the subprocess boundary (see [SECURITY.md](SECURITY.md)).
461
+ - **Argument-injection guard** — structured CLI fields (`session_id`, `model`, `permission_mode`, tool names) that start with `-` are rejected so they can't smuggle extra `claude` flags.
462
+ - **Path-traversal guard** — caller-supplied `job_id`/`ref` values are validated as 32-char hex before any filesystem access.
463
+ - **Owner-only state** — job files (`0o600`) and `agents.yaml` (`0o600`) are written for the owner only; their directories are `0o700`.
433
464
  - **Cost control** — `max_budget_usd` per agent or globally.
434
- - **Concurrency** — `max_concurrency` (default: 5) limits parallel `claude -p` processes.
465
+ - **Concurrency** — `max_concurrency` (default: 5) caps parallel `claude -p` processes. Note: the sync and async dispatch paths use separate semaphores, so the worst-case total is `2 × max_concurrency`.
435
466
  - **Timeout** — per-agent or global (default: 300s). Orphaned processes are cleaned up.
436
- - **Caching** — identical `(agent, task, context)` requests return cached results. Only successes are cached. Sessions and dialogues are never cached.
467
+ - **Caching** — identical `(agent, task, context, caller, goal, response_format)` requests return cached results, bounded by `cache.max_size` (oldest entry evicted first). Only successes are cached. Sessions and dialogues are never cached.
468
+
469
+ See [SECURITY.md](SECURITY.md) for the full threat model (including the `bypassPermissions` escalation risk and on-disk job files).
437
470
 
438
471
  ## CLI
439
472
 
@@ -0,0 +1,77 @@
1
+ # Security Policy
2
+
3
+ ## Reporting a Vulnerability
4
+
5
+ If you discover a security vulnerability, please report it via [GitHub Security Advisories](https://github.com/ginkida/agent-dispatch/security/advisories/new).
6
+
7
+ **Do not** open a public issue for security vulnerabilities.
8
+
9
+ ## Supported Versions
10
+
11
+ | Version | Supported |
12
+ |---------|-----------|
13
+ | 0.5.x – 0.6.x | Yes |
14
+ | 0.4.x | Yes |
15
+ | ≤ 0.3.x | No |
16
+
17
+ ## Threat Model
18
+
19
+ `agent-dispatch` runs `claude -p` subprocesses in configured directories on
20
+ behalf of a calling Claude Code agent. The MCP caller and the agent
21
+ configurations are part of the same trust domain as the user running the
22
+ server — this is a developer tool, not a multi-tenant service. With that in
23
+ mind, the security-relevant areas are:
24
+
25
+ ### Subprocess execution
26
+ - Tasks/context strings are passed as **argument-list** elements to
27
+ `subprocess.run`/`Popen` (never `shell=True`), so there is no shell
28
+ injection.
29
+ - **Argument injection is guarded.** Structured fields placed next to a CLI
30
+ flag (`session_id` → `--resume`, `model` → `--model`, `permission_mode`,
31
+ and tool names) are rejected if they start with `-`, which the `claude`
32
+ CLI would otherwise parse as a *separate* flag. See
33
+ `runner._reject_flaglike` / `runner.ArgInjectionError`.
34
+
35
+ ### Permission escalation (`bypassPermissions`)
36
+ - Setting `permission_mode: bypassPermissions` (or a permissive
37
+ `default_permission_mode`) disables Claude Code's permission prompts for
38
+ that agent — it can use any tool without confirmation. Only enable it for
39
+ agents whose project directories you trust. Prefer `allowed_tools` /
40
+ `disallowed_tools` for least privilege.
41
+ - A dispatched agent running with broad permissions can, in principle, start
42
+ its own `claude`/dispatch chain. Recursion depth (`AGENT_DISPATCH_DEPTH`,
43
+ bounded by `max_dispatch_depth`) is **best-effort**: it crosses the process
44
+ boundary via an environment variable, so a deliberately hostile agent that
45
+ clears its environment can reset the counter. It protects against accidental
46
+ A→B→A loops, not against an adversarial agent.
47
+
48
+ ### On-disk state
49
+ - Async/`return_ref` job records persist to
50
+ `~/.config/agent-dispatch/jobs/<job_id>.json` (override with
51
+ `AGENT_DISPATCH_JOBS_DIR`). They contain the full task, context, and result,
52
+ which may include sensitive output. Files are written `0o600` and the
53
+ directory `0o700` (owner-only). Call `dispatch_gc()` periodically to purge
54
+ old results.
55
+ - `agents.yaml` is written `0o600`. It records project paths and permission
56
+ settings.
57
+ - `job_id`s are unauthenticated 32-char hex UUIDs — anyone who can call the
58
+ MCP tools and knows a `job_id` can read its result. Don't relay `job_id`s
59
+ over untrusted channels. Caller-supplied `job_id`/`ref` values are validated
60
+ (`^[0-9a-f]{32}$`) before any filesystem access, blocking path traversal.
61
+
62
+ ### Environment & directories
63
+ - The dispatched subprocess inherits the **full parent environment**
64
+ (`os.environ.copy()`) — necessary for `claude` to find its credentials.
65
+ Keep secrets you don't want dispatched agents to see out of the shell that
66
+ launches the server.
67
+ - Agent directories are resolved to absolute paths via `Path.resolve()` and
68
+ must exist at registration time.
69
+
70
+ ### Cost
71
+ - `max_budget_usd` (per agent or as a default) caps spend per dispatch.
72
+
73
+ ## Reproducibility & CI
74
+
75
+ Third-party GitHub Actions are pinned to commit SHAs; workflows run with
76
+ least-privilege `permissions`. Releases publish to PyPI via OIDC Trusted
77
+ Publishing (no long-lived tokens).
@@ -45,3 +45,4 @@ settings:
45
45
  cache:
46
46
  enabled: true
47
47
  ttl: 300 # seconds; identical (agent, task, context, caller, goal, response_format) requests are cached
48
+ max_size: 1000 # max cached entries; oldest is evicted first (FIFO)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "agent-dispatch"
3
- version = "0.4.0"
3
+ version = "0.6.0"
4
4
  description = "MCP server that lets Claude Code agents delegate tasks to agents in other project directories"
5
5
  readme = "README.md"
6
6
  license = "MIT"
@@ -47,6 +47,28 @@ asyncio_mode = "auto"
47
47
  target-version = "py310"
48
48
  line-length = 100
49
49
 
50
+ [tool.ruff.lint]
51
+ select = [
52
+ "E", "W", # pycodestyle
53
+ "F", # pyflakes
54
+ "B", # flake8-bugbear (likely bugs)
55
+ "I", # isort (import order)
56
+ "UP", # pyupgrade (modern syntax)
57
+ "S", # flake8-bandit (security)
58
+ ]
59
+ ignore = [
60
+ # The dispatch family shells out to the trusted `claude` CLI with argument
61
+ # lists (never shell=True); see runner._build_command and the arg-injection
62
+ # guard (_reject_flaglike). Partial path is intentional — `claude` is
63
+ # resolved from PATH.
64
+ "S603", # subprocess call with possibly-untrusted input
65
+ "S607", # starting a process with a partial executable path
66
+ ]
67
+
68
+ [tool.ruff.lint.per-file-ignores]
69
+ # Tests legitimately assert and use throwaway /tmp paths.
70
+ "tests/**" = ["S101", "S108"]
71
+
50
72
  [project.optional-dependencies]
51
73
  dev = [
52
74
  "pytest>=8.0",
@@ -1,3 +1,3 @@
1
1
  """agent-dispatch: Delegate tasks between Claude Code agents across projects."""
2
2
 
3
- __version__ = "0.4.0"
3
+ __version__ = "0.6.0"
@@ -23,12 +23,14 @@ class DispatchCache:
23
23
  requests with different framing would collide and return the wrong response.
24
24
  """
25
25
 
26
- def __init__(self, ttl: int = 300) -> None:
26
+ def __init__(self, ttl: int = 300, max_size: int = 1000) -> None:
27
27
  self._ttl = ttl
28
+ self._max_size = max_size
28
29
  self._store: dict[str, tuple[float, DispatchResult]] = {}
29
30
  self._lock = threading.Lock()
30
31
  self._hits = 0
31
32
  self._misses = 0
33
+ self._evictions = 0
32
34
 
33
35
  @staticmethod
34
36
  def _make_key(
@@ -89,6 +91,15 @@ class DispatchCache:
89
91
  return # don't cache failures
90
92
  key = self._make_key(agent, task, context, caller, goal, response_format)
91
93
  with self._lock:
94
+ # Bound memory: when at capacity and inserting a new key, evict the
95
+ # oldest entry by insertion time (FIFO). We intentionally do NOT
96
+ # refresh timestamps on read — the timestamp also drives TTL expiry,
97
+ # so touching it on access would turn TTL into idle-time. Refreshing
98
+ # an existing key never triggers eviction, though it does reset that key's age.
99
+ if key not in self._store and len(self._store) >= self._max_size:
100
+ oldest = min(self._store, key=lambda k: self._store[k][0])
101
+ del self._store[oldest]
102
+ self._evictions += 1
92
103
  self._store[key] = (time.monotonic(), result)
93
104
 
94
105
  def clear(self) -> int:
@@ -97,6 +108,7 @@ class DispatchCache:
97
108
  self._store.clear()
98
109
  self._hits = 0
99
110
  self._misses = 0
111
+ self._evictions = 0
100
112
  return count
101
113
 
102
114
  def evict_expired(self) -> int:
@@ -112,8 +124,10 @@ class DispatchCache:
112
124
  total = self._hits + self._misses
113
125
  return {
114
126
  "size": len(self._store),
127
+ "max_size": self._max_size,
115
128
  "hits": self._hits,
116
129
  "misses": self._misses,
130
+ "evictions": self._evictions,
117
131
  "hit_rate": round(self._hits / total, 3) if total else 0.0,
118
132
  "ttl": self._ttl,
119
133
  }