@mmerterden/multi-agent-pipeline 10.0.6 → 10.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +143 -0
- package/README.md +5 -2
- package/docs/FIGMA_PIPELINE.md +12 -0
- package/docs/features.md +2 -0
- package/package.json +1 -1
- package/pipeline/agents/android-architect.md +3 -3
- package/pipeline/agents/backend-architect.md +3 -3
- package/pipeline/agents/code-reviewer.md +4 -4
- package/pipeline/agents/ios-architect.md +3 -3
- package/pipeline/agents/security-auditor.md +3 -3
- package/pipeline/commands/multi-agent/dev-autopilot.md +3 -3
- package/pipeline/commands/multi-agent/dev-local-autopilot.md +2 -2
- package/pipeline/commands/multi-agent/dev-local.md +3 -3
- package/pipeline/commands/multi-agent/dev.md +9 -9
- package/pipeline/commands/multi-agent/help.md +10 -10
- package/pipeline/commands/multi-agent/refs/channels/jira.md +1 -0
- package/pipeline/commands/multi-agent/refs/cross-cli-contract.md +7 -3
- package/pipeline/commands/multi-agent/refs/features/model-fallback.md +36 -18
- package/pipeline/commands/multi-agent/refs/phases/operations.md +15 -5
- package/pipeline/commands/multi-agent/refs/phases/phase-0-init.md +16 -2
- package/pipeline/commands/multi-agent/refs/phases/phase-1-analysis.md +8 -3
- package/pipeline/commands/multi-agent/refs/phases/phase-2-planning.md +1 -1
- package/pipeline/commands/multi-agent/refs/phases/phase-4-review.md +30 -8
- package/pipeline/commands/multi-agent/resume.md +4 -1
- package/pipeline/commands/multi-agent.md +5 -5
- package/pipeline/lib/fetch-confluence.sh +2 -2
- package/pipeline/lib/fetch-crashlytics.sh +2 -2
- package/pipeline/lib/fetch-fortify.sh +1 -1
- package/pipeline/lib/fetch-swagger.sh +1 -1
- package/pipeline/lib/figma-screenshot.sh +2 -2
- package/pipeline/preferences-template.json +8 -1
- package/pipeline/schemas/agent-state.schema.json +8 -0
- package/pipeline/schemas/figma-project-config.schema.json +39 -0
- package/pipeline/schemas/prefs.schema.json +3 -3
- package/pipeline/scripts/cost-table.json +0 -6
- package/pipeline/scripts/fixtures/install-layout.tsv +2 -2
- package/pipeline/scripts/phase-tracker.sh +7 -0
- package/pipeline/scripts/smoke-model-fallback.sh +20 -12
- package/pipeline/scripts/validate-state.mjs +108 -0
- package/pipeline/scripts/write-state.mjs +15 -4
- package/pipeline/skills/figma-android/README.md +3 -1
- package/pipeline/skills/figma-common/README.md +8 -1
- package/pipeline/skills/figma-common/figma-bottom-sheets/SKILL.md +152 -0
- package/pipeline/skills/figma-common/figma-evolve-component/SKILL.md +61 -0
- package/pipeline/skills/figma-common/figma-navigation/SKILL.md +156 -0
- package/pipeline/skills/figma-common/figma-overlays/SKILL.md +142 -0
- package/pipeline/skills/figma-common/figma-ui-patterns/SKILL.md +1 -0
- package/pipeline/skills/figma-common/figma-ui-patterns/patterns/animated-gradient-border.md +116 -0
- package/pipeline/skills/figma-ios/figma-to-component/SKILL.md +15 -0
- package/pipeline/skills/figma-ios/figma-to-component/phases/phase-3d-patterns.md +31 -0
- package/pipeline/skills/figma-ios/figma-to-component/phases/phase-4b-view.md +10 -0
- package/pipeline/skills/figma-ios/figma-to-component/reference/accessibility.md +55 -1
- package/pipeline/skills/figma-ios/figma-to-component/reference/orchestrator-discipline.md +1 -1
|
@@ -1,17 +1,28 @@
|
|
|
1
|
-
# Model Fallback Contract (
|
|
1
|
+
# Model Fallback Contract (v10.1.0)
|
|
2
2
|
|
|
3
|
-
Personas that declare `preferredModel:
|
|
4
|
-
(`claude-
|
|
3
|
+
Personas that declare `preferredModel: opus` route to the top available
|
|
4
|
+
intelligence tier (`claude-opus-4-8`). Opus access can be quota-limited or
|
|
5
5
|
temporarily unavailable. This contract defines when and how the orchestrator
|
|
6
6
|
falls back, deterministically, without ever editing persona files at runtime.
|
|
7
7
|
|
|
8
|
+
> **Fable retired (2026-06).** Earlier versions routed architect / Reviewer-1 /
|
|
9
|
+
> triage personas to `claude-fable-5` as the top tier. Fable 5 is no longer
|
|
10
|
+
> available, so opus is now the top tier and the persona frontmatter declares
|
|
11
|
+
> `preferredModel: opus`. The `premiumTierUntil` date gate stays in the contract
|
|
12
|
+
> as a generic mechanism for any future plan-window-limited premium tier.
|
|
13
|
+
|
|
8
14
|
## Tier ladder
|
|
9
15
|
|
|
10
16
|
```
|
|
11
|
-
|
|
17
|
+
opus -> sonnet -> haiku
|
|
12
18
|
```
|
|
13
19
|
|
|
14
|
-
One step down per trigger
|
|
20
|
+
One step down per trigger, walking the ladder until a tier dispatches or the
|
|
21
|
+
floor (`haiku`) is reached. `haiku` is the last-resort floor: a run that reaches
|
|
22
|
+
it is heavily degraded but still makes progress instead of hard-halting because
|
|
23
|
+
two tiers happened to be unavailable at once. A dispatch error at the floor is a
|
|
24
|
+
genuine outage - it falls through to the normal phase-error path (pause ->
|
|
25
|
+
resume), since there is nothing left to step down to. The mechanism is the existing per-dispatch override:
|
|
15
26
|
set `PHASE_MODEL_OVERRIDE=<fallback>` before dispatch (the orchestrator exports
|
|
16
27
|
`CLAUDE_CODE_SUBAGENT_MODEL` on Claude Code, or passes `--model` on Copilot
|
|
17
28
|
CLI). Persona frontmatter stays the source of truth for the *preferred* model;
|
|
@@ -20,13 +31,15 @@ the override is per-dispatch and leaves files untouched.
|
|
|
20
31
|
## Prefs knob
|
|
21
32
|
|
|
22
33
|
`prefs.global.modelFallback` (template default below; absent knob = `enabled: true`
|
|
23
|
-
with no date gate
|
|
34
|
+
with no date gate; an absent `floorModel` defaults to `haiku`, so installs that
|
|
35
|
+
predate this field still get the second downgrade step):
|
|
24
36
|
|
|
25
37
|
```json
|
|
26
38
|
"modelFallback": {
|
|
27
39
|
"enabled": true,
|
|
28
40
|
"premiumTierUntil": null,
|
|
29
|
-
"fallbackModel": "
|
|
41
|
+
"fallbackModel": "sonnet",
|
|
42
|
+
"floorModel": "haiku",
|
|
30
43
|
"onDispatchError": true
|
|
31
44
|
}
|
|
32
45
|
```
|
|
@@ -34,25 +47,30 @@ with no date gate):
|
|
|
34
47
|
| Field | Meaning |
|
|
35
48
|
|---|---|
|
|
36
49
|
| `enabled` | Master switch. `false` = always dispatch the persona's `preferredModel`, fail loudly on error. |
|
|
37
|
-
| `premiumTierUntil` | ISO date (`YYYY-MM-DD`) or `null`. When set and today is **after** this date, every `preferredModel
|
|
38
|
-
| `fallbackModel` | Target of the first downgrade step. Default `opus
|
|
39
|
-
| `
|
|
50
|
+
| `premiumTierUntil` | ISO date (`YYYY-MM-DD`) or `null`. When set and today is **after** this date, every `preferredModel` dispatch is downgraded to `fallbackModel` unless the user re-confirms (see Date gate). Use when the top tier is included in a plan only until a known date. |
|
|
51
|
+
| `fallbackModel` | Target of the first downgrade step. Default `sonnet` (the next tier below opus). |
|
|
52
|
+
| `floorModel` | Last-resort tier when `fallbackModel` also fails to dispatch. Default `haiku`. Set to the same value as `fallbackModel` (or `null`) to disable the second step and halt after one downgrade. |
|
|
53
|
+
| `onDispatchError` | When `true`, a failed top-tier dispatch (model unavailable / quota / 4xx on model id) retries once on `fallbackModel`, and a failed `fallbackModel` dispatch retries once on `floorModel`, instead of aborting the phase. |
|
|
40
54
|
|
|
41
55
|
## Triggers (checked in this order)
|
|
42
56
|
|
|
43
57
|
1. **Date gate (Phase 0 Step 0, once per run).** If `premiumTierUntil` is set and
|
|
44
58
|
in the past, print one line:
|
|
45
|
-
`WARN:
|
|
46
|
-
Then export the override for every
|
|
47
|
-
may instead ask once (`Keep
|
|
59
|
+
`WARN: premium tier plan window ended <date>; preferredModel personas will dispatch on <fallbackModel>. Set prefs.global.modelFallback.premiumTierUntil to null to keep the top tier on usage credits.`
|
|
60
|
+
Then export the override for every `preferredModel` persona this run. Interactive modes
|
|
61
|
+
may instead ask once (`Keep top tier (usage credits) / Fall back to <model>`);
|
|
48
62
|
autopilot never asks - it falls back and logs.
|
|
49
63
|
2. **Dispatch error (per dispatch).** On a model-unavailable / quota error from a
|
|
50
|
-
|
|
51
|
-
`PHASE_MODEL_OVERRIDE=<fallbackModel>`.
|
|
52
|
-
|
|
64
|
+
top-tier dispatch and `onDispatchError: true`: retry the same persona once with
|
|
65
|
+
`PHASE_MODEL_OVERRIDE=<fallbackModel>`. If that retry also fails with a dispatch
|
|
66
|
+
error and `floorModel` is set and differs from `fallbackModel`, retry once more
|
|
67
|
+
with `PHASE_MODEL_OVERRIDE=<floorModel>`. A failure at the floor (or when no
|
|
68
|
+
floor is configured) falls through to the normal phase-error path (pause ->
|
|
69
|
+
resume). Never silent-skip the persona. Each downgrade emits its own
|
|
70
|
+
`model_fallback` metric line so a two-step degrade is visible in Phase 7.
|
|
53
71
|
3. **Cost budget ceiling (existing gate).** When `cost-budget-check.mjs` exits 11
|
|
54
72
|
(exceeded) mid-run, the run already pauses per the cost-budget contract; on
|
|
55
|
-
user-approved continue,
|
|
73
|
+
user-approved continue, `preferredModel` personas downgrade to `fallbackModel` for the
|
|
56
74
|
remainder of the run.
|
|
57
75
|
|
|
58
76
|
## Logging
|
|
@@ -70,4 +88,4 @@ per-phase `model` field already carries the override).
|
|
|
70
88
|
- No edits to `pipeline/agents/*.md` at runtime; frontmatter is install-time
|
|
71
89
|
configuration only.
|
|
72
90
|
- Copilot CLI reviewer set and adapter-platform model pins are out of scope
|
|
73
|
-
(they do not
|
|
91
|
+
(they pin their own models and do not use this persona ladder).
|
|
@@ -51,10 +51,13 @@ Every task gets an auto-incremented short ID. Counter stored at `$HOME/.claude/l
|
|
|
51
51
|
## Resume Logic
|
|
52
52
|
|
|
53
53
|
1. Find state file: `$HOME/.claude/logs/multi-agent/{project}/{task-id}/agent-state.json`
|
|
54
|
-
2.
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
2. **Validate before re-entry** (required - a half-written or corrupt state silently resumes at the wrong phase):
|
|
55
|
+
- `node $HOME/.claude/scripts/validate-state.mjs <state-file>` (resume-safety check: parseable JSON + `currentPhase` in 0..7 + well-formed `phases`; tolerant of legacy shapes, not a strict schema match). On non-zero exit, do NOT guess a phase: surface the error and stop with `ERR: agent-state.json is unsafe to resume; inspect it or 'kill #N' and restart.`
|
|
56
|
+
- Confirm the worktree path on `state.worktreePath` / `state.projects[].worktreePath` exists and `git -C <wt> status` is clean-or-known. If the worktree is missing or locked, run the Phase 0 "Worktree stale-lock heal" before continuing.
|
|
57
|
+
3. Read `agent-log.md` for previous findings
|
|
58
|
+
4. Resume from `currentPhase + 1`. If `state.phases[currentPhase+1].subStep` is set, re-enter that phase and skip already-recorded sub-steps (see "Sub-step checkpoints").
|
|
59
|
+
5. **Always run Phase 7** on resume completion - ensures knowledge capture even if task was paused mid-pipeline
|
|
60
|
+
6. Log: "Resumed {jiraId} from Phase {N}"
|
|
58
61
|
|
|
59
62
|
---
|
|
60
63
|
|
|
@@ -64,6 +67,8 @@ Run phases sequentially. Log every step to `agent-log.md`.
|
|
|
64
67
|
If any phase fails, mark task as `paused` and stop - user can `resume` later.
|
|
65
68
|
Update `agent-state.json` at EVERY phase transition.
|
|
66
69
|
|
|
70
|
+
**Halt visibility (required, autopilot included).** A halt is never silent. Whenever a phase halts on a hard error (validator failed twice, no subagent returned, dispatch error past fallback, lock irrecoverable), in addition to the `agent-log.md` line: (a) write `state.status = "paused"` and `state.haltReason = "<phase>:<cause>"`; (b) record the cause on the tracker via `phase-tracker.sh meta <phase> halt "<cause>"` and `phase-tracker.sh update <phase> failed`; (c) emit one `>&2` alert line `HALT phase <N>: <cause> - resume with /multi-agent:resume #<id>`. Autopilot suppresses *confirmations*, not *halts* - the user must always be able to see why an unattended run stopped without reading the log.
|
|
71
|
+
|
|
67
72
|
### Pipeline Best Practices
|
|
68
73
|
|
|
69
74
|
**Subagent return format**: Instruct every subagent to return structured JSON, not prose:
|
|
@@ -86,6 +91,11 @@ This keeps orchestrator context lean and enables programmatic routing.
|
|
|
86
91
|
}
|
|
87
92
|
```
|
|
88
93
|
|
|
89
|
-
**Proactive compaction**:
|
|
94
|
+
**Proactive compaction + phase-boundary checkpoint**: the orchestrator follows ~2,500 lines of phase prose in one session; once context fills, it starts dropping steps - the single biggest cause of "it got stuck / skipped a step." Two defenses, both required on full-pipeline runs:
|
|
95
|
+
|
|
96
|
+
- *Phase-boundary checkpoint.* At every phase transition, before loading the next phase doc, write the durable state (`agent-state.json` phase status + `files[]` + `retryCount`) and append a one-line progress summary to `agent-log.md`. The next phase reads state + log, not the back-conversation - so a transition is a clean re-entry point even if context is later compacted.
|
|
97
|
+
- *Compaction trigger.* If conversation context exceeds ~50%, run `/compact` preserving "modified files, plan, open review findings, current phase + sub-step" before continuing. Don't wait for auto-compaction near the limit - it triggers exactly when context is worst and is lossy. After compaction, re-read `agent-state.json` to re-ground.
|
|
98
|
+
|
|
99
|
+
**Sub-step checkpoints (long phases)**: Phase 3 (dev/TDD cycles) and Phase 7 (report/channels) can run many minutes; a crash mid-phase loses everything since the last phase boundary and forces a full phase re-run on resume. For these phases, also record `state.phases[<n>].subStep` (a short token: `red`, `green`, `build`, `pr-opened`, `confluence-synced`, ...) and the `files[]` written so far after each meaningful unit of work. On resume, re-enter the phase but skip units whose `subStep` is already recorded and whose `files[]` exist in the worktree - re-do only the unfinished tail, never the whole phase.
|
|
90
100
|
|
|
91
101
|
**3-iteration hard kill**: Any retry loop (build fix, review fix) MUST stop after 3 attempts. On 4th failure -> pause, ask user. No exceptions.
|
|
@@ -37,7 +37,7 @@ OUTPUT_LANG=$(jq -r '.global.outputLanguage // "en"' "$PREFS_FILE" 2>/dev/null |
|
|
|
37
37
|
|
|
38
38
|
From this point on, every assistant-authored conversational line (status updates, findings, phase headers, summaries, error explanations) renders in `$OUTPUT_LANG`. `AskUserQuestion` payloads stay English. External payloads stay English. See `refs/rules.md` "Language Application" for the full contract. Skipping this step is the failure mode where the user sees an English wall of text in a Turkish session.
|
|
39
39
|
|
|
40
|
-
**Model fallback date gate** (same step, once per run): read `prefs.global.modelFallback`. If `premiumTierUntil` is set and in the past, apply the date-gate trigger from `refs/features/model-fallback.md` -
|
|
40
|
+
**Model fallback date gate** (same step, once per run): read `prefs.global.modelFallback`. If `premiumTierUntil` is set and in the past, apply the date-gate trigger from `refs/features/model-fallback.md` - `preferredModel` personas dispatch on `fallbackModel` for this run, with the one-line WARN. Dispatch-error and budget triggers in that contract apply per-dispatch later; nothing else to do here.
|
|
41
41
|
|
|
42
42
|
**First-run guard**: After loading prefs, check if `keychainMapping` has at least one non-null value. If ALL values are null (template defaults - setup never ran), show:
|
|
43
43
|
```
|
|
@@ -387,7 +387,18 @@ git -C $PROJECT_ROOT config user.email "{identity.email}"
|
|
|
387
387
|
|
|
388
388
|
`worktreePath` = `$PROJECT_ROOT`, `localMode` = `true`.
|
|
389
389
|
|
|
390
|
-
**If normal mode** (worktree - default): 2. Worktree path: Jira → `.worktrees/{jiraId}/`, GitHub → `.worktrees/GH{issueNo}/`, free-text → `.worktrees/task-{shortId}/` 3. `git -C $PROJECT_ROOT worktree add {path} -b {branch} origin/{baseBranch}` (if exists: enter, pull) 4. Set identity: `git -C {worktree-path} config user.name/email` 5. Create log dir + `agent-log.md` + `agent-state.json`:
|
|
390
|
+
**If normal mode** (worktree - default): 2. Worktree path: Jira → `.worktrees/{jiraId}/`, GitHub → `.worktrees/GH{issueNo}/`, free-text → `.worktrees/task-{shortId}/` 3. **Heal stale admin state first** (see "Worktree stale-lock heal" below), then `git -C $PROJECT_ROOT worktree add {path} -b {branch} origin/{baseBranch}` (if exists: enter, pull) 4. Set identity: `git -C {worktree-path} config user.name/email` 5. Create log dir + `agent-log.md` + `agent-state.json`:
|
|
391
|
+
|
|
392
|
+
**Worktree stale-lock heal (required before every `worktree add`):** a run killed mid-`worktree add` (OOM, SIGTERM, disk full) leaves a locked or broken admin entry under `.git/worktrees/{id}/`, so the retry fails with `fatal: '<path>' already exists`. Always run the heal first - it is a no-op on a clean repo:
|
|
393
|
+
|
|
394
|
+
```bash
|
|
395
|
+
git -C "$proj" worktree prune 2>/dev/null || true # drop entries whose dir is gone
|
|
396
|
+
if git -C "$proj" worktree list --porcelain | grep -qF "${WT_PATH%/}"; then
|
|
397
|
+
git -C "$proj" worktree unlock "$WT_PATH" 2>/dev/null || true # clear a stale .locked marker
|
|
398
|
+
fi
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
If the path is still registered and healthy after the heal, enter + pull instead of re-adding (existing behavior). Run the rollback / collision flow in the multi-repo block below only when `worktree add` still fails after the heal.
|
|
391
402
|
|
|
392
403
|
**v2.1.0+ Multi-Repo Worktree Setup**:
|
|
393
404
|
|
|
@@ -396,6 +407,9 @@ When `state.projects[].length > 1`, repeat steps 2-4 **serially per repo** (work
|
|
|
396
407
|
```bash
|
|
397
408
|
for proj in "${PROJECTS[@]}"; do
|
|
398
409
|
WT_PATH="$proj/.worktrees/$BRANCH_DIR/"
|
|
410
|
+
git -C "$proj" worktree prune 2>/dev/null || true # heal stale admin state
|
|
411
|
+
git -C "$proj" worktree list --porcelain | grep -qF "${WT_PATH%/}" \
|
|
412
|
+
&& git -C "$proj" worktree unlock "$WT_PATH" 2>/dev/null || true
|
|
399
413
|
git -C "$proj" worktree add "$WT_PATH" -b "$BRANCH" "origin/$BASE_BRANCH"
|
|
400
414
|
git -C "$WT_PATH" config user.name "${proj_identity_name}"
|
|
401
415
|
git -C "$WT_PATH" config user.email "${proj_identity_email}"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
### Phase 1: Analysis (
|
|
1
|
+
### Phase 1: Analysis (Opus)
|
|
2
2
|
|
|
3
3
|
> **TLDR** - Opus-driven codebase exploration. Detects if the issue is already fixed (git blame, closed PRs), then launches parallel Explore sub-agents to map the affected code paths. Outputs: impact analysis, stack detection (auto-selects platform guide), relevant files, risk areas. Feeds Phase 2 planning.
|
|
4
4
|
|
|
@@ -117,10 +117,15 @@ Launch parallel Explore agents to scan codebase:
|
|
|
117
117
|
- Existing patterns and conventions
|
|
118
118
|
- Potential impact areas
|
|
119
119
|
|
|
120
|
-
Use `subagent_type: "Explore"` with thoroughness
|
|
120
|
+
Use `subagent_type: "Explore"` with thoroughness scaled to task size AND knowledge availability (first match wins):
|
|
121
121
|
|
|
122
|
+
- `taskType` is `bugfix`/`chore` AND scope is small (single named file, or a referenced crash/stack frame that pinpoints the site) → "light" (cheapest - scan only the named area + its direct callers)
|
|
122
123
|
- Knowledge exists → "medium" (targeted, cheaper)
|
|
123
|
-
- No knowledge → "very thorough" (full scan, first
|
|
124
|
+
- No knowledge, or `taskType` is `feature`/`refactor`/`component` → "very thorough" (full scan, first-time investment)
|
|
125
|
+
|
|
126
|
+
The light tier keeps a one-line bug fix from triggering a full-repo scan; pairing it with the deterministic `taskType` (Phase 0 Step 7) prevents the cheap path from firing on feature work.
|
|
127
|
+
|
|
128
|
+
**Dispatch resilience (required).** Explore agents run in parallel and the analyst synthesis waits on them, so a single stalled agent hangs the phase. Bound each Explore dispatch by a wall-clock budget (`EXPLORE_TIMEOUT_SECONDS`, default 180). If an agent has not returned within the budget: log `explore.timeout agent=<id>`, drop that agent's slice, and synthesize from the agents that did return. Proceed as long as at least one Explore agent returned; if zero returned, retry the cheapest single Explore once, and if that retry also fails to return, HALT with `ERR: no Explore agent returned within ${EXPLORE_TIMEOUT_SECONDS}s; resume with /multi-agent:resume #N.` Never block indefinitely on a slow or dead dispatch.
|
|
124
129
|
|
|
125
130
|
#### Output contract
|
|
126
131
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
### Phase 2: Planning (
|
|
1
|
+
### Phase 2: Planning (Opus)
|
|
2
2
|
|
|
3
3
|
> **TLDR** - Opus decomposes the analysis into concrete tasks with file-level targets, risk grading, and architecture review. Before Phase 3 a **Plan Approval Gate** runs in normal mode: if the Jira/issue description is ambiguous the orchestrator asks the user structured clarification questions (max 2 rounds) - once scope is clear it renders the plan and loops on free-text edit requests until the user approves or aborts. The gate is **skipped entirely** for `--dev`, `autopilot`, and `--dev autopilot` (their speed/zero-interaction contracts are preserved).
|
|
4
4
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
### Phase 4: Review (deterministic gates + parallel + triage)
|
|
2
2
|
|
|
3
|
-
> **TLDR** - Three-stage review. Stage 1: deterministic gates (build + lint + test + secret scan) that MUST pass. Stage 2: AI models in parallel - reviewer set is **CLI-aware**: Claude Code dispatches 2 reviewers (
|
|
3
|
+
> **TLDR** - Three-stage review. Stage 1: deterministic gates (build + lint + test + secret scan) that MUST pass. Stage 2: AI models in parallel - reviewer set is **CLI-aware**: Claude Code dispatches 2 reviewers (Opus + Sonnet); Copilot CLI dispatches 3 reviewers (GPT-5.4 + Opus + Sonnet). Stage 3: Opus triage - evaluates raw findings, filters false-positives/out-of-scope, keeps only actionable items. Only triage-accepted blocking items loop back to Phase 3.
|
|
4
4
|
|
|
5
5
|
<!-- progress-contract: applied -->
|
|
6
6
|
Progress emission per `refs/progress-contract.md` - lines for each gate, each reviewer dispatch + finish, triage start, triage verdict, fix dispatch.
|
|
@@ -81,6 +81,14 @@ If changes include UI files (iOS: `*View.swift`, `*Screen.swift`, `*Cell.swift`;
|
|
|
81
81
|
- Missing safe area / keyboard avoidance → **important**
|
|
82
82
|
- Hardcoded colors instead of system/semantic colors → **suggestion**
|
|
83
83
|
|
|
84
|
+
**iOS - SwiftUI interaction & accessibility conventions** (skills: `figma-navigation`, `figma-overlays`, `figma-bottom-sheets`, plus the enriched `figma-to-swiftui` accessibility rules). Gated to changed SwiftUI files; each is native-SwiftUI-first unless the project's `figma-config` `ui.*` declares a custom system, in which case check against that system instead:
|
|
85
|
+
- A reusable component that **routes itself** (hardcoded `NavigationLink(destination: ConcreteScreen())`, calls a router) instead of emitting a typed `Output` → **important** (breaks reuse).
|
|
86
|
+
- A reusable component that **presents its own app-level overlay** (toast/alert/modal) instead of emitting an intent for the caller → **important** (double-fires when composed).
|
|
87
|
+
- `AnyView` routed through an overlay center / data-modal path (rich content should be a local `.sheet/.modal { }`) → **important**.
|
|
88
|
+
- Loading HUD not **ref-counted** / missing `end` on an error path / no min-show (flash) → **important**.
|
|
89
|
+
- A pinned bottom CTA built as a presented sheet instead of `.safeAreaInset(edge:.bottom)`, or detents that aren't ascending / use magic-number heights → **suggestion**.
|
|
90
|
+
- **Accessibility over-annotation** (redundant `.isButton` on `Button` / `.isStaticText` on `Text`, `.accessibilityLabel` duplicating visible text, a per-enum-variant a11y key for non-interactive visual state, hints on self-explanatory actions) → **suggestion**; **missing** decorative-image hiding or interactive-state `.accessibilityValue` → **important**.
|
|
91
|
+
|
|
84
92
|
**Android - Material Design compliance** (skills: `compose-components`, `android-architecture`):
|
|
85
93
|
- Non-Material3 component when M3 equivalent exists → **suggestion**
|
|
86
94
|
- Missing `contentDescription` on icons/images → **blocking**
|
|
@@ -159,6 +167,14 @@ Visual-fidelity mismatches against the captured screenshot are BLOCKING findings
|
|
|
159
167
|
|
|
160
168
|
When `state.figmaAccess.tier === 3` (user-attached screenshot, no Code Connect snippet), the reviewer additionally sets `findings[i].severity = "blocking"` and `findings[i].tag = "review_blocking_tier3"` on every UI atom that lacks a confirmed canonical-component mapping. The triage step preserves these findings unless the user has explicitly cleared the open question.
|
|
161
169
|
|
|
170
|
+
#### Step 1.9 - Context economy (cache prefix + diff cap)
|
|
171
|
+
|
|
172
|
+
Phase 4 sends the same diff to every reviewer and then to triage, so the diff is the dominant token cost. Two measures keep it bounded:
|
|
173
|
+
|
|
174
|
+
**Shared cache prefix.** Build the reviewer and triage prompts so the large invariant context - the full diff, the Phase 1 analysis summary, the Phase 2 plan - is a byte-identical leading block across all dispatches in this iteration. Only the per-reviewer focus + skill line varies, and it goes AFTER the shared block. When the host supports prompt caching, the 2nd/3rd reviewer and the triage call then read that prefix at the discounted cache-read rate instead of re-billing it as fresh input. Forward the host-reported cache-read count as `tokens_cached` per the Token telemetry contract so the saving lands in the cost ledger.
|
|
175
|
+
|
|
176
|
+
**Single-repo diff cap.** If the diff exceeds the Phase 4 token allowance (`token-budget.json`), truncate the largest files and append a footer `[truncated - full diff in file://$WORKTREE/.review-diff.txt]`, writing the full diff to that path. Reviewers and triage receive the same capped view + the marker so they can flag "review the full diff manually." Log `review.diff_truncated bytes_dropped=<N>`. (Multi-repo already caps the combined diff at 80% of budget; this is the single-repo equivalent.)
|
|
177
|
+
|
|
162
178
|
#### Step 2 - Parallel AI Review (CLI-aware reviewer set)
|
|
163
179
|
|
|
164
180
|
Launch Agent instances **in parallel** using the shared `code-reviewer` subagent definition (`~/.claude/agents/code-reviewer.md`). The reviewer set is determined by the host CLI - GPT-5.4 is only available on Copilot CLI, so Claude Code skips that reviewer and runs a 2-model parallel review; Copilot CLI runs all three.
|
|
@@ -171,7 +187,7 @@ Launch Agent instances **in parallel** using the shared `code-reviewer` subagent
|
|
|
171
187
|
|
|
172
188
|
Each reviewer inherits the `code-reviewer` agent's focus areas (Security, Architecture, Quality, Performance) and output contract. The orchestrator overrides only the model and the stack-specific skill per-reviewer - no prompt duplication.
|
|
173
189
|
|
|
174
|
-
**Model override wiring:** `code-reviewer.md` declares `preferredModel:
|
|
190
|
+
**Model override wiring:** `code-reviewer.md` declares `preferredModel: opus`, so Reviewer 1 uses the persona default. Reviewer 2 (Copilot-only, `gpt-5.4`) and Reviewer 3 (`claude-sonnet-4.6`) set `PHASE_MODEL_OVERRIDE=<model>` before dispatch - the orchestrator exports `CLAUDE_CODE_SUBAGENT_MODEL` on Claude Code, or passes `--model` on Copilot CLI. Full precedence rule: `skills/shared/core/multi-agent/SKILL.md#agent-dispatch--per-persona-model-routing-v610`. Opus dispatches are additionally subject to the fallback contract (`refs/features/model-fallback.md`): dispatch-error retry on `fallbackModel` (sonnet) and budget-ceiling downgrade.
|
|
175
191
|
|
|
176
192
|
**Adapter platforms (Cursor / Antigravity / VS Code Copilot Chat) - reviewer set.** The native trio above is a Claude-Code / Copilot-CLI capability. On the adapter platforms the pipeline configures a 2-model cross-vendor review using the models each platform actually exposes (lineup in `pipeline/adapters/_base.mjs#REVIEWER_MODELS`; update as the platforms ship new versions):
|
|
177
193
|
|
|
@@ -185,7 +201,7 @@ So cross-vendor review IS restored on the adapter platforms (two different model
|
|
|
185
201
|
|
|
186
202
|
**Stack-specific skills loaded per reviewer** (from Phase 1 `detectedStack`). On Claude Code, Reviewer 2 (GPT-5.4) is not dispatched - its skill column is ignored. On Copilot CLI all three columns are used.
|
|
187
203
|
|
|
188
|
-
| Stack | Reviewer 1 (
|
|
204
|
+
| Stack | Reviewer 1 (Opus) | Reviewer 2 (GPT-5.4 - Copilot CLI only) | Reviewer 3 (Sonnet) |
|
|
189
205
|
|-------|-------------------|-----------------------------------------|---------------------|
|
|
190
206
|
| iOS/Swift | `ios-security`, `swiftui-performance`, `hig-patterns` | `swift-concurrency`, `ios-accessibility` | `swiftui-pro`, `swift-testing` |
|
|
191
207
|
| Android/Kotlin | `android-security`, `android-performance` | `compose-testing`, `android-architecture` | `compose-components`, `kotlin-coroutines-expert` |
|
|
@@ -196,9 +212,13 @@ So cross-vendor review IS restored on the adapter platforms (two different model
|
|
|
196
212
|
|
|
197
213
|
Skills are injected into reviewer prompt context - the reviewer uses them as reference, not as commands.
|
|
198
214
|
|
|
215
|
+
**iOS/Swift - interaction & convention skills (conditional).** When the diff touches SwiftUI UI files (`*View.swift`, `*Screen.swift`, `*Configuration.swift`, `*+Modifiers.swift`), additionally inject the relevant `figma-common` convention skills as reference for the iOS reviewers: `figma-navigation`, `figma-overlays`, `figma-bottom-sheets` (interaction: emit-intent vs self-route/self-present; native-SwiftUI-first vs the project's `ui.*` custom system), and the enriched `figma-to-swiftui` accessibility rules (minimalism). These back the Step 1.5 iOS convention checks. Generic across SwiftUI projects - not tied to any one app. Omit when the diff has no SwiftUI UI changes (keeps the reviewer prompt lean).
|
|
216
|
+
|
|
217
|
+
**Dispatch timeout (required, mirrors triage 3.3).** Reviewers run in parallel and triage waits on all of them, so one stalled reviewer hangs the phase. Bound each reviewer dispatch by `REVIEWER_TIMEOUT_SECONDS` (default 180). If a reviewer has not returned within the budget: log `review.reviewer_timeout reviewer=<name>`, treat that reviewer as absent, and proceed to triage with the reviewers that did return. The merged-findings count and `consensus.reviewerCount` reflect only the reviewers that returned. If **zero** reviewers return, retry the Opus reviewer once; on a second total failure HALT with `ERR: no reviewer returned within ${REVIEWER_TIMEOUT_SECONDS}s; resume with /multi-agent:resume #N.` The Step 2.5 rebuttal round uses the same per-dispatch timeout. Never block indefinitely on a slow or dead reviewer dispatch.
|
|
218
|
+
|
|
199
219
|
#### Output contract - reviewer step
|
|
200
220
|
|
|
201
|
-
Step 2 produces N reviewer-output objects (one per dispatched reviewer), each conforming to `pipeline/schemas/reviewer-output.schema.json`. They are persisted to `state.reviewIterations[<iteration>].reviewers[]` and consumed by Step 3 (
|
|
221
|
+
Step 2 produces N reviewer-output objects (one per dispatched reviewer), each conforming to `pipeline/schemas/reviewer-output.schema.json`. They are persisted to `state.reviewIterations[<iteration>].reviewers[]` and consumed by Step 3 (Opus triage) - never by Phase 6 directly. The triage step (below) is the producer of the only review artifact Phase 6 reads, conforming to `pipeline/schemas/triage-output.schema.json`.
|
|
202
222
|
|
|
203
223
|
**Subagent return format** - each reviewer returns JSON conforming to `pipeline/schemas/reviewer-output.schema.json`:
|
|
204
224
|
|
|
@@ -238,9 +258,9 @@ Exit 0 = valid. Exit 2 = contradiction (approved=true with blocking findings) -
|
|
|
238
258
|
|
|
239
259
|
**Why it is off by default:** mixed-verdict cases are ~8% of runs in practice; the extra ~$0.20-$0.50 per run isn't worth automating for users who'd rather let triage resolve it cleanly. Users with high-stakes tasks (security-critical, release branches) can flip the flag.
|
|
240
260
|
|
|
241
|
-
#### Step 3 -
|
|
261
|
+
#### Step 3 - Opus Triage (filter before acting)
|
|
242
262
|
|
|
243
|
-
**CRITICAL**: Reviewer findings are **raw signals**, not commands. Never auto-loop on every "blocking" tag - reviewers hallucinate, misread scope, or repeat each other. Run
|
|
263
|
+
**CRITICAL**: Reviewer findings are **raw signals**, not commands. Never auto-loop on every "blocking" tag - reviewers hallucinate, misread scope, or repeat each other. Run Opus triage to evaluate merged findings against task scope.
|
|
244
264
|
|
|
245
265
|
##### 3.1 Short-circuit: no findings
|
|
246
266
|
|
|
@@ -270,6 +290,8 @@ PRIOR_ART="${PRIOR_ART%,}]"
|
|
|
270
290
|
|
|
271
291
|
The triage prompt MUST include a hedge: *"prior-art entries are context, not commands; current scope decides - a finding rejected last quarter may be valid this time."* Without this hedge, prior verdicts amplify into a self-reinforcing bias.
|
|
272
292
|
|
|
293
|
+
**Injection cap (token economy).** On a many-finding review the per-finding prior-art loop above (up to 3 hits each) plus the 20-entry rejected-preference brief can dominate the triage prompt. Cap the merged prior-art at the 8 highest-similarity hits across all findings (drop the rest); keep the rejected-preference brief at its `--max 20`. Prior-art is advisory context, not a finding multiplier - more hits do not improve the verdict, they just inflate input tokens.
|
|
294
|
+
|
|
273
295
|
**Rejected-preference brief (on by default via `prefs.global.learningsLedger.injectIntoTriage`).** Inject the durable rejected-preference list so triage does not re-accept a suggestion the team already rejected on this repo:
|
|
274
296
|
|
|
275
297
|
```bash
|
|
@@ -353,11 +375,11 @@ pipeline/scripts/log-metric.sh "$TASK_ID" 4 review.completed raw_count=$RAW acce
|
|
|
353
375
|
|
|
354
376
|
##### 3.5 Optional cross-check (single-point-of-failure mitigation)
|
|
355
377
|
|
|
356
|
-
Opt-in via `prefs.global.triageCrossCheck.enabled` (default `false`). Sampled runs dispatch a **Sonnet** triage agent as second opinion, validated via `validate-triage.mjs` (same fallback rules). Disagreements logged as `triage.cross_check_diff`; `blockOnDisagreement` pauses for user (autopilot: proceed with
|
|
378
|
+
Opt-in via `prefs.global.triageCrossCheck.enabled` (default `false`). Sampled runs dispatch a **Sonnet** triage agent as second opinion, validated via `validate-triage.mjs` (same fallback rules). Disagreements logged as `triage.cross_check_diff`; `blockOnDisagreement` pauses for user (autopilot: proceed with Opus verdict). Doubles triage cost on sampled runs.
|
|
357
379
|
|
|
358
380
|
##### 3.6 Consensus surfacing (anti-correlation)
|
|
359
381
|
|
|
360
|
-
**Rationale:** Reviewer 1 (
|
|
382
|
+
**Rationale:** Reviewer 1 (Opus) and Reviewer 3 (Sonnet) share a base model family, so unanimous agreement on a *judgment call* is not independent confirmation - same-family models drift the same way on ambiguous prompts. Treating "both approved" as proof produces false-consensus passes. Triage therefore records a `consensus` block (schema v3.1.0) and surfaces disagreement and unverified agreement to the user rather than burying it.
|
|
361
383
|
|
|
362
384
|
After the triage verdict is computed, populate `triage.consensus`:
|
|
363
385
|
|
|
@@ -13,9 +13,12 @@ Resume a paused or failed task from the last successful phase.
|
|
|
13
13
|
|
|
14
14
|
1. **Find the task** - parse `#N` from the argument, or pick the most recent worktree with `status != "done"`.
|
|
15
15
|
|
|
16
|
-
2. **Read state** - parse `agent-state.json`:
|
|
16
|
+
2. **Read + validate state** - parse `agent-state.json`:
|
|
17
|
+
- Validate first: `node $HOME/.claude/scripts/validate-state.mjs <state-file>` (resume-safety check, tolerant of legacy shapes). On non-zero exit, do NOT guess a phase - surface the errors and stop with `ERR: agent-state.json is unsafe to resume; inspect it or 'kill #N' and restart.`
|
|
18
|
+
- Confirm the worktree (`worktreePath` / `projects[].worktreePath`) exists and is usable; if missing or locked, run the Phase 0 "Worktree stale-lock heal" before continuing.
|
|
17
19
|
- `currentPhase` - last completed phase
|
|
18
20
|
- `status` - `paused` | `failed` | `in_progress`
|
|
21
|
+
- `haltReason` - if set, show it so the user knows why the run stopped; clear it on successful re-entry
|
|
19
22
|
- `autopilot` - preserve the mode
|
|
20
23
|
|
|
21
24
|
3. **Load context** - read prior-phase findings from `agent-log.md`:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: "Task orchestrator - full pipeline via Jira ID + branch or GitHub Issue URL: analysis, plan, TDD development, parallel review +
|
|
2
|
+
description: "Task orchestrator - full pipeline via Jira ID + branch or GitHub Issue URL: analysis, plan, TDD development, parallel review + Opus triage (CLI-aware: 2-model on Claude Code, 3-model on Copilot CLI), commit, log"
|
|
3
3
|
allowed-tools: Agent, Bash, Read, Write, Edit, Glob, Grep, TaskCreate, TaskUpdate, TaskList, TaskGet, AskUserQuestion, WebFetch, WebSearch, NotebookEdit, Skill
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -82,8 +82,8 @@ Lib scripts (`~/.claude/lib/`):
|
|
|
82
82
|
| `stack [ios\|android\|backend\|mobile\|all]` | Swap skills for next conversation. No arg = show current stack. |
|
|
83
83
|
| `language [en\|tr]` | Show or set the assistant `outputLanguage` (explanations and chat replies). `promptLanguage` is locked to `en` and is not toggleable. No arg = show current `outputLanguage`. With `en` or `tr` = set and persist `outputLanguage`. External payloads (commits, PR bodies, Jira) stay English. |
|
|
84
84
|
| `setup` | Keychain token + Git Identity onboarding |
|
|
85
|
-
| `--dev` | Dev-only: Init -> Dev(
|
|
86
|
-
| `--dev autopilot` or `dev-autopilot` | Fastest path - Dev(
|
|
85
|
+
| `--dev` | Dev-only: Init -> Dev(Opus) -> Commit -> Report |
|
|
86
|
+
| `--dev autopilot` or `dev-autopilot` | Fastest path - Dev(Opus) + zero interaction |
|
|
87
87
|
| `--local` | No worktree - works directly on local branch |
|
|
88
88
|
| `autopilot` | Skip user confirmations, auto commit/PR |
|
|
89
89
|
| No args / `help` | Show usage guide |
|
|
@@ -220,7 +220,7 @@ Save to `prefs.projects[{project}].branches`.
|
|
|
220
220
|
```
|
|
221
221
|
Pipeline mode:
|
|
222
222
|
1. Full pipeline (8 phases, Sonnet dev, parallel review + triage - CLI-aware reviewer set)
|
|
223
|
-
2. --dev (fast: Init → Dev(
|
|
223
|
+
2. --dev (fast: Init → Dev(Opus) → Commit → Report)
|
|
224
224
|
Select [1/2]:
|
|
225
225
|
```
|
|
226
226
|
|
|
@@ -247,7 +247,7 @@ When called with `review`:
|
|
|
247
247
|
1. Detect current branch and project from cwd (or ask)
|
|
248
248
|
2. Get diff: `git diff HEAD` (unstaged + staged)
|
|
249
249
|
3. If no diff, get diff against base branch: `git diff origin/{baseBranch}...HEAD`
|
|
250
|
-
4. Launch Phase 4 review (parallel +
|
|
250
|
+
4. Launch Phase 4 review (parallel + Opus triage - 2-model on Claude Code, 3-model on Copilot CLI) on the diff
|
|
251
251
|
5. No worktree, no state file - lightweight one-shot review
|
|
252
252
|
6. Print findings to terminal
|
|
253
253
|
|
|
@@ -147,7 +147,7 @@ m = re.match(r'/display/[^/]+/(.+)', p.path)
|
|
|
147
147
|
print(up.unquote(m.group(1)) if m else '')
|
|
148
148
|
" URL_PARSE_INPUT="$URL")
|
|
149
149
|
if [ -n "$TITLE_RAW" ]; then
|
|
150
|
-
LOOKUP=$(curl -sS --fail --max-time "$TIMEOUT" \
|
|
150
|
+
LOOKUP=$(curl -sS --fail --max-time "$TIMEOUT" --connect-timeout 5 \
|
|
151
151
|
-H "Authorization: Bearer $TOKEN" -H "Accept: application/json" \
|
|
152
152
|
--data-urlencode "spaceKey=$SPACE_KEY" \
|
|
153
153
|
--data-urlencode "title=$TITLE_RAW" \
|
|
@@ -171,7 +171,7 @@ if [ -z "$PAGE_ID_RESOLVED" ]; then
|
|
|
171
171
|
fi
|
|
172
172
|
|
|
173
173
|
# Fetch the page body in storage format.
|
|
174
|
-
PAGE_JSON=$(curl -sS --fail --max-time "$TIMEOUT" \
|
|
174
|
+
PAGE_JSON=$(curl -sS --fail --max-time "$TIMEOUT" --connect-timeout 5 \
|
|
175
175
|
-H "Authorization: Bearer $TOKEN" -H "Accept: application/json" \
|
|
176
176
|
"$API_BASE/content/$PAGE_ID_RESOLVED?expand=body.storage,space" 2>/dev/null || true)
|
|
177
177
|
|
|
@@ -244,7 +244,7 @@ fi
|
|
|
244
244
|
APP_REF="$PLATFORM:$BUNDLE"
|
|
245
245
|
ISSUE_URL_API="https://firebasecrashlytics.googleapis.com/v1alpha/projects/$PROJECT_ID/apps/$APP_REF/issues/$ISSUE_ID"
|
|
246
246
|
|
|
247
|
-
ISSUE_JSON=$(curl -sS --fail --max-time "$TIMEOUT" \
|
|
247
|
+
ISSUE_JSON=$(curl -sS --fail --max-time "$TIMEOUT" --connect-timeout 5 \
|
|
248
248
|
-H "Authorization: Bearer $ACCESS_TOKEN" -H "Accept: application/json" \
|
|
249
249
|
"$ISSUE_URL_API" 2>/dev/null || true)
|
|
250
250
|
|
|
@@ -256,7 +256,7 @@ fi
|
|
|
256
256
|
SESSION_JSON=""
|
|
257
257
|
if [ -n "$SESSION_ID" ]; then
|
|
258
258
|
SESSION_URL_API="$ISSUE_URL_API/sessions/$SESSION_ID"
|
|
259
|
-
SESSION_JSON=$(curl -sS --fail --max-time "$TIMEOUT" \
|
|
259
|
+
SESSION_JSON=$(curl -sS --fail --max-time "$TIMEOUT" --connect-timeout 5 \
|
|
260
260
|
-H "Authorization: Bearer $ACCESS_TOKEN" -H "Accept: application/json" \
|
|
261
261
|
"$SESSION_URL_API" 2>/dev/null || true)
|
|
262
262
|
fi
|
|
@@ -153,7 +153,7 @@ fortify_get() {
|
|
|
153
153
|
else
|
|
154
154
|
hdr="Authorization: FortifyToken $(printf '%s' "$TOKEN" | base64 | tr -d '\n')"
|
|
155
155
|
fi
|
|
156
|
-
resp=$(curl -sS --max-time "$TIMEOUT" -w "\n%{http_code}" \
|
|
156
|
+
resp=$(curl -sS --max-time "$TIMEOUT" --connect-timeout 5 -w "\n%{http_code}" \
|
|
157
157
|
-H "$hdr" -H "Accept: application/json" "$url" 2>/dev/null || true)
|
|
158
158
|
http=$(printf '%s' "$resp" | tail -n1)
|
|
159
159
|
if [ "$http" = "200" ]; then
|
|
@@ -55,7 +55,7 @@ trap 'rm -f "$TMP_RAW"' EXIT
|
|
|
55
55
|
|
|
56
56
|
# Pull the spec with a size cap (curl --max-filesize doesn't exist on every
|
|
57
57
|
# platform; we let curl run with --output then check the size).
|
|
58
|
-
CURL_ARGS=(-sS --fail --max-time "$TIMEOUT" --location --output "$TMP_RAW" \
|
|
58
|
+
CURL_ARGS=(-sS --fail --max-time "$TIMEOUT" --connect-timeout 5 --location --output "$TMP_RAW" \
|
|
59
59
|
--write-out "%{http_code}\n%{content_type}\n%{size_download}\n")
|
|
60
60
|
if [ -n "${SWAGGER_AUTH_HEADER:-}" ]; then
|
|
61
61
|
CURL_ARGS+=(-H "$SWAGGER_AUTH_HEADER")
|
|
@@ -171,7 +171,7 @@ curl_figma() {
|
|
|
171
171
|
local code attempt sleep_s
|
|
172
172
|
for attempt in 1 2 3; do
|
|
173
173
|
code=$(curl -sS -o "$out" -w '%{http_code}' \
|
|
174
|
-
--max-time "$HTTP_TIMEOUT" \
|
|
174
|
+
--max-time "$HTTP_TIMEOUT" --connect-timeout 5 \
|
|
175
175
|
-H "X-Figma-Token: $FIGMA_TOKEN" \
|
|
176
176
|
"$url" || echo "000")
|
|
177
177
|
case "$code" in
|
|
@@ -273,7 +273,7 @@ render_and_download() {
|
|
|
273
273
|
printf "%s\tFAIL\t0\n" "$nid"
|
|
274
274
|
exit 0
|
|
275
275
|
fi
|
|
276
|
-
if curl -sS --max-time "$2" -o "$out" "$url"; then
|
|
276
|
+
if curl -sS --max-time "$2" --connect-timeout 5 -o "$out" "$url"; then
|
|
277
277
|
size=$(wc -c < "$out" | tr -d " ")
|
|
278
278
|
printf "%s\t%s\t%s\n" "$nid" "$out" "$size"
|
|
279
279
|
else
|
|
@@ -61,8 +61,15 @@
|
|
|
61
61
|
"modelFallback": {
|
|
62
62
|
"enabled": true,
|
|
63
63
|
"premiumTierUntil": null,
|
|
64
|
-
"fallbackModel": "
|
|
64
|
+
"fallbackModel": "sonnet",
|
|
65
65
|
"onDispatchError": true
|
|
66
|
+
},
|
|
67
|
+
"costBudget": {
|
|
68
|
+
"enabled": true,
|
|
69
|
+
"maxUsd": 5.0,
|
|
70
|
+
"warnPct": 80,
|
|
71
|
+
"onExceed": "warn",
|
|
72
|
+
"pricingModel": "opus"
|
|
66
73
|
}
|
|
67
74
|
},
|
|
68
75
|
"projects": {},
|
|
@@ -81,6 +81,10 @@
|
|
|
81
81
|
},
|
|
82
82
|
"startedAt": { "type": "string", "format": "date-time" },
|
|
83
83
|
"finishedAt": { "type": ["string", "null"], "format": "date-time" },
|
|
84
|
+
"haltReason": {
|
|
85
|
+
"type": ["string", "null"],
|
|
86
|
+
"description": "Set when a phase halts on a hard error (validator failed twice, no subagent returned, dispatch error past fallback, lock irrecoverable). Format '<phase>:<cause>'. Surfaced to the user and cleared on successful resume. See operations.md 'Halt visibility'."
|
|
87
|
+
},
|
|
84
88
|
"telemetry": {
|
|
85
89
|
"type": "object",
|
|
86
90
|
"additionalProperties": true,
|
|
@@ -145,6 +149,10 @@
|
|
|
145
149
|
"description": "Files produced or modified by this phase. Used for semantic revert."
|
|
146
150
|
},
|
|
147
151
|
"retryCount": { "type": "integer", "minimum": 0, "maximum": 3 },
|
|
152
|
+
"subStep": {
|
|
153
|
+
"type": "string",
|
|
154
|
+
"description": "Sub-step checkpoint for long phases (3, 7) so resume re-does only the unfinished tail instead of the whole phase. Short token, e.g. 'red'|'green'|'build'|'pr-opened'|'confluence-synced'. See operations.md 'Sub-step checkpoints'."
|
|
155
|
+
},
|
|
148
156
|
"notes": { "type": "string" },
|
|
149
157
|
"clarificationRounds": {
|
|
150
158
|
"type": "integer",
|
|
@@ -224,6 +224,45 @@
|
|
|
224
224
|
}
|
|
225
225
|
}
|
|
226
226
|
},
|
|
227
|
+
"ui": {
|
|
228
|
+
"type": "object",
|
|
229
|
+
"additionalProperties": false,
|
|
230
|
+
"description": "UI interaction systems. Consumed by the figma-navigation / figma-overlays / figma-bottom-sheets convention skills and by Phase 4 review. When a section is absent or its mode is 'native', the pipeline uses stock SwiftUI (NavigationStack, .alert/.sheet(item:), .sheet+presentationDetents). Set mode 'custom' to route to a project-supplied system by the type names below.",
|
|
231
|
+
"properties": {
|
|
232
|
+
"navigationSystem": {
|
|
233
|
+
"type": "object",
|
|
234
|
+
"additionalProperties": false,
|
|
235
|
+
"properties": {
|
|
236
|
+
"mode": { "type": "string", "enum": ["native", "custom"], "default": "native" },
|
|
237
|
+
"router": { "type": "string", "description": "Router type that dispatches routes (custom mode)." },
|
|
238
|
+
"routeEnum": { "type": "string", "description": "Exhaustive cross-surface route enum (custom mode)." },
|
|
239
|
+
"sceneType": { "type": "string", "description": "Headless screen container type (custom mode, optional)." },
|
|
240
|
+
"navigatorProtocol": { "type": "string", "description": "DI'd selection/present API (custom mode, optional)." }
|
|
241
|
+
}
|
|
242
|
+
},
|
|
243
|
+
"overlaySystem": {
|
|
244
|
+
"type": "object",
|
|
245
|
+
"additionalProperties": false,
|
|
246
|
+
"properties": {
|
|
247
|
+
"mode": { "type": "string", "enum": ["native", "custom"], "default": "native" },
|
|
248
|
+
"center": { "type": "string", "description": "DI'd overlay center protocol callable from VM/coordinator (custom mode)." },
|
|
249
|
+
"surfaces": { "type": "array", "items": { "type": "string" }, "description": "Independent render surfaces, e.g. root/currentScene (custom mode, optional)." }
|
|
250
|
+
}
|
|
251
|
+
},
|
|
252
|
+
"sheetSystem": {
|
|
253
|
+
"type": "object",
|
|
254
|
+
"additionalProperties": false,
|
|
255
|
+
"properties": {
|
|
256
|
+
"mode": { "type": "string", "enum": ["native", "custom"], "default": "native" },
|
|
257
|
+
"brandedModifier": { "type": "string", "description": "Branded sheet entry modifier, e.g. bottomSheet (custom mode)." },
|
|
258
|
+
"headlessModifiers": { "type": "array", "items": { "type": "string" }, "description": "Headless sheet modifiers, e.g. modalSheet/dockedSheet/expandableSheet/detentNavigationSheet (custom mode)." },
|
|
259
|
+
"detentType": { "type": "string", "description": "Detent enum type (custom mode)." },
|
|
260
|
+
"backdropType": { "type": "string", "description": "Backdrop enum type (custom mode)." },
|
|
261
|
+
"cornerRadiusToken": { "type": "string", "description": "Branded top-corner radius token (custom mode)." }
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
},
|
|
227
266
|
"teams": {
|
|
228
267
|
"type": "array",
|
|
229
268
|
"description": "Team definitions. Each entry maps a logical team to a GitHub label and a Jira custom-field value.",
|
|
@@ -803,12 +803,12 @@
|
|
|
803
803
|
"costBudget": {
|
|
804
804
|
"type": "object",
|
|
805
805
|
"additionalProperties": false,
|
|
806
|
-
"description": "v9.2+ - Proactive per-task cost ceiling. Complements the end-of-run cost summary: pipeline/scripts/cost-budget-check.mjs reads the phase-tracker token accumulators after each phase, prices them via cost-table.json, and warns or halts before spend runs away. Top user-cited reason to abandon an AI coding tool is opaque/spiralling cost; this surfaces it live.
|
|
806
|
+
"description": "v9.2+ - Proactive per-task cost ceiling. Complements the end-of-run cost summary: pipeline/scripts/cost-budget-check.mjs reads the phase-tracker token accumulators after each phase, prices them via cost-table.json, and warns or halts before spend runs away. The top user-cited reason to abandon an AI coding tool is opaque/spiralling cost; this surfaces it live. Enabled by default in warn mode (v10.1+); set enabled:false to opt out, or onExceed:halt to make the ceiling blocking.",
|
|
807
807
|
"properties": {
|
|
808
808
|
"enabled": {
|
|
809
809
|
"type": "boolean",
|
|
810
|
-
"default":
|
|
811
|
-
"description": "Master switch. When off, cost-budget-check.mjs exits 0 silently and the pipeline behaves exactly as before."
|
|
810
|
+
"default": true,
|
|
811
|
+
"description": "Master switch. Default true (warn mode) so runaway cost is visible live. When off, cost-budget-check.mjs exits 0 silently and the pipeline behaves exactly as before."
|
|
812
812
|
},
|
|
813
813
|
"maxUsd": {
|
|
814
814
|
"type": "number",
|
|
@@ -2,12 +2,6 @@
|
|
|
2
2
|
"_readme": "Per-model unit prices in USD per million tokens. Source: Anthropic public pricing (verified 2026-04-21). Update when Anthropic publishes new tiers. Unknown models render USD as ' - ' and emit a footnote - never block PR-body generation. cacheReadPerMtok is the discounted rate for prompt-cache hits (~10% of inPerMtok); the renderer prices a phase's tokens_cached at this rate when the tracker records it, so resume/cache reuse is visible in the ledger.",
|
|
3
3
|
"schemaVersion": "1.1.0",
|
|
4
4
|
"prices": {
|
|
5
|
-
"fable": {
|
|
6
|
-
"inPerMtok": 10.0,
|
|
7
|
-
"outPerMtok": 50.0,
|
|
8
|
-
"cacheReadPerMtok": 1.0,
|
|
9
|
-
"modelId": "claude-fable-5"
|
|
10
|
-
},
|
|
11
5
|
"opus": {
|
|
12
6
|
"inPerMtok": 5.0,
|
|
13
7
|
"outPerMtok": 25.0,
|
|
@@ -5,12 +5,12 @@
|
|
|
5
5
|
.claude/multi-agent-preferences.json 1
|
|
6
6
|
.claude/rules 12
|
|
7
7
|
.claude/schemas 23
|
|
8
|
-
.claude/scripts
|
|
8
|
+
.claude/scripts 174
|
|
9
9
|
.claude/settings.json 1
|
|
10
10
|
.claude/skills 555
|
|
11
11
|
.copilot/agents 8
|
|
12
12
|
.copilot/copilot-instructions.md 1
|
|
13
13
|
.copilot/lib 23
|
|
14
14
|
.copilot/schemas 23
|
|
15
|
-
.copilot/scripts
|
|
15
|
+
.copilot/scripts 174
|
|
16
16
|
.copilot/skills 590
|