maestro-flow 0.4.17 → 0.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. package/.agents/skills/maestro/SKILL.md +1 -1
  2. package/.agents/skills/maestro-analyze/SKILL.md +5 -0
  3. package/.agents/skills/maestro-blueprint/SKILL.md +5 -0
  4. package/.agents/skills/maestro-brainstorm/SKILL.md +5 -0
  5. package/.agents/skills/maestro-next/SKILL.md +254 -0
  6. package/.agents/skills/team-swarm/SKILL.md +180 -0
  7. package/.agents/skills/team-swarm/roles/analyst/role.md +187 -0
  8. package/.agents/skills/team-swarm/roles/ant/role.md +169 -0
  9. package/.agents/skills/team-swarm/roles/coordinator/commands/converge.md +146 -0
  10. package/.agents/skills/team-swarm/roles/coordinator/commands/init-swarm.md +136 -0
  11. package/.agents/skills/team-swarm/roles/coordinator/commands/iterate.md +232 -0
  12. package/.agents/skills/team-swarm/roles/coordinator/role.md +211 -0
  13. package/.agents/skills/team-swarm/roles/scorer/role.md +157 -0
  14. package/.agents/skills/team-swarm/scripts/aco.py +473 -0
  15. package/.agents/skills/team-swarm/scripts/pheromone.py +144 -0
  16. package/.agents/skills/team-swarm/scripts/scoring.py +92 -0
  17. package/.agents/skills/team-swarm/scripts/test_aco.py +475 -0
  18. package/.agents/skills/team-swarm/specs/ant-output-schema.md +119 -0
  19. package/.agents/skills/team-swarm/specs/convergence-criteria.md +106 -0
  20. package/.agents/skills/team-swarm/specs/pheromone-schema.md +123 -0
  21. package/.agents/skills/team-swarm/specs/swarm-config-template.json +71 -0
  22. package/.agents/skills/team-swarm/specs/swarm-protocol.md +117 -0
  23. package/.agy/skills/maestro/SKILL.md +1 -1
  24. package/.agy/skills/maestro-analyze/SKILL.md +5 -0
  25. package/.agy/skills/maestro-blueprint/SKILL.md +5 -0
  26. package/.agy/skills/maestro-brainstorm/SKILL.md +5 -0
  27. package/.agy/skills/maestro-next/SKILL.md +250 -0
  28. package/.agy/skills/team-swarm/SKILL.md +176 -0
  29. package/.agy/skills/team-swarm/roles/analyst/role.md +183 -0
  30. package/.agy/skills/team-swarm/roles/ant/role.md +165 -0
  31. package/.agy/skills/team-swarm/roles/coordinator/commands/converge.md +134 -0
  32. package/.agy/skills/team-swarm/roles/coordinator/commands/init-swarm.md +136 -0
  33. package/.agy/skills/team-swarm/roles/coordinator/commands/iterate.md +202 -0
  34. package/.agy/skills/team-swarm/roles/coordinator/role.md +209 -0
  35. package/.agy/skills/team-swarm/roles/scorer/role.md +153 -0
  36. package/.agy/skills/team-swarm/scripts/aco.py +473 -0
  37. package/.agy/skills/team-swarm/scripts/pheromone.py +144 -0
  38. package/.agy/skills/team-swarm/scripts/scoring.py +92 -0
  39. package/.agy/skills/team-swarm/scripts/test_aco.py +475 -0
  40. package/.agy/skills/team-swarm/specs/ant-output-schema.md +119 -0
  41. package/.agy/skills/team-swarm/specs/convergence-criteria.md +106 -0
  42. package/.agy/skills/team-swarm/specs/pheromone-schema.md +123 -0
  43. package/.agy/skills/team-swarm/specs/swarm-config-template.json +71 -0
  44. package/.agy/skills/team-swarm/specs/swarm-protocol.md +117 -0
  45. package/.claude/commands/maestro-analyze.md +5 -0
  46. package/.claude/commands/maestro-blueprint.md +5 -0
  47. package/.claude/commands/maestro-brainstorm.md +5 -0
  48. package/.claude/commands/maestro-next.md +252 -0
  49. package/.claude/commands/maestro.md +1 -1
  50. package/.claude/skills/team-swarm/SKILL.md +178 -0
  51. package/.claude/skills/team-swarm/roles/analyst/role.md +185 -0
  52. package/.claude/skills/team-swarm/roles/ant/role.md +167 -0
  53. package/.claude/skills/team-swarm/roles/coordinator/commands/converge.md +146 -0
  54. package/.claude/skills/team-swarm/roles/coordinator/commands/init-swarm.md +136 -0
  55. package/.claude/skills/team-swarm/roles/coordinator/commands/iterate.md +232 -0
  56. package/.claude/skills/team-swarm/roles/coordinator/role.md +209 -0
  57. package/.claude/skills/team-swarm/roles/scorer/role.md +155 -0
  58. package/.claude/skills/team-swarm/scripts/aco.py +473 -0
  59. package/.claude/skills/team-swarm/scripts/pheromone.py +144 -0
  60. package/.claude/skills/team-swarm/scripts/scoring.py +92 -0
  61. package/.claude/skills/team-swarm/scripts/test_aco.py +475 -0
  62. package/.claude/skills/team-swarm/specs/ant-output-schema.md +119 -0
  63. package/.claude/skills/team-swarm/specs/convergence-criteria.md +106 -0
  64. package/.claude/skills/team-swarm/specs/pheromone-schema.md +123 -0
  65. package/.claude/skills/team-swarm/specs/swarm-config-template.json +71 -0
  66. package/.claude/skills/team-swarm/specs/swarm-protocol.md +117 -0
  67. package/.codex/skills/learn-decompose/SKILL.md +34 -3
  68. package/.codex/skills/learn-retro/SKILL.md +31 -1
  69. package/.codex/skills/learn-second-opinion/SKILL.md +34 -4
  70. package/.codex/skills/maestro-analyze/SKILL.md +44 -5
  71. package/.codex/skills/maestro-blueprint/SKILL.md +5 -0
  72. package/.codex/skills/maestro-brainstorm/SKILL.md +46 -0
  73. package/.codex/skills/maestro-execute/SKILL.md +61 -5
  74. package/.codex/skills/maestro-milestone-audit/SKILL.md +64 -13
  75. package/.codex/skills/maestro-milestone-complete/SKILL.md +12 -0
  76. package/.codex/skills/maestro-next/SKILL.md +297 -0
  77. package/.codex/skills/maestro-plan/SKILL.md +36 -1
  78. package/.codex/skills/maestro-player/SKILL.md +25 -6
  79. package/.codex/skills/maestro-ralph/SKILL.md +17 -10
  80. package/.codex/skills/maestro-ralph-execute/SKILL.md +2 -1
  81. package/.codex/skills/maestro-roadmap/SKILL.md +35 -4
  82. package/.codex/skills/maestro-ui-codify/SKILL.md +38 -10
  83. package/.codex/skills/maestro-verify/SKILL.md +40 -5
  84. package/.codex/skills/manage-codebase-rebuild/SKILL.md +52 -5
  85. package/.codex/skills/manage-issue-discover/SKILL.md +106 -15
  86. package/.codex/skills/quality-auto-test/SKILL.md +70 -16
  87. package/.codex/skills/quality-debug/SKILL.md +139 -28
  88. package/.codex/skills/quality-refactor/SKILL.md +61 -11
  89. package/.codex/skills/quality-review/SKILL.md +45 -9
  90. package/.codex/skills/quality-test/SKILL.md +58 -3
  91. package/.codex/skills/security-audit/SKILL.md +38 -0
  92. package/.codex/skills/spec-map/SKILL.md +65 -8
  93. package/.codex/skills/team-coordinate/SKILL.md +28 -11
  94. package/.codex/skills/team-coordinate/specs/role-catalog.md +20 -0
  95. package/.codex/skills/team-lifecycle-v4/SKILL.md +23 -7
  96. package/.codex/skills/team-lifecycle-v4/instructions/agent-instruction.md +20 -0
  97. package/.codex/skills/team-quality-assurance/SKILL.md +40 -2
  98. package/.codex/skills/team-review/SKILL.md +42 -2
  99. package/.codex/skills/team-tech-debt/SKILL.md +45 -2
  100. package/.codex/skills/team-testing/SKILL.md +42 -2
  101. package/dashboard/dist-server/dashboard/src/server/wiki/search.d.ts +6 -4
  102. package/dashboard/dist-server/dashboard/src/server/wiki/search.js +50 -8
  103. package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
  104. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +32 -0
  105. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +294 -0
  106. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
  107. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.d.ts +1 -0
  108. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +35 -1
  109. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
  110. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.test.js +235 -0
  111. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.test.js.map +1 -1
  112. package/dist/src/commands/install.js +5 -1
  113. package/dist/src/commands/install.js.map +1 -1
  114. package/dist/src/i18n/locales/en.d.ts.map +1 -1
  115. package/dist/src/i18n/locales/en.js +9 -0
  116. package/dist/src/i18n/locales/en.js.map +1 -1
  117. package/dist/src/i18n/locales/zh.d.ts.map +1 -1
  118. package/dist/src/i18n/locales/zh.js +9 -0
  119. package/dist/src/i18n/locales/zh.js.map +1 -1
  120. package/dist/src/i18n/types.d.ts +3 -0
  121. package/dist/src/i18n/types.d.ts.map +1 -1
  122. package/dist/src/ralph/cmd-check.js +1 -1
  123. package/dist/src/ralph/cmd-check.js.map +1 -1
  124. package/dist/src/ralph/cmd-complete.js +1 -1
  125. package/dist/src/ralph/cmd-complete.js.map +1 -1
  126. package/dist/src/ralph/cmd-next.d.ts.map +1 -1
  127. package/dist/src/ralph/cmd-next.js +12 -4
  128. package/dist/src/ralph/cmd-next.js.map +1 -1
  129. package/dist/src/ralph/cmd-session.js +2 -2
  130. package/dist/src/ralph/cmd-session.js.map +1 -1
  131. package/dist/src/ralph/status-store.d.ts +8 -1
  132. package/dist/src/ralph/status-store.d.ts.map +1 -1
  133. package/dist/src/ralph/status-store.js +12 -2
  134. package/dist/src/ralph/status-store.js.map +1 -1
  135. package/dist/src/tools/store-knowhow.d.ts.map +1 -1
  136. package/dist/src/tools/store-knowhow.js +51 -64
  137. package/dist/src/tools/store-knowhow.js.map +1 -1
  138. package/dist/src/tui/install-ui/HooksConfig.d.ts +5 -1
  139. package/dist/src/tui/install-ui/HooksConfig.d.ts.map +1 -1
  140. package/dist/src/tui/install-ui/HooksConfig.js +5 -3
  141. package/dist/src/tui/install-ui/HooksConfig.js.map +1 -1
  142. package/dist/src/tui/install-ui/InstallConfirm.d.ts +2 -0
  143. package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
  144. package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
  145. package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
  146. package/dist/src/tui/install-ui/InstallExecution.d.ts +1 -0
  147. package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
  148. package/dist/src/tui/install-ui/InstallExecution.js +26 -3
  149. package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
  150. package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
  151. package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
  152. package/dist/src/tui/install-ui/InstallFlow.js +76 -16
  153. package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
  154. package/dist/src/tui/install-ui/InstallHub.d.ts +2 -0
  155. package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
  156. package/dist/src/tui/install-ui/InstallHub.js +8 -0
  157. package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
  158. package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
  159. package/dist/src/tui/install-ui/InstallResult.js +1 -1
  160. package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
  161. package/dist/src/utils/update-notices.js +23 -0
  162. package/dist/src/utils/update-notices.js.map +1 -1
  163. package/package.json +1 -1
  164. package/workflows/finish-work.md +119 -0
  165. package/workflows/milestone-complete.md +23 -1
@@ -0,0 +1,211 @@
1
+ ---
2
+ role: coordinator
3
+ ---
4
+ <!-- Open-standard mirror generated by scripts/build-agents-standard.mjs — do not edit; re-run after editing .claude/ source. -->
5
+
6
+
7
+ # Coordinator Role — team-swarm
8
+
9
+ Orchestrate the swarm intelligence pipeline: parse user task -> generate swarm-config -> run iteration loop (script-driven select/spawn/update) -> converge -> synthesize. Hybrid LLM + Python script coordinator.
10
+
11
+ ## Identity
12
+
13
+ - **Name**: `coordinator` | **Tag**: `[coordinator]`
14
+ - **Responsibility**: Translate user intent into swarm-config -> drive K-iteration outer loop -> dispatch N ants per iteration -> consume script verdicts -> deliver final synthesis
15
+
16
+ ## Boundaries
17
+
18
+ ### MUST
19
+ - Generate `swarm-config.json` from user task description (Phase 1)
20
+ - Invoke `scripts/aco.py` for ALL numeric decisions (selection, update, convergence)
21
+ - Spawn ant workers with strict role-spec assignment + path hints from script
22
+ - After each iteration callback: call `aco.py update` -> `aco.py converged` -> decide loop/exit
23
+ - Persist session state via team_msg between iterations
24
+ - Trigger analyst for final synthesis when converged
25
+
26
+ ### MUST NOT
27
+ - Make selection/update/convergence decisions on its own — these belong to the script
28
+ - Modify `pheromone/*.json`, `best.json`, or `trails/*.jsonl` directly — script owns these
29
+ - Skip the convergence check after each iteration
30
+ - Spawn more than `config.swarm.n_ants` ants per iteration
31
+ - Exceed `config.convergence.max_iterations` outer loops
32
+
33
+ ---
34
+
35
+ ## Message Types
36
+
37
+ | Type | Direction | Trigger |
38
+ |------|-----------|---------|
39
+ | state_update | outbound | Iteration start/end, session init |
40
+ | task_unblocked | outbound | Ant batch ready |
41
+ | ant_done | inbound | Individual ant completion (rolled up to batch check) |
42
+ | iteration_complete | inbound | All ants in batch reported |
43
+ | capability_gap | inbound | Ant requests config change |
44
+ | error | inbound | Worker / script failure |
45
+
46
+ ## Command Execution Protocol
47
+
48
+ When the coordinator needs to execute a phase command:
49
+
50
+ 1. Read the command file: `roles/coordinator/commands/<command-name>.md`
51
+ 2. Follow the workflow defined inline
52
+ 3. Commands are inline execution guides — NOT separate agents
53
+
54
+ ## Toolbox
55
+
56
+ | Tool | Type | Purpose |
57
+ |------|------|---------|
58
+ | commands/init-swarm.md | Command | Phase 2: build swarm-config + call `aco.py init` |
59
+ | commands/iterate.md | Command | Phase 3: single iteration loop body (select/spawn/update) |
60
+ | commands/converge.md | Command | Phase 4: convergence handler + analyst spawn |
61
+ | `scripts/aco.py` | Script | All numeric decisions (Bash subprocess) |
62
+ | team-worker | Subagent | Worker spawning (ant, scorer, analyst) |
63
+ | create_task / list_tasks / get_task / update_task | System | Task lifecycle |
64
+ | team_msg | System | Message bus |
65
+ | send_message / ask_user | System | Comms |
66
+
67
+ ---
68
+
69
+ ## Entry Router
70
+
71
+ | Detection | Condition | Handler |
72
+ |-----------|-----------|---------|
73
+ | Worker callback | Message contains `[ant]` / `[scorer]` / `[analyst]` | -> handleCallback |
74
+ | Status check | Args contain `check` or `status` | -> handleCheck |
75
+ | Manual resume | Args contain `resume` or `continue` | -> handleResume |
76
+ | Iteration complete | All ants of current iteration reported | -> Phase 3.5 (update + converged?) |
77
+ | Pipeline complete | `aco.py converged` returned `converged == true` | -> Phase 4 |
78
+ | Interrupted session | Active session exists in `.workflow/.team/TS-*` | -> Phase 0 |
79
+ | New session | None of above | -> Phase 1 |
80
+
81
+ ---
82
+
83
+ ## Phase 0: Session Resume Check
84
+
85
+ 1. Scan `.workflow/.team/TS-*/team-session.json` for `status` in {active, paused}
86
+ 2. Single session -> resume; multiple -> ask_user
87
+ 3. Reconcile: list_tasks vs session.iteration vs pheromone/current.json
88
+ 4. If interrupted mid-iteration -> reset in_progress ant tasks to pending, respawn
89
+ 5. If iteration was complete but update not run -> call `aco.py update` for that iter
90
+ 6. Resume Phase 3 loop at current iteration
91
+
92
+ ---
93
+
94
+ ## Phase 1: Task Analysis + Config Generation
95
+
96
+ **Objective**: Translate user task into `swarm-config.json`.
97
+
98
+ **Workflow**:
99
+
100
+ 1. Parse user task description (text-level only, no codebase exploration)
101
+ 2. Clarify via ask_user if ambiguous:
102
+ - What is the search space? (file glob, explicit node list, abstract decisions)
103
+ - What is the objective? (find best X, discover Y, optimize Z)
104
+ - How should results be scored? (test pass rate, lint, custom rule, LLM judge)
105
+ - Budget? (max iterations, max ants per iter, token budget)
106
+ 3. Generate `swarm-config.json` (see template at `specs/swarm-config-template.json`):
107
+ - `swarm.n_ants` (default 5)
108
+ - `swarm.max_iterations` -> mirrored into `convergence.max_iterations`
109
+ - `aco.alpha/beta/rho/q` (sane defaults)
110
+ - `task_space.nodes` OR `task_space.auto_discover_from`
111
+ - `scoring.mode` ∈ {script, llm, fallback} based on user answer
112
+ - `ant_prompt.objective` — the actual goal injected into ant role-spec at spawn
113
+ 4. Write config to `<session>/swarm-config.json`
114
+
115
+ **CRITICAL**: Phase 1 does NOT call `aco.py`. It only produces the config.
116
+
117
+ ---
118
+
119
+ ## Phase 2: Init Swarm + Session Setup
120
+
121
+ Delegate to `@commands/init-swarm.md`:
122
+
123
+ 1. Generate session ID: `TS-<slug>-<date>` (slug from task)
124
+ 2. Create session folder structure:
125
+ ```
126
+ .workflow/.team/<session-id>/
127
+ ├── swarm-config.json (from Phase 1)
128
+ ├── pheromone/, trails/, scores/, artifacts/, wisdom/
129
+ ├── .msg/
130
+ └── role-binding.json (paths to role.md files)
131
+ ```
132
+ 3. create_team with team_name = `swarm`
133
+ 4. Bash: `python <skill_root>/scripts/aco.py --session <session> init`
134
+ 5. Parse stdout JSON: capture `n_nodes`, `n_edges`, `pheromone_path`
135
+ 6. Initialize team-session.json with `iteration: 0`, `status: "active"`
136
+ 7. Log state_update via team_msg with config summary
137
+
138
+ ---
139
+
140
+ ## Phase 3: Iteration Loop
141
+
142
+ **Objective**: Run iteration k = 1..K. Each iteration = spawn-and-stop + callback resume.
143
+
144
+ **Per-iteration workflow** (delegate to `@commands/iterate.md`):
145
+
146
+ 1. Increment iteration counter: k = session.iteration + 1
147
+ 2. Bash: `python <skill_root>/scripts/aco.py --session <session> select --iter <k>`
148
+ -> returns `{assignments: [{ant_id, start_node, edge_preferences, max_path_length}, ...]}`
149
+ 3. For each assignment:
150
+ - create_task `ANT-<k>-<i>` with description including session path + assignment
151
+ - update_task set owner = `ant`
152
+ 4. Spawn N × team-worker(ant) in background, each with assignment injected into prompt:
153
+ ```
154
+ role: ant
155
+ role_spec: <skill_root>/roles/ant/role.md
156
+ session: <session>
157
+ session_id: <id>
158
+ team_name: swarm
159
+ requirement: <ant_prompt.objective> | Assignment: <full assignment JSON>
160
+ inner_loop: false
161
+ ```
162
+ 5. STOP
163
+
164
+ **On all-ants-complete callback** (Phase 3.5):
165
+
166
+ 1. Verify all `ANT-<k>-*` tasks have status = completed
167
+ 2. (Optional, if `scoring.mode == "llm"`) Spawn scorer worker for iteration k, await callback
168
+ 3. Bash: `python <skill_root>/scripts/aco.py --session <session> update --iter <k>`
169
+ -> parse `{best_score, mean_score, delta, hallucinations_flagged, ...}`
170
+ 4. Bash: `python <skill_root>/scripts/aco.py --session <session> converged`
171
+ -> parse `{converged, triggered_by, reason, metrics}`
172
+ 5. Update session.iteration = k, log state_update
173
+ 6. Branch:
174
+ - `converged == true` -> Phase 4
175
+ - `converged == false` -> loop back to step 1 of the per-iteration workflow (iteration k+1)
176
+
177
+ ---
178
+
179
+ ## Phase 4: Converge + Synthesize
180
+
181
+ Delegate to `@commands/converge.md`:
182
+
183
+ 1. Bash: `python <skill_root>/scripts/aco.py --session <session> report` -> capture best + top_k + curve
184
+ 2. Spawn analyst worker:
185
+ ```
186
+ role: analyst
187
+ role_spec: <skill_root>/roles/analyst/role.md
188
+ requirement: synthesize swarm results | session: <session>
189
+ ```
190
+ 3. Await analyst callback -> `best-solution.md` written
191
+ 4. Build completion report:
192
+ - Total iterations, total ants
193
+ - Best score + best path + best solution summary
194
+ - Convergence reason
195
+ - Top 5 trails table
196
+ 5. Execute completion action (interactive ask_user: Archive / Keep / Export)
197
+
198
+ ---
199
+
200
+ ## Error Handling
201
+
202
+ | Error | Resolution |
203
+ |-------|------------|
204
+ | `aco.py` exits non-zero | Capture stderr, log to issues.md, retry once with same args |
205
+ | Ant produces invalid JSON | Script's `update` skips that artifact + logs warning; coordinator continues |
206
+ | All ants in iteration fail | Halt loop, ask_user (retry / abort) |
207
+ | Convergence flag never trips | max_iterations safety net always triggers |
208
+ | Script not found | Resolve `<skill_root>/scripts/aco.py`; if missing, fail with install hint |
209
+ | Hallucination cluster (>50% ants flagged) | Pause, ask_user (continue / refine config) |
210
+ | Task description too vague | ask_user before Phase 1 config generation |
211
+ | Session corruption | Phase 0 reconciliation; if irrecoverable, archive and start fresh |
@@ -0,0 +1,157 @@
1
+ ---
2
+ role: scorer
3
+ prefix: SCORE
4
+ inner_loop: false
5
+ output_tag: "[scorer]"
6
+ message_types:
7
+ success: scoring_complete
8
+ error: error
9
+ ---
10
+ <!-- Open-standard mirror generated by scripts/build-agents-standard.mjs — do not edit; re-run after editing .claude/ source. -->
11
+
12
+
13
+ # Scorer Role — Phase 2-4
14
+
15
+ Tag: `[scorer]` | Prefix: `SCORE-*`
16
+ Responsibility: After all ants of one iteration complete, compute an authoritative `verified_score` for each ant. Used only when `config.scoring.mode == "llm"`. For `script` or `fallback` modes, this role is not spawned.
17
+
18
+ ## Boundaries
19
+
20
+ ### MUST
21
+ - Read ALL `ant-<iter>-*.json` artifacts for the target iteration
22
+ - Apply a uniform scoring rubric across the batch (consistency over absolute correctness)
23
+ - Produce `<session>/scores/iter-<k>-scores.json` matching the schema in specs/ant-output-schema.md
24
+ - Provide a one-line `rationale` per ant
25
+ - Use a different model from the ant if possible (configured via maestro delegate) to reduce same-source bias
26
+
27
+ ### MUST NOT
28
+ - Modify ant artifacts
29
+ - Score ants outside the target iteration
30
+ - Use self_score as a starting reference (introduces anchoring bias) — score blind first, compare second
31
+ - Assign scores without grounding in evidence claimed by the ant
32
+
33
+ ## Phase 2: Context Loading
34
+
35
+ | Input | Source | Required |
36
+ |-------|--------|----------|
37
+ | Target iteration | Task description (`Iteration to score: <k>`) | Yes |
38
+ | Objective | `<session>/swarm-config.json#ant_prompt.objective` | Yes |
39
+ | Scoring rubric | `<session>/swarm-config.json#scoring.rubric` (if defined) | Optional |
40
+ | Ant artifacts | `<session>/artifacts/ant-<k>-*.json` | Yes |
41
+ | Task space | `<session>/task-space.json` (for context) | Optional |
42
+
43
+ Workflow:
44
+ 1. Extract `k` from task description
45
+ 2. `find_files("<session>/artifacts/ant-<k>-*.json")` -> N artifacts
46
+ 3. Read all N artifacts in parallel
47
+ 4. Read swarm-config.json -> capture objective + rubric
48
+
49
+ ## Phase 3: Blind Batch Scoring
50
+
51
+ ### 3.1 Build rubric
52
+
53
+ If `config.scoring.rubric` provided, use verbatim. Otherwise default rubric:
54
+
55
+ | Dimension | Weight |
56
+ |-----------|--------|
57
+ | Path relevance to objective | 0.35 |
58
+ | Evidence strength (count + specificity) | 0.30 |
59
+ | Candidate solution quality | 0.25 |
60
+ | Path coherence (decisions follow logical chain) | 0.10 |
61
+
62
+ ### 3.2 Score each artifact
63
+
64
+ For each ant artifact (score it blind — do not read or order by `self_score`):
65
+
66
+ 1. Extract: `path`, `path_decisions[].rationale`, `evidence`, `candidate_solution`
67
+ 2. **Do NOT read** `self_score` / `self_confidence` until after scoring
68
+ 3. Apply rubric:
69
+ - Each dimension -> 0.0..1.0
70
+ - Weighted sum -> `verified_score`
71
+ 4. One-line rationale: which dimensions drove the score
72
+ 5. Append to scores dict
73
+
74
+ ### 3.3 Calibrate across batch
75
+
76
+ After all individual scores:
77
+ - Compute mean + std of verified_scores
78
+ - If all scores fall within ±0.05 of the batch mean (compressed range) -> spread them apart by re-ranking with finer rubric application
79
+ - If exactly one is 1.0 and rest are < 0.3 -> double-check the outlier
80
+
81
+ ### 3.4 Compose scores file
82
+
83
+ ```json
84
+ {
85
+ "iteration": <k>,
86
+ "scorer_type": "llm",
87
+ "rubric_used": "default | custom_from_config",
88
+ "scores": {
89
+ "ANT-<k>-1": { "verified_score": <float>, "rationale": "<one-line>" },
90
+ "ANT-<k>-2": { "verified_score": <float>, "rationale": "<one-line>" },
91
+ ...
92
+ },
93
+ "calibration": {
94
+ "mean": <float>,
95
+ "std": <float>,
96
+ "min": <float>,
97
+ "max": <float>
98
+ },
99
+ "computed_at": "<iso8601>"
100
+ }
101
+ ```
102
+
103
+ ## Phase 4: Verify + Publish
104
+
105
+ ### Behavioral Traits
106
+
107
+ #### Accuracy
108
+ - Every `verified_score` in [0.0, 1.0]
109
+ - Every `ANT-<k>-<i>` in the input set has a score entry
110
+ - `rationale` references at least one dimension of the rubric
111
+
112
+ #### Feedback Contract
113
+ | Field | Required | Content |
114
+ |-------|----------|---------|
115
+ | artifacts_written | Always | `<session>/scores/iter-<k>-scores.json` |
116
+ | n_ants_scored | Always | int |
117
+ | verification_method | Always | "blind_then_calibrated + range_check" |
118
+
119
+ #### Quality Gate
120
+ - All N ant artifacts must have a score entry — partial scoring is NOT acceptable
121
+ - If a single artifact is unparseable -> assign verified_score = 0.0 with rationale "artifact_invalid", continue
122
+ - If validation fails -> retry Phase 3 once
123
+
124
+ ### Verification Steps
125
+
126
+ 1. Re-read the scores file via Read
127
+ 2. Parse JSON, validate schema
128
+ 3. Confirm `len(scores) == n_ants_in_iteration`
129
+ 4. Confirm score range
130
+ 5. Write hallucination delta to wisdom for coordinator visibility:
131
+ - For each ant, compute `delta = |self_score - verified_score|`
132
+ - If `delta > 0.4` for >50% of ants in this iter -> append warning to `<session>/wisdom/issues.md`
133
+
134
+ ### State Update
135
+
136
+ ```json
137
+ {
138
+ "task_id": "SCORE-<k>",
139
+ "role": "scorer",
140
+ "status": "completed",
141
+ "iteration": <k>,
142
+ "n_ants_scored": <N>,
143
+ "mean_verified_score": <float>,
144
+ "artifact_path": "<session>/scores/iter-<k>-scores.json",
145
+ "verification": "blind_scored + schema_validated"
146
+ }
147
+ ```
148
+
149
+ ## Error Handling
150
+
151
+ | Scenario | Resolution |
152
+ |----------|------------|
153
+ | No artifacts found for iteration | Report error - coordinator either misrouted or ants all failed |
154
+ | Artifact parse fails | Score = 0.0, rationale = "artifact_invalid", continue |
155
+ | Rubric ambiguous for a path | Default to 0.5 with rationale "rubric_uncertain", lower confidence in calibration |
156
+ | All scores identical | Force differentiation via secondary tiebreaker (path length, evidence count) |
157
+ | Scores file write fails | Retry; on second failure report blocker |