maestro-flow 0.4.17 → 0.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. package/.agents/skills/maestro/SKILL.md +1 -1
  2. package/.agents/skills/maestro-analyze/SKILL.md +5 -0
  3. package/.agents/skills/maestro-blueprint/SKILL.md +5 -0
  4. package/.agents/skills/maestro-brainstorm/SKILL.md +5 -0
  5. package/.agents/skills/maestro-next/SKILL.md +254 -0
  6. package/.agents/skills/team-swarm/SKILL.md +180 -0
  7. package/.agents/skills/team-swarm/roles/analyst/role.md +187 -0
  8. package/.agents/skills/team-swarm/roles/ant/role.md +169 -0
  9. package/.agents/skills/team-swarm/roles/coordinator/commands/converge.md +146 -0
  10. package/.agents/skills/team-swarm/roles/coordinator/commands/init-swarm.md +136 -0
  11. package/.agents/skills/team-swarm/roles/coordinator/commands/iterate.md +232 -0
  12. package/.agents/skills/team-swarm/roles/coordinator/role.md +211 -0
  13. package/.agents/skills/team-swarm/roles/scorer/role.md +157 -0
  14. package/.agents/skills/team-swarm/scripts/aco.py +473 -0
  15. package/.agents/skills/team-swarm/scripts/pheromone.py +144 -0
  16. package/.agents/skills/team-swarm/scripts/scoring.py +92 -0
  17. package/.agents/skills/team-swarm/scripts/test_aco.py +475 -0
  18. package/.agents/skills/team-swarm/specs/ant-output-schema.md +119 -0
  19. package/.agents/skills/team-swarm/specs/convergence-criteria.md +106 -0
  20. package/.agents/skills/team-swarm/specs/pheromone-schema.md +123 -0
  21. package/.agents/skills/team-swarm/specs/swarm-config-template.json +71 -0
  22. package/.agents/skills/team-swarm/specs/swarm-protocol.md +117 -0
  23. package/.agy/skills/maestro/SKILL.md +1 -1
  24. package/.agy/skills/maestro-analyze/SKILL.md +5 -0
  25. package/.agy/skills/maestro-blueprint/SKILL.md +5 -0
  26. package/.agy/skills/maestro-brainstorm/SKILL.md +5 -0
  27. package/.agy/skills/maestro-next/SKILL.md +250 -0
  28. package/.agy/skills/team-swarm/SKILL.md +176 -0
  29. package/.agy/skills/team-swarm/roles/analyst/role.md +183 -0
  30. package/.agy/skills/team-swarm/roles/ant/role.md +165 -0
  31. package/.agy/skills/team-swarm/roles/coordinator/commands/converge.md +134 -0
  32. package/.agy/skills/team-swarm/roles/coordinator/commands/init-swarm.md +136 -0
  33. package/.agy/skills/team-swarm/roles/coordinator/commands/iterate.md +202 -0
  34. package/.agy/skills/team-swarm/roles/coordinator/role.md +209 -0
  35. package/.agy/skills/team-swarm/roles/scorer/role.md +153 -0
  36. package/.agy/skills/team-swarm/scripts/aco.py +473 -0
  37. package/.agy/skills/team-swarm/scripts/pheromone.py +144 -0
  38. package/.agy/skills/team-swarm/scripts/scoring.py +92 -0
  39. package/.agy/skills/team-swarm/scripts/test_aco.py +475 -0
  40. package/.agy/skills/team-swarm/specs/ant-output-schema.md +119 -0
  41. package/.agy/skills/team-swarm/specs/convergence-criteria.md +106 -0
  42. package/.agy/skills/team-swarm/specs/pheromone-schema.md +123 -0
  43. package/.agy/skills/team-swarm/specs/swarm-config-template.json +71 -0
  44. package/.agy/skills/team-swarm/specs/swarm-protocol.md +117 -0
  45. package/.claude/commands/maestro-analyze.md +5 -0
  46. package/.claude/commands/maestro-blueprint.md +5 -0
  47. package/.claude/commands/maestro-brainstorm.md +5 -0
  48. package/.claude/commands/maestro-next.md +252 -0
  49. package/.claude/commands/maestro.md +1 -1
  50. package/.claude/skills/team-swarm/SKILL.md +178 -0
  51. package/.claude/skills/team-swarm/roles/analyst/role.md +185 -0
  52. package/.claude/skills/team-swarm/roles/ant/role.md +167 -0
  53. package/.claude/skills/team-swarm/roles/coordinator/commands/converge.md +146 -0
  54. package/.claude/skills/team-swarm/roles/coordinator/commands/init-swarm.md +136 -0
  55. package/.claude/skills/team-swarm/roles/coordinator/commands/iterate.md +232 -0
  56. package/.claude/skills/team-swarm/roles/coordinator/role.md +209 -0
  57. package/.claude/skills/team-swarm/roles/scorer/role.md +155 -0
  58. package/.claude/skills/team-swarm/scripts/aco.py +473 -0
  59. package/.claude/skills/team-swarm/scripts/pheromone.py +144 -0
  60. package/.claude/skills/team-swarm/scripts/scoring.py +92 -0
  61. package/.claude/skills/team-swarm/scripts/test_aco.py +475 -0
  62. package/.claude/skills/team-swarm/specs/ant-output-schema.md +119 -0
  63. package/.claude/skills/team-swarm/specs/convergence-criteria.md +106 -0
  64. package/.claude/skills/team-swarm/specs/pheromone-schema.md +123 -0
  65. package/.claude/skills/team-swarm/specs/swarm-config-template.json +71 -0
  66. package/.claude/skills/team-swarm/specs/swarm-protocol.md +117 -0
  67. package/.codex/skills/learn-decompose/SKILL.md +34 -3
  68. package/.codex/skills/learn-retro/SKILL.md +31 -1
  69. package/.codex/skills/learn-second-opinion/SKILL.md +34 -4
  70. package/.codex/skills/maestro-analyze/SKILL.md +44 -5
  71. package/.codex/skills/maestro-blueprint/SKILL.md +5 -0
  72. package/.codex/skills/maestro-brainstorm/SKILL.md +46 -0
  73. package/.codex/skills/maestro-execute/SKILL.md +61 -5
  74. package/.codex/skills/maestro-milestone-audit/SKILL.md +64 -13
  75. package/.codex/skills/maestro-milestone-complete/SKILL.md +12 -0
  76. package/.codex/skills/maestro-next/SKILL.md +297 -0
  77. package/.codex/skills/maestro-plan/SKILL.md +36 -1
  78. package/.codex/skills/maestro-player/SKILL.md +25 -6
  79. package/.codex/skills/maestro-ralph/SKILL.md +17 -10
  80. package/.codex/skills/maestro-ralph-execute/SKILL.md +2 -1
  81. package/.codex/skills/maestro-roadmap/SKILL.md +35 -4
  82. package/.codex/skills/maestro-ui-codify/SKILL.md +38 -10
  83. package/.codex/skills/maestro-verify/SKILL.md +40 -5
  84. package/.codex/skills/manage-codebase-rebuild/SKILL.md +52 -5
  85. package/.codex/skills/manage-issue-discover/SKILL.md +106 -15
  86. package/.codex/skills/quality-auto-test/SKILL.md +70 -16
  87. package/.codex/skills/quality-debug/SKILL.md +139 -28
  88. package/.codex/skills/quality-refactor/SKILL.md +61 -11
  89. package/.codex/skills/quality-review/SKILL.md +45 -9
  90. package/.codex/skills/quality-test/SKILL.md +58 -3
  91. package/.codex/skills/security-audit/SKILL.md +38 -0
  92. package/.codex/skills/spec-map/SKILL.md +65 -8
  93. package/.codex/skills/team-coordinate/SKILL.md +28 -11
  94. package/.codex/skills/team-coordinate/specs/role-catalog.md +20 -0
  95. package/.codex/skills/team-lifecycle-v4/SKILL.md +23 -7
  96. package/.codex/skills/team-lifecycle-v4/instructions/agent-instruction.md +20 -0
  97. package/.codex/skills/team-quality-assurance/SKILL.md +40 -2
  98. package/.codex/skills/team-review/SKILL.md +42 -2
  99. package/.codex/skills/team-tech-debt/SKILL.md +45 -2
  100. package/.codex/skills/team-testing/SKILL.md +42 -2
  101. package/dashboard/dist-server/dashboard/src/server/wiki/search.d.ts +6 -4
  102. package/dashboard/dist-server/dashboard/src/server/wiki/search.js +50 -8
  103. package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
  104. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +32 -0
  105. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +294 -0
  106. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
  107. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.d.ts +1 -0
  108. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +35 -1
  109. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
  110. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.test.js +235 -0
  111. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.test.js.map +1 -1
  112. package/dist/src/commands/install.js +5 -1
  113. package/dist/src/commands/install.js.map +1 -1
  114. package/dist/src/i18n/locales/en.d.ts.map +1 -1
  115. package/dist/src/i18n/locales/en.js +9 -0
  116. package/dist/src/i18n/locales/en.js.map +1 -1
  117. package/dist/src/i18n/locales/zh.d.ts.map +1 -1
  118. package/dist/src/i18n/locales/zh.js +9 -0
  119. package/dist/src/i18n/locales/zh.js.map +1 -1
  120. package/dist/src/i18n/types.d.ts +3 -0
  121. package/dist/src/i18n/types.d.ts.map +1 -1
  122. package/dist/src/ralph/cmd-check.js +1 -1
  123. package/dist/src/ralph/cmd-check.js.map +1 -1
  124. package/dist/src/ralph/cmd-complete.js +1 -1
  125. package/dist/src/ralph/cmd-complete.js.map +1 -1
  126. package/dist/src/ralph/cmd-next.d.ts.map +1 -1
  127. package/dist/src/ralph/cmd-next.js +12 -4
  128. package/dist/src/ralph/cmd-next.js.map +1 -1
  129. package/dist/src/ralph/cmd-session.js +2 -2
  130. package/dist/src/ralph/cmd-session.js.map +1 -1
  131. package/dist/src/ralph/status-store.d.ts +8 -1
  132. package/dist/src/ralph/status-store.d.ts.map +1 -1
  133. package/dist/src/ralph/status-store.js +12 -2
  134. package/dist/src/ralph/status-store.js.map +1 -1
  135. package/dist/src/tools/store-knowhow.d.ts.map +1 -1
  136. package/dist/src/tools/store-knowhow.js +51 -64
  137. package/dist/src/tools/store-knowhow.js.map +1 -1
  138. package/dist/src/tui/install-ui/HooksConfig.d.ts +5 -1
  139. package/dist/src/tui/install-ui/HooksConfig.d.ts.map +1 -1
  140. package/dist/src/tui/install-ui/HooksConfig.js +5 -3
  141. package/dist/src/tui/install-ui/HooksConfig.js.map +1 -1
  142. package/dist/src/tui/install-ui/InstallConfirm.d.ts +2 -0
  143. package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
  144. package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
  145. package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
  146. package/dist/src/tui/install-ui/InstallExecution.d.ts +1 -0
  147. package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
  148. package/dist/src/tui/install-ui/InstallExecution.js +26 -3
  149. package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
  150. package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
  151. package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
  152. package/dist/src/tui/install-ui/InstallFlow.js +76 -16
  153. package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
  154. package/dist/src/tui/install-ui/InstallHub.d.ts +2 -0
  155. package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
  156. package/dist/src/tui/install-ui/InstallHub.js +8 -0
  157. package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
  158. package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
  159. package/dist/src/tui/install-ui/InstallResult.js +1 -1
  160. package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
  161. package/dist/src/utils/update-notices.js +23 -0
  162. package/dist/src/utils/update-notices.js.map +1 -1
  163. package/package.json +1 -1
  164. package/workflows/finish-work.md +119 -0
  165. package/workflows/milestone-complete.md +23 -1
@@ -0,0 +1,106 @@
1
+ # Convergence Criteria
2
+
3
+ When does the swarm stop iterating? This spec defines the stop conditions evaluated by `aco.py converged`.
4
+
5
+ ## Stop Conditions (any-of)
6
+
7
+ The swarm stops when **any** of the configured criteria triggers:
8
+
9
+ | Criterion | Default | Description |
10
+ |-----------|---------|-------------|
11
+ | `max_iterations` | 5 | Hard cap on iteration count |
12
+ | `stagnation` | patience = 2 | Best score changes by less than `min_delta` (default 0.01) for `patience` consecutive iterations |
13
+ | `entropy_floor` | 0.5 | Pheromone Shannon entropy drops below threshold (matrix highly concentrated) |
14
+ | `budget_tokens` | 100000 | Total token cost exceeds budget |
15
+ | `target_score` | 0.95 | Best verified_score reaches or exceeds target |
16
+
17
+ ## Configuration
18
+
19
+ ```json
20
+ {
21
+ "convergence": {
22
+ "max_iterations": 5,
23
+ "stagnation": { "enabled": true, "patience": 2, "min_delta": 0.01 },
24
+ "entropy_floor": { "enabled": true, "threshold": 0.5 },
25
+ "budget_tokens": { "enabled": false, "max": 100000 },
26
+ "target_score": { "enabled": true, "value": 0.95 }
27
+ }
28
+ }
29
+ ```
30
+
31
+ ## Output Schema (aco.py converged)
32
+
33
+ ```json
34
+ {
35
+ "converged": true,
36
+ "iteration": 4,
37
+ "reason": "stagnation",
38
+ "metrics": {
39
+ "best_score": 0.78,
40
+ "mean_score": 0.62,
41
+ "entropy": 1.85,
42
+ "iterations_completed": 4,
43
+ "iterations_since_best_change": 2,
44
+ "total_tokens_used": 42000
45
+ },
46
+ "triggered_by": ["stagnation"],
47
+ "recommendation": "best solution is stable; recommend report"
48
+ }
49
+ ```
50
+
51
+ ## Selection Logic
52
+
53
+ ```
54
+ def check_convergence(history, config):
55
+ triggered = []
56
+
57
+ if iteration >= config.max_iterations:
58
+ triggered.append("max_iterations")
59
+
60
+ if config.stagnation.enabled:
61
+ recent = history[-config.stagnation.patience-1:]
62
+ if len(recent) > config.stagnation.patience:
63
+ deltas = [abs(recent[i].best - recent[i-1].best)
64
+ for i in range(1, len(recent))]
65
+ if all(d < config.stagnation.min_delta for d in deltas):
66
+ triggered.append("stagnation")
67
+
68
+ if config.entropy_floor.enabled and current_entropy < config.entropy_floor.threshold:
69
+ triggered.append("entropy_floor")
70
+
71
+ if config.budget_tokens.enabled and total_tokens > config.budget_tokens.max:
72
+ triggered.append("budget_tokens")
73
+
74
+ if config.target_score.enabled and best_score >= config.target_score.value:
75
+ triggered.append("target_score")
76
+
77
+ return {"converged": len(triggered) > 0, "triggered_by": triggered}
78
+ ```
79
+
80
+ ## Entropy Calculation
81
+
82
+ Shannon entropy of normalized pheromone distribution:
83
+
84
+ ```
85
+ p_i = tau_i / sum(tau) for each active edge
86
+ H = -sum(p_i * log2(p_i))
87
+ ```
88
+
89
+ - High H → diverse exploration (early stage)
90
+ - Low H → concentrated on few paths (converging)
91
+ - H < threshold together with a best-score plateau → safe to stop
92
+
93
+ ## Why Multi-Criterion
94
+
95
+ Single criterion is fragile:
96
+ - `max_iterations` alone wastes budget if converged early
97
+ - `stagnation` alone may stop too early on noisy scoring
98
+ - `entropy_floor` alone may trigger before useful solutions emerge
99
+
100
+ Combination = early termination when safe, but always bounded by `max_iterations`.
101
+
102
+ ## Anti-Patterns
103
+
104
+ - DO NOT use `stagnation` with `patience < 2` — noise will trigger false stops
105
+ - DO NOT disable `max_iterations` — runaway risk
106
+ - DO NOT set `target_score` without verified scoring — self_score is too optimistic
@@ -0,0 +1,123 @@
1
+ # Pheromone Schema
2
+
3
+ Pheromone matrix structure, update formula, evaporation rule. Authoritative spec for `pheromone/current.json` and history snapshots.
4
+
5
+ ## File Layout
6
+
7
+ ```
8
+ <session>/pheromone/
9
+ ├── current.json # latest state, overwritten each iteration
10
+ ├── history/
11
+ │ ├── 1.json # snapshot after iteration 1
12
+ │ ├── 2.json
13
+ │ └── ...
14
+ └── init.json # snapshot of initial state (immutable)
15
+ ```
16
+
17
+ ## Schema (pheromone/current.json)
18
+
19
+ ```json
20
+ {
21
+ "version": "1.0",
22
+ "iteration": 3,
23
+ "n_nodes": 42,
24
+ "matrix_type": "edge_weighted_sparse",
25
+ "tau": {
26
+ "<node_a>::<node_b>": 0.85,
27
+ "<node_a>::<node_c>": 1.20,
28
+ "<node_x>::<node_y>": 0.13
29
+ },
30
+ "node_tau": {
31
+ "<node_a>": 0.92,
32
+ "<node_b>": 1.05
33
+ },
34
+ "metadata": {
35
+ "alpha": 1.0,
36
+ "beta": 2.0,
37
+ "rho": 0.2,
38
+ "q": 1.0,
39
+ "tau_init": 1.0,
40
+ "tau_min": 0.01,
41
+ "tau_max": 10.0
42
+ },
43
+ "stats": {
44
+ "mean": 0.91,
45
+ "max": 2.34,
46
+ "min": 0.05,
47
+ "entropy": 3.21,
48
+ "n_edges_active": 87
49
+ }
50
+ }
51
+ ```
52
+
53
+ ### Field Semantics
54
+
55
+ | Field | Type | Meaning |
56
+ |-------|------|---------|
57
+ | `matrix_type` | enum | `edge_weighted_sparse` (default), `node_weighted`, `full_dense` |
58
+ | `tau` | dict | Edge pheromone, key `"a::b"` (undirected uses lexical order) |
59
+ | `node_tau` | dict | Node-level pheromone (used when matrix_type = node_weighted) |
60
+ | `metadata.alpha` | float | Pheromone weight in selection probability |
61
+ | `metadata.beta` | float | Heuristic weight in selection probability |
62
+ | `metadata.rho` | float | Evaporation rate (0..1), applied each iteration |
63
+ | `metadata.q` | float | Deposit constant |
64
+ | `metadata.tau_min/max` | float | MAX-MIN Ant System (MMAS) bounds — `tau_min` prevents premature convergence, `tau_max` prevents pheromone explosion |
65
+ | `stats.entropy` | float | Shannon entropy of normalized tau — convergence signal |
66
+
67
+ ## Update Formula
68
+
69
+ After iteration k completes, for each ant a that ran in iteration k:
70
+
71
+ ```
72
+ delta_tau_a(edge) = q * verified_score_a if edge in path_a, else 0
73
+ ```
74
+
75
+ Then:
76
+
77
+ ```
78
+ tau(edge) = (1 - rho) * tau(edge) + sum_over_ants(delta_tau_a(edge))
79
+ tau(edge) = clip(tau(edge), tau_min, tau_max)
80
+ ```
81
+
82
+ **Elitist strategy** (always on): the best path of all time deposits extra `q * best_score` on its edges before clipping.
83
+
84
+ ## Selection Probability (used in `aco.py select`)
85
+
86
+ For ant at node i choosing neighbor j from candidate set N(i):
87
+
88
+ ```
89
+ p(i -> j) = (tau(i,j)^alpha * eta(i,j)^beta) / sum_{k in N(i)}(tau(i,k)^alpha * eta(i,k)^beta)
90
+ ```
91
+
92
+ where `eta` is a heuristic value from config (e.g., inverse-distance, prior knowledge). Default `eta = 1.0` if not provided.
93
+
94
+ ## Path-Hints vs Full-Path
95
+
96
+ `aco.py select` does NOT prescribe a complete path — it returns **path-hints**: weighted starting nodes + edge probabilities. The ant (LLM) then makes the actual node-by-node choices, with freedom to deviate based on its own evidence. This preserves LLM judgment while keeping search guided.
97
+
98
+ ```json
99
+ {
100
+ "ant_id": "ANT-3-2",
101
+ "start_node": "node_a",
102
+ "edge_preferences": {
103
+ "node_a::node_b": 0.45,
104
+ "node_a::node_c": 0.30,
105
+ "node_a::node_d": 0.25
106
+ },
107
+ "max_path_length": 5
108
+ }
109
+ ```
110
+
111
+ ## Initialization
112
+
113
+ ```
114
+ tau_init for all edges = config.aco.tau_init (default 1.0)
115
+ ```
116
+
117
+ If `task_space.nodes` is `auto_discover_from: <glob>`, `aco.py init` discovers nodes by globbing and initializes a full uniform matrix.
118
+
119
+ ## History & Reproducibility
120
+
121
+ - `init.json` — frozen snapshot of initial state (never overwritten)
122
+ - `history/k.json` — full pheromone state after iteration k (for convergence-curve analysis)
123
+ - All updates are deterministic given (prior state + ant artifacts + config)
@@ -0,0 +1,71 @@
1
+ {
2
+ "_comment": "User-facing config template. Coordinator generates this from task description in Phase 1. Fields prefixed with _ are documentation, omit from real config.",
3
+
4
+ "swarm": {
5
+ "_comment": "Top-level swarm parameters",
6
+ "n_ants": 5,
7
+ "max_iterations": 5,
8
+ "elite_keep": 3
9
+ },
10
+
11
+ "aco": {
12
+ "_comment": "ACO algorithm hyperparameters - defaults are sane",
13
+ "alpha": 1.0,
14
+ "beta": 2.0,
15
+ "rho": 0.2,
16
+ "q": 1.0,
17
+ "tau_init": 1.0,
18
+ "tau_min": 0.01,
19
+ "tau_max": 10.0
20
+ },
21
+
22
+ "task_space": {
23
+ "_comment": "Define the search space. Use EITHER 'nodes' (explicit) OR 'auto_discover_from' (glob).",
24
+ "type": "graph",
25
+ "nodes": ["node_a", "node_b", "node_c"],
26
+ "_alt_auto_discover": "auto_discover_from: 'src/**/*.ts'",
27
+ "max_path_length": 5,
28
+ "start_nodes": "any",
29
+ "edges": "complete"
30
+ },
31
+
32
+ "scoring": {
33
+ "_comment": "How verified_score is computed. mode = script | llm | fallback",
34
+ "mode": "fallback",
35
+ "_alt_script": "script_path: '../my-scoring-rule.py' (file must define def score(ant_artifact) -> float)",
36
+ "_alt_llm": "rubric: 'custom scoring rubric text for scorer role'",
37
+ "self_score_discount": 0.5
38
+ },
39
+
40
+ "ant_prompt": {
41
+ "_comment": "What the ant actually tries to achieve. This is the bridge between task semantics and ACO mechanics.",
42
+ "objective": "Find the file with the highest density of suspicious code patterns",
43
+ "evidence_requirements": [
44
+ "At least 1 file:line reference per node visited",
45
+ "Concrete code snippet showing the suspicious pattern"
46
+ ],
47
+ "tools_hint": "Use Grep + Read for code-based exploration"
48
+ },
49
+
50
+ "convergence": {
51
+ "_comment": "Stop conditions - any-of triggers convergence",
52
+ "max_iterations": 5,
53
+ "stagnation": {
54
+ "enabled": true,
55
+ "patience": 2,
56
+ "min_delta": 0.01
57
+ },
58
+ "entropy_floor": {
59
+ "enabled": true,
60
+ "threshold": 0.5
61
+ },
62
+ "budget_tokens": {
63
+ "enabled": false,
64
+ "max": 100000
65
+ },
66
+ "target_score": {
67
+ "enabled": true,
68
+ "value": 0.95
69
+ }
70
+ }
71
+ }
@@ -0,0 +1,117 @@
1
+ # Swarm Protocol
2
+
3
+ Master protocol document for team-swarm: defines how the LLM coordinator and Python ACO controller interface, how ants explore the task space, and how exploration results flow back to update pheromone state.
4
+
5
+ ## Design Philosophy
6
+
7
+ | Principle | Rationale |
8
+ |-----------|-----------|
9
+ | **Outer loop = script, inner loop = LLM** | Optimization math is cheap and deterministic; LLM evaluation is expensive and noisy. Separate them. |
10
+ | **Coordinator is a hybrid** | LLM coordinator translates user intent + dispatches workers; Python script makes all numeric decisions (selection, update, convergence). |
11
+ | **Schema-locked ant output** | Ant (LLM) output is the algorithm's input, so the bridge between them demands a strict JSON contract. Free-text outputs cannot feed pheromone updates. |
12
+ | **Two-layer scoring** | `self_score` (LLM self-report) is fast but optimistic. `verified_score` (script or LLM scorer) is the only authoritative input to pheromone update. |
13
+ | **Universal task space** | Coordinator does not bake in any domain. User provides `swarm-config.json` with task space + scoring rule. |
14
+
15
+ ## Three-Component Architecture
16
+
17
+ ```
18
+ +-----------------------------------------------------+
19
+ | LLM Coordinator (roles/coordinator/role.md) |
20
+ | - Parses user task → emits swarm-config.json |
21
+ | - Phase 3 main loop: calls script, spawns workers |
22
+ | - Translates worker callbacks back into script ops |
23
+ +--------+--------------------------------------------+
24
+ | Bash subprocess
25
+ v
26
+ +-----------------------------------------------------+
27
+ | Python ACO Controller (scripts/aco.py) |
28
+ | - CLI subcommands: init/select/update/converged |
29
+ | - Owns pheromone matrix + elite tracker |
30
+ | - Pure functions of session state files |
31
+ +-----------------------------------------------------+
32
+ ^ |
33
+ | reads artifacts/ant-*.json | writes pheromone/*.json
34
+ | v
35
+ +-----------------------------------------------------+
36
+ | LLM Workers (team-worker agents) |
37
+ | - ant: explores assigned path, writes JSON |
38
+ | - scorer: assigns verified_score (optional) |
39
+ | - analyst: final synthesis of elite solutions |
40
+ +-----------------------------------------------------+
41
+ ```
42
+
43
+ ## Iteration Lifecycle
44
+
45
+ ```
46
+ [Coordinator] init phase
47
+ └─> python aco.py init --config <session>/swarm-config.json
48
+ └─> writes pheromone/current.json + task-space.json
49
+
50
+ [Coordinator] iteration k (k = 1..K):
51
+ ├─> python aco.py select --iter k
52
+ │ └─> returns N ant assignments (paths to explore)
53
+ ├─> TaskCreate × N ant tasks
54
+ ├─> spawn N × team-worker(ant) in background
55
+ └─> STOP (await all callbacks)
56
+
57
+ [Callback] all ants done → handleIterationComplete
58
+ ├─> (optional) spawn scorer worker → verified_scores.json
59
+ ├─> python aco.py update --iter k
60
+ │ └─> reads artifacts/ant-k-*.json + verified_scores
61
+ │ └─> updates pheromone + elite + history
62
+ ├─> python aco.py converged
63
+ │ └─> {converged: true|false, reason: ...}
64
+ └─> converged → Phase 4; else → iteration k+1
65
+
66
+ [Coordinator] Phase 4: converge
67
+ ├─> python aco.py report → best.json
68
+ ├─> spawn analyst worker → best-solution.md
69
+ └─> completion action (Archive/Keep/Export)
70
+ ```
71
+
72
+ ## Script ↔ Coordinator Contract
73
+
74
+ All scripts MUST:
75
+ - Read from `<session>/...` (session path passed via `--session` flag)
76
+ - Write JSON to stdout for coordinator parsing (no prose)
77
+ - Use exit code 0 = success, 1 = error, 2 = config invalid
78
+ - Be idempotent: calling `update` twice for the same iteration is safe
79
+
80
+ | Subcommand | Input | Output (stdout JSON) | Side effects |
81
+ |------------|-------|---------------------|--------------|
82
+ | `init` | swarm-config.json | `{status, pheromone_path, n_nodes}` | writes pheromone/current.json, task-space.json |
83
+ | `select --iter k` | pheromone/current.json, swarm-config.json | `{iteration, assignments: [{ant_id, path_hints, ...}]}` | none |
84
+ | `update --iter k` | artifacts/ant-k-*.json, optional verified_scores.json | `{iteration, mean_score, best_score, delta, elite_updated}` | writes pheromone/current.json (overwrite) + pheromone/history/k.json + trails/k.jsonl + best.json |
85
+ | `converged` | history/, best.json, config | `{converged: bool, reason: str, metrics: {...}}` | none |
86
+ | `report` | best.json, history/ | full JSON: `{best, top_k, convergence_curve, ...}` | none |
87
+
88
+ ## Data Flow Boundaries
89
+
90
+ | Boundary | Owner | Format |
91
+ |----------|-------|--------|
92
+ | User intent → config | LLM coordinator | swarm-config.json |
93
+ | Pheromone state | Python script | pheromone/current.json |
94
+ | Ant assignment → ant prompt | LLM coordinator (templated) | injected into role-spec at spawn |
95
+ | Ant exploration → artifact | LLM ant | artifacts/ant-k-id.json (schema-locked) |
96
+ | Artifact → pheromone update | Python script | reads artifacts, computes delta tau |
97
+ | Elite solutions → human report | LLM analyst | artifacts/best-solution.md |
98
+
99
+ ## Why Hybrid Coordinator (Not Pure Script)
100
+
101
+ - User input is natural language → needs LLM to map to swarm-config
102
+ - Worker spawning, message routing, session management → all framework-bound to LLM coordinator
103
+ - Script as a sub-tool keeps the same team-* lifecycle (spawn-and-stop, callback-driven)
104
+ - Same pattern as `maestro delegate` — Bash-callable subprocess from inside an LLM role
105
+
106
+ ## Universality
107
+
108
+ `team-swarm` is task-agnostic. Specialization happens via:
109
+ 1. `swarm-config.json#task_space` — defines what nodes/edges/paths mean
110
+ 2. `swarm-config.json#scoring` — defines how to compute verified_score
111
+ 3. `swarm-config.json#ant_prompt` — defines what ant should actually do at each node
112
+
113
+ Example domains the same skill handles:
114
+ - Code exploration (nodes = files/modules, score = suspicious code density)
115
+ - Test case generation (nodes = code paths, score = coverage delta)
116
+ - Refactor strategy search (nodes = refactor moves, score = complexity reduction)
117
+ - Hyperparameter tuning (nodes = param choices, score = metric improvement)
@@ -54,7 +54,7 @@ Resolve target to file list. Load coding specs: `maestro spec load --category co
54
54
 
55
55
  ### Phase 2: Wave 1 — Parallel Dimension Scans
56
56
 
57
- Generate `tasks.csv` with 4 dimension rows (wave 1) + 1 cross-ref row (wave 2):
57
+ Generate `tasks.csv` with 4 dimension rows (wave 1) + 1 cross-ref row (wave 2). Initialize every row with `status="pending"`. Filter `wave==N AND status=="pending"` when writing each wave CSV.
58
58
 
59
59
  | id | dimension | focus |
60
60
  |----|-----------|-------|
@@ -64,7 +64,38 @@ Generate `tasks.csv` with 4 dimension rows (wave 1) + 1 cross-ref row (wave 2):
64
64
  | 4 | error | Boundaries, retry/backoff, fallbacks, guards, logging |
65
65
  | 5 | cross-ref | Dedup + catalog from wave 1 findings |
66
66
 
67
- Each dimension agent returns:
67
+ **output_schema** (both waves):
68
+
69
+ ```json
70
+ {
71
+ "type": "object",
72
+ "properties": {
73
+ "id": { "type": "string" },
74
+ "result_status": { "type": "string", "enum": ["completed", "failed"] },
75
+ "dimension": { "type": "string", "enum": ["structural", "behavioral", "data", "error", "cross-ref"] },
76
+ "patterns": { "type": "string", "description": "JSON array string: [{name, dimension, confidence, anchors, description, rationale, tradeoffs}]" },
77
+ "findings": { "type": "string", "maxLength": 500 },
78
+ "error": { "type": "string" }
79
+ },
80
+ "required": ["id", "result_status", "findings"]
81
+ }
82
+ ```
83
+
84
+ Merge: `result_status` → master `status`; copy `dimension`, `patterns`, `findings`, `error`.
85
+
86
+ **Shared termination contract** (embed in every instruction):
87
+ ```
88
+ You MUST call report_agent_job_result EXACTLY ONCE before exiting.
89
+ - Success → result_status=completed (patterns may be empty array if nothing found)
90
+ - Failure → result_status=failed with error message
91
+ - Timeout (approaching max_runtime_seconds) → result_status=completed with partial patterns
92
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
93
+ - Every finding MUST include file:line anchors. No speculation.
94
+ - Read-only analysis. Do NOT modify source.
95
+ Do NOT write to tasks.csv, wave-*.csv, results.csv. Do NOT call spawn_agents_on_csv (no recursion).
96
+ ```
97
+
98
+ Each dimension agent populates `patterns` as a JSON array string of:
68
99
  ```json
69
100
  [{
70
101
  "name": "pattern name",
@@ -79,7 +110,7 @@ Each dimension agent returns:
79
110
 
80
111
  ### Phase 3: Wave 2 — Cross-Reference + Catalog
81
112
 
82
- Single agent receives all wave 1 findings via `prev_context`. Tasks:
113
+ Single agent receives all wave 1 findings via `prev_context`. Uses same `output_schema` + termination contract above. Tasks:
83
114
  - Match against dedup set → mark as `documented`, `known`, or `new`
84
115
  - Merge duplicates across dimensions (same pattern found by multiple agents)
85
116
  - Flag contradictions with documented conventions
@@ -44,7 +44,7 @@ $ARGUMENTS — lens selection and scope flags.
44
44
  **3a: Collect decisions** from wiki, specs, git log, phase context, .workflow/specs/learnings.md.
45
45
  **3b: Build decision registry** per decision (id, title, source, rationale, alternatives, evidence).
46
46
 
47
- **3c: Multi-perspective evaluation** via spawn_agents_on_csv (3 parallel agents):
47
+ **3c: Multi-perspective evaluation** via spawn_agents_on_csv (3 parallel agents; filter `wave==1 AND status=="pending"`):
48
48
 
49
49
  | id | perspective | focus |
50
50
  |----|------------|-------|
@@ -52,6 +52,36 @@ $ARGUMENTS — lens selection and scope flags.
52
52
  | 2 | cost | Complexity added, coupling, tech debt. Grade: low-cost/acceptable/expensive |
53
53
  | 3 | hindsight | Right call with current knowledge? Grade: confirmed/questionable/should-revisit |
54
54
 
55
+ **output_schema**:
56
+
57
+ ```json
58
+ {
59
+ "type": "object",
60
+ "properties": {
61
+ "id": { "type": "string" },
62
+ "result_status": { "type": "string", "enum": ["completed", "failed"] },
63
+ "perspective": { "type": "string", "enum": ["technical", "cost", "hindsight"] },
64
+ "grade": { "type": "string" },
65
+ "findings": { "type": "string", "maxLength": 500 },
66
+ "error": { "type": "string" }
67
+ },
68
+ "required": ["id", "result_status", "grade", "findings"]
69
+ }
70
+ ```
71
+
72
+ Merge: `result_status` → master `status`; copy `perspective`, `grade`, `findings`, `error`.
73
+
74
+ **Shared termination contract** (embed in every instruction):
75
+ ```
76
+ You MUST call report_agent_job_result EXACTLY ONCE before exiting.
77
+ - Success → result_status=completed with concrete grade
78
+ - Failure → result_status=failed with error message
79
+ - Timeout (approaching max_runtime_seconds) → result_status=failed, error="timeout (partial)"
80
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
81
+ - Read-only analysis. Do NOT modify source files.
82
+ Do NOT write to tasks.csv, wave-*.csv, results.csv. Do NOT call spawn_agents_on_csv (no recursion).
83
+ ```
84
+
55
85
  **3d: Classify lifecycle**: Validated / Aging / Questionable / Stale / Reversed.
56
86
 
57
87
  ### Phase 4: Unified Report
@@ -47,12 +47,42 @@ Resolve target to content. Load specs, wiki search, prior lessons for context br
47
47
  | 3 | strategist | Scalability, extensibility, architecture alignment | coupling, cohesion |
48
48
  | 4 | synthesis | Merge verdicts → agreements, disagreements, top 3 recommendations | combined verdict |
49
49
 
50
- Wave 1: 3 persona agents in parallel. Wave 2: synthesis agent with wave 1 findings as prev_context.
51
-
52
- Each persona returns: `{ persona, verdict: approve|concern|reject, confidence, findings: [{severity, description, location, suggestion}], summary }`
50
+ Wave 1: 3 persona agents in parallel (filter `wave==1 AND status=="pending"`). Wave 2: synthesis agent with wave 1 findings as prev_context.
51
+
52
+ **output_schema** (both waves):
53
+
54
+ ```json
55
+ {
56
+ "type": "object",
57
+ "properties": {
58
+ "id": { "type": "string" },
59
+ "result_status": { "type": "string", "enum": ["completed", "failed"] },
60
+ "persona": { "type": "string" },
61
+ "verdict": { "type": "string", "enum": ["approve", "concern", "reject", ""] },
62
+ "confidence": { "type": "string", "description": "0-100" },
63
+ "findings": { "type": "string", "description": "JSON array of {severity, description, location, suggestion}, serialized as a string and condensed to at most 500 chars" },
64
+ "summary": { "type": "string", "maxLength": 500 },
65
+ "error": { "type": "string" }
66
+ },
67
+ "required": ["id", "result_status", "summary"]
68
+ }
69
+ ```
70
+
71
+ Merge: `result_status` → master `status`; copy `persona`, `verdict`, `confidence`, `findings`, `summary`, `error`.
72
+
73
+ **Shared termination contract** (embed in every instruction):
74
+ ```
75
+ You MUST call report_agent_job_result EXACTLY ONCE before exiting.
76
+ - Success → result_status=completed with concrete verdict
77
+ - Failure → result_status=failed with error message
78
+ - Timeout (approaching max_runtime_seconds) → result_status=failed, error="timeout (partial)"
79
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
80
+ - Read-only analysis. Do NOT modify source files.
81
+ Do NOT write to tasks.csv, wave-*.csv, results.csv. Do NOT call spawn_agents_on_csv (no recursion).
82
+ ```
53
83
 
54
84
  #### Challenge Mode
55
- Single agent via spawn_agents_on_csv (1 worker). Adversarial analysis with forcing questions:
85
+ Single agent via spawn_agents_on_csv (max_concurrency: 1) with the same `output_schema` + termination contract above. Adversarial analysis with forcing questions:
56
86
  - "What assumption would invalidate this entire approach?"
57
87
  - "What's the simplest thing that breaks this?"
58
88
  - "What's the implicit contract that isn't enforced?"
@@ -152,29 +152,63 @@ S_AGGREGATE:
152
152
 
153
153
  <actions>
154
154
 
155
+ ### Shared Spawn Contract (all three waves)
156
+
157
+ Every `spawn_agents_on_csv` call in this skill MUST use the strict JSON Schema below and the shared termination contract.
158
+
159
+ **Output Schema**:
160
+
161
+ ```json
162
+ {
163
+ "type": "object",
164
+ "properties": {
165
+ "id": { "type": "string" },
166
+ "result_status": { "type": "string", "enum": ["completed", "failed", "blocked"] },
167
+ "findings": { "type": "string", "maxLength": 500 },
168
+ "score": { "type": "string", "description": "0-100 (wave 2 scoring only)" },
169
+ "evidence": { "type": "string", "description": "Code refs file:line (wave 1/2)" },
170
+ "error": { "type": "string" }
171
+ },
172
+ "required": ["id", "result_status", "findings"]
173
+ }
174
+ ```
175
+
176
+ Merge step: `result_status` → master `status`; copy `findings`, `score`, `evidence`, `error`.
177
+
178
+ **Shared termination contract** (`SHARED_TERMINATION_CONTRACT`; embed in every instruction):
179
+ ```
180
+ You MUST call report_agent_job_result EXACTLY ONCE before exiting.
181
+ - Success → result_status=completed
182
+ - Failure → result_status=failed with error message
183
+ - Blocked → upstream missing → result_status=blocked
184
+ - Timeout → near max_runtime_seconds → result_status=blocked, error="timeout"
185
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
186
+ Do NOT write to tasks.csv, wave-*.csv, results.csv. Do NOT call spawn_agents_on_csv (no recursion).
187
+ ```
188
+
155
189
  ### A_SPAWN_WAVE_1
156
190
 
157
- Filter wave==1 -> write wave-1.csv -> `spawn_agents_on_csv({ csv_path, max_concurrency })`.
191
+ Filter `wave==1 AND status=="pending"` -> write wave-1.csv -> `spawn_agents_on_csv({ csv_path, id_column:"id", instruction: EXPLORATION_INSTRUCTION + SHARED_TERMINATION_CONTRACT, max_concurrency, max_runtime_seconds: 3600, output_csv_path, output_schema })`.
158
192
 
159
193
  **Exploration agent** (3-layer per dimension):
160
194
  1. Module Discovery (breadth): keyword search, relevant files, module boundaries
161
195
  2. Structure Tracing (depth): top 3-5 files, call chains 2-3 levels, data flow
162
196
  3. Code Anchor Extraction (detail): code snippet 20-50 lines with file:line per finding
163
197
 
164
- Share via discovery board. Merge results -> master tasks.csv.
198
+ Share via discovery board. Merge results -> master tasks.csv (map `result_status` → master `status`).
165
199
 
166
200
  ### A_SPAWN_WAVE_2
167
201
 
168
- Filter wave==2 -> build prev_context from wave 1 findings -> write wave-2.csv -> spawn.
202
+ Filter `wave==2 AND status=="pending"` -> build prev_context from wave 1 findings -> write wave-2.csv -> spawn with `SCORING_INSTRUCTION + SHARED_TERMINATION_CONTRACT`.
169
203
 
170
204
  **Scoring agent** (6 dimensions: feasibility, impact, risk, complexity, alignment, maintainability):
171
205
  Score 0-100 with specific evidence (code refs from exploration). Each score MUST reference exploration findings.
172
206
 
173
- Merge results -> master tasks.csv.
207
+ Merge results -> master tasks.csv (map `result_status` → master `status`).
174
208
 
175
209
  ### A_SPAWN_WAVE_3
176
210
 
177
- Filter wave==3 -> build prev_context from wave 2 scores (or project context for quick mode) -> spawn.
211
+ Filter `wave==3 AND status=="pending"` -> build prev_context from wave 2 scores (or project context for quick mode) -> spawn with `SYNTHESIS_INSTRUCTION + SHARED_TERMINATION_CONTRACT`.
178
212
 
179
213
  **Synthesis agent**:
180
214
  - Full mode: analysis.md (executive summary, per-dimension scores, risk matrix, Go/No-Go), context.md (Locked/Free/Deferred decisions), context-package.json, conclusions.json (with `scope_verdict` + `implementation_scope[]`)
@@ -262,5 +296,10 @@ Protocol: read before analysis, append-only, dedup by type+key.
262
296
  - [ ] Upstream context loaded via `--from` when specified
263
297
  - [ ] discoveries.ndjson append-only throughout
264
298
  - [ ] Next step routed (plan for Go, brainstorm for No-Go, plan --gaps for Gaps)
299
+ - [ ] Session sealed via finish-work (archive.json written, optional spec/knowhow extraction)
265
300
  </success_criteria>
301
+
302
+ <on_complete>
303
+ @~/.maestro/workflows/finish-work.md — SESSION_DIR=OUTPUT_DIR, SESSION_TYPE=analyze, SESSION_ID={artifact_id}, LINKED_MILESTONE={target_milestone or null}
304
+ </on_complete>
266
305
  </output>
@@ -120,4 +120,9 @@ P6 gate: Pass (>=80%) → Handoff | Review (60-79%) → Handoff w/caveats | Fail
120
120
  - [ ] Readiness gate: Pass (>=80%) or Review (>=60%) with documented caveats
121
121
  - [ ] Artifact registered in state.json (type=blueprint)
122
122
  - [ ] context-package.json generated for downstream consumption
123
+ - [ ] On gate Pass/Review: session sealed via finish-work (archive.json + optional spec/knowhow extraction). On Fail: skip — session stays active, excluded from wiki search.
123
124
  </success_criteria>
125
+
126
+ <on_complete>
127
+ @~/.maestro/workflows/finish-work.md — SESSION_DIR={session_dir}, SESSION_TYPE=blueprint, SESSION_ID={session_id}, LINKED_MILESTONE=null
128
+ </on_complete>