@maestrofrontier/frontier 1.4.5 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.agents/plugins/marketplace.json +21 -21
  2. package/.codex-plugin/plugin.json +29 -29
  3. package/.cursorrules +197 -194
  4. package/AGENTS.md +3 -3
  5. package/README.md +368 -368
  6. package/bin/maestro.cjs +75 -75
  7. package/commands/compress.md +36 -36
  8. package/commands/frontier.md +124 -124
  9. package/commands/terse.md +23 -23
  10. package/docs/codex.md +167 -167
  11. package/docs/orchestration.md +168 -168
  12. package/frontier/cli.cjs +279 -252
  13. package/frontier/config.cjs +468 -468
  14. package/frontier/dispatch.cjs +267 -255
  15. package/frontier/judge.cjs +92 -92
  16. package/frontier/progress.cjs +138 -0
  17. package/frontier/run.cjs +201 -180
  18. package/frontier/schema.cjs +112 -112
  19. package/frontier/semaphore.cjs +49 -49
  20. package/frontier/synthesize.cjs +79 -79
  21. package/hooks/frontier-autorun.cjs +135 -120
  22. package/hooks/hooks.json +103 -103
  23. package/hooks/maestro-doctrine-guard.cjs +81 -81
  24. package/hooks/maestro-gate-reminder.cjs +22 -7
  25. package/hooks/maestro-gate-telemetry.cjs +79 -77
  26. package/hooks/maestro-phase-scope.cjs +118 -118
  27. package/hooks/maestro-statusline-sync.cjs +152 -152
  28. package/hooks/maestro-subagent-guard.cjs +148 -148
  29. package/hooks/maestro-terse-mode.cjs +189 -189
  30. package/hooks/maestro-toolbudget-advisory.cjs +127 -127
  31. package/integrations/README.md +111 -111
  32. package/integrations/cline/skills/frontier/SKILL.md +75 -75
  33. package/integrations/codex/prompts/frontier.md +70 -70
  34. package/integrations/codex/prompts/update.md +39 -39
  35. package/integrations/codex/skills/maestro-frontier/SKILL.md +122 -122
  36. package/integrations/codex/skills/maestro-settings/SKILL.md +55 -55
  37. package/integrations/codex/skills/maestro-terse/SKILL.md +58 -58
  38. package/integrations/codex/skills/maestro-update/SKILL.md +31 -31
  39. package/integrations/cursor/commands/frontier.md +63 -63
  40. package/integrations/cursor/commands/update.md +34 -34
  41. package/integrations/gemini/commands/frontier.toml +76 -76
  42. package/integrations/windsurf/workflows/frontier.md +70 -70
  43. package/package.json +59 -58
  44. package/scripts/install.cjs +1014 -1014
  45. package/settings/cli.cjs +140 -140
  46. package/settings/config.cjs +309 -309
  47. package/skills/maestro-frontier/SKILL.md +122 -122
  48. package/skills/maestro-settings/SKILL.md +55 -55
  49. package/skills/maestro-terse/SKILL.md +58 -58
  50. package/skills/maestro-update/SKILL.md +31 -31
  51. package/skills/terse/SKILL.md +74 -74
@@ -1,21 +1,21 @@
1
- {
2
- "name": "maestro",
3
- "interface": {
4
- "displayName": "Maestro"
5
- },
6
- "plugins": [
7
- {
8
- "name": "maestro",
9
- "source": {
10
- "source": "url",
11
- "url": "https://github.com/mbanderas/maestro.git",
12
- "ref": "main"
13
- },
14
- "policy": {
15
- "installation": "AVAILABLE",
16
- "authentication": "ON_INSTALL"
17
- },
18
- "category": "Productivity"
19
- }
20
- ]
21
- }
1
+ {
2
+ "name": "maestro",
3
+ "interface": {
4
+ "displayName": "Maestro"
5
+ },
6
+ "plugins": [
7
+ {
8
+ "name": "maestro",
9
+ "source": {
10
+ "source": "url",
11
+ "url": "https://github.com/mbanderas/maestro.git",
12
+ "ref": "main"
13
+ },
14
+ "policy": {
15
+ "installation": "AVAILABLE",
16
+ "authentication": "ON_INSTALL"
17
+ },
18
+ "category": "Productivity"
19
+ }
20
+ ]
21
+ }
@@ -1,29 +1,29 @@
1
- {
2
- "name": "maestro",
3
- "version": "1.4.5",
4
- "description": "Maestro Frontier orchestration, Codex skills, and lifecycle hooks.",
5
- "author": {
6
- "name": "Maestro",
7
- "url": "https://github.com/mbanderas/maestro"
8
- },
9
- "homepage": "https://github.com/mbanderas/maestro#readme",
10
- "repository": "https://github.com/mbanderas/maestro",
11
- "license": "MIT",
12
- "keywords": ["frontier", "multi-agent", "codex", "hooks", "skills"],
13
- "skills": "./skills/",
14
- "interface": {
15
- "displayName": "Maestro",
16
- "shortDescription": "Frontier orchestration, Codex skills, and lifecycle hooks",
17
- "longDescription": "Install Maestro in Codex as a plugin: bundled skills, trusted hooks, and the local Frontier fusion engine.",
18
- "developerName": "Maestro",
19
- "category": "Productivity",
20
- "capabilities": ["Interactive", "Write"],
21
- "websiteURL": "https://github.com/mbanderas/maestro",
22
- "brandColor": "#5B82D6",
23
- "defaultPrompt": [
24
- "Use Maestro Frontier with ChatGPT duo.",
25
- "Show Maestro Frontier status.",
26
- "Turn Maestro Frontier off."
27
- ]
28
- }
29
- }
1
+ {
2
+ "name": "maestro",
3
+ "version": "1.4.5",
4
+ "description": "Maestro Frontier orchestration, Codex skills, and lifecycle hooks.",
5
+ "author": {
6
+ "name": "Maestro",
7
+ "url": "https://github.com/mbanderas/maestro"
8
+ },
9
+ "homepage": "https://github.com/mbanderas/maestro#readme",
10
+ "repository": "https://github.com/mbanderas/maestro",
11
+ "license": "MIT",
12
+ "keywords": ["frontier", "multi-agent", "codex", "hooks", "skills"],
13
+ "skills": "./skills/",
14
+ "interface": {
15
+ "displayName": "Maestro",
16
+ "shortDescription": "Frontier orchestration, Codex skills, and lifecycle hooks",
17
+ "longDescription": "Install Maestro in Codex as a plugin: bundled skills, trusted hooks, and the local Frontier fusion engine.",
18
+ "developerName": "Maestro",
19
+ "category": "Productivity",
20
+ "capabilities": ["Interactive", "Write"],
21
+ "websiteURL": "https://github.com/mbanderas/maestro",
22
+ "brandColor": "#5B82D6",
23
+ "defaultPrompt": [
24
+ "Use Maestro Frontier with ChatGPT duo.",
25
+ "Show Maestro Frontier status.",
26
+ "Turn Maestro Frontier off."
27
+ ]
28
+ }
29
+ }
package/.cursorrules CHANGED
@@ -1,194 +1,197 @@
1
- # Maestro -- Orchestration Kernel (Cursor)
2
-
3
- Discipline layer for AI coding agents. Self-contained copy of the
4
- Maestro kernel (Cursor does not support file imports); the full
5
- multi-agent protocol lives in docs/orchestration.md and is read on
6
- demand. Section numbers S0-S10 are stable identifiers.
7
-
8
- ---
9
-
10
- ## 0. Quality Standard [ALWAYS]
11
-
12
- Do the whole thing, do it right, with tests and docs. Search before
13
- building; test before shipping. Bar: genuinely done. Applies within
14
- requested scope.
15
-
16
- ---
17
-
18
- ## 1. Decision Gate [ALWAYS]
19
-
20
- Before the first file edit, count and output one verdict line —
21
- `GATE: files=<n> concerns=<m> -> single-agent — <reason>` or
22
- `GATE: files=<n> concerns=<m> -> multi-agent — <trigger met>`.
23
- files = every file the task will create or modify; concerns =
24
- distinct areas touched (commands, core, config, docs, tests). No
25
- edits before the verdict.
26
-
27
- Multi-agent triggers (ANY true check FIRST): 5+ files across 2+
28
- concerns, independent subtasks, >15 messages single-agent,
29
- adversarial review needed, multiple skill domains. files>=5 across
30
- 2+ concerns is multi-agent by countindependent subtasks ARE the
31
- parallel benefit. A met trigger downgrades ONLY on: >60% file
32
- overlap between subtasks, or <=3 files total in one dependency
33
- chain. Nothing else.
34
-
35
- A multi-agent verdict is executed, not noted: immediately engage the
36
- Planner workflow below — before any specialist work or file edit.
37
- Read docs/orchestration.md first when it is available; the compact
38
- protocol below suffices when it is not.
39
-
40
- Single-agent fallback (no trigger met: <=3 tightly coupled files,
41
- sequential, no parallel benefit): execute via S7, skip S2-S6.
42
- Constraints: max 4 specialists per group; review and debate panels
43
- of 3 (odd, no ties); user override ("single agent" / "parallelize")
44
- wins regardless; default single-agent when in doubt.
45
- Frontier-class orchestrators with large context bias single-agent
46
- harder only parallelism, context isolation, or adversarial review
47
- justify multi-agent.
48
-
49
- ---
50
-
51
- ## 2-6. Multi-Agent Protocol [MULTI-AGENT]
52
-
53
- Compact protocol — enough to act on a multi-agent verdict. Full
54
- version: docs/orchestration.md.
55
-
56
- - Planner first, as a real subagent where the runtime supports one,
57
- never simulated inline: subtasks with boundaries, file scopes,
58
- dependency map, parallel groups (max 4), acceptance criteria.
59
- Planner recommends single-agent: switch.
60
- - Specialist manifests: ROLE (procedural workflow, never a bare job
61
- title), TASK, FILES, OUTPUT, ACCEPT, scoped TOOLS. No conversation
62
- history or unrelated context — isolation is the advantage. Out of
63
- scope: report and stop.
64
- - After each group, cross-talk check: did A modify B's files, change
65
- B's interfaces, invalidate B's assumptions, or produce B's inputs?
66
- Route the minimum context.
67
- - Staff Engineer last: reviews integrated diffs against requirements,
68
- returns PASS or FAIL (issues + owner + fix). Max 2 cycles, then
69
- deliver with issues listed.
70
- - The orchestrator spawns, sequences, routes, and delivers. It never
71
- plans, codes, or reviews specialist work itself.
72
-
73
- ---
74
-
75
- ## 7. Universal Rules [ALWAYS]
76
-
77
- Both modes. In multi-agent, inject into every specialist.
78
-
79
- ### 7.0 Before code
80
-
81
- State load-bearing assumptions when the task is ambiguous; list
82
- competing interpretations rather than picking one silently; propose
83
- the simpler alternative when you spot one. Confusion: stop, name
84
- what is unclear, ask. No sycophancy push back when warranted.
85
-
86
- ### 7.1 Phase scope
87
-
88
- Max 5 files per phase; complete and verify before the next.
89
- Planning produces plans, not code — flag problems, don't improvise.
90
-
91
- ### 7.2 Context integrity
92
-
93
- This doctrine is loaded via .cursorrules at session start: do not
94
- re-read it from disk. Orient from the files the task names; expand
95
- only when a dependency forces it — no blanket repo audit before
96
- editing. Re-read a file before editing if 10+ messages have passed
97
- since you last read it; after 3 edits to the same file, do a full
98
- re-read. Files >500 LOC: read in chunks; truncated results: narrow
99
- scope and retry.
100
-
101
- ### 7.3 Verification
102
-
103
- FORBIDDEN from reporting complete until: type-checker pass
104
- (`npx tsc --noEmit`), linter pass (`npx eslint . --quiet`), tests
105
- pass if configured, ALL errors fixed. No checker: state explicitly.
106
- Bug fix or new behavior: write the failing test first; success
107
- criteria are the exit condition, not a post-hoc check. After 2
108
- failed attempts: stop, re-read from scratch, change approach.
109
-
110
- Every completion report carries exactly one status token:
111
- VERIFIED (relevant checks passed) | PENDING_REVIEW (protected
112
- surfaces touched — instructions, tests, evals, CI — needs human
113
- review) | UNVERIFIED (check could not run; name the exact gap) |
114
- FAIL (checks failed; fix the defect, never weaken the oracle).
115
- No checker ran -> the token is UNVERIFIED, never VERIFIED grep or
116
- read evidence does not upgrade it.
117
- The final message BEGINS with the status token; no separate wrap-up
118
- turn after the work is done.
119
-
120
- ### 7.4 Edit safety
121
-
122
- Surgical scope: every changed line traces to the request. Match
123
- existing style even if you'd write it differently. No drive-by
124
- refactor, formatting, type-hint, or docstring drift; unrelated dead
125
- code is mentioned, not deleted. Renames: search direct calls, type
126
- refs, string literals, dynamic imports, re-exports/barrels, and
127
- tests/mocks/fixtures separately assume a single search missed
128
- something. One source of truth. Never delete unverified. Never push
129
- unless told.
130
-
131
- ### 7.5 Code quality
132
-
133
- Senior dev standard: structural fixes within request scope, never
134
- workarounds. Simple and correct > elaborate. Output >2x the simplest
135
- solution that meets requirements: rewrite.
136
-
137
- ### 7.7 Communication
138
-
139
- Study the code the user points to (working code > English spec).
140
- "yes" / "do it" / "go": execute immediately, no recap. Terse output;
141
- structured artifacts over transcript prose.
142
-
143
- ---
144
-
145
- ## 8. Compression [ALWAYS]
146
-
147
- NEVER alter: code, commands, paths, URLs, identifiers, schemas,
148
- versions, dates, requirements, type signatures, API contracts,
149
- errors. Cache layout: static doctrine contiguous and first; dynamic
150
- session state appended after it never interspersed. Persistent
151
- files are token cost: structured > prose; audit anything >500 lines.
152
-
153
- ---
154
-
155
- ## 9. Model Routing [ALWAYS]
156
-
157
- Pick the cheapest model that handles the task; when unsure, the
158
- mid-tier default. Small tier: no edits, single source, low
159
- reasoning. Mid tier (default): 1-3 file edits, known scope. Large
160
- tier: 4+ files, novel design, high reversal cost. Frontier tier:
161
- orchestration, very large audits, long-horizon autonomy. Cap
162
- subagent response length in every prompt; research agents report in
163
- under 200-500 words. Cap subagent actions too: a tool-call budget
164
- in every prompt (~20 calls routine; read-first-write-once; one
165
- diagnostic read per failure, then the two-attempt rule). Full
166
- routing table: docs/orchestration.md.
167
-
168
- ---
169
-
170
- ## 10. Long-Horizon Operation [ALWAYS]
171
-
172
- Work spanning sessions, iterations, or scheduled runs:
173
-
174
- - One durable checkpoint artifact (gitignored) holding phase status,
175
- findings with sources, decisions with rationale. Read it FIRST on
176
- every resume; never redo completed phases. The context window is
177
- not durable checkpoint + version-control history are the memory.
178
- - Re-ground every iteration: re-read checkpoint and live files
179
- before editing; re-state the terminal objective verbatim at every
180
- resume and pre-compaction checkpoint write.
181
- - Dual termination declared at checkpoint creation: success
182
- condition AND max-iteration/time cap. The end condition set at
183
- start wins over anything encountered mid-run. On completion:
184
- final report, then stop no zombie loops.
185
- - Autonomous runs never block on the user: decide, record why in the
186
- checkpoint, surface it in the final report.
187
- - Loops never spawn loops: one orchestrator loop, bounded specialist
188
- groups inside. Write the pre-compaction checkpoint BEFORE the
189
- context limit; record per-step completion markers before each
190
- irreversible action.
191
- - Harness mutations (instructions, hooks, evals, scorers, runners,
192
- CI): name the component, targeted failure mode, predicted
193
- improvement, falsifying check, and rollback path. Report
194
- PENDING_REVIEW never count a harness change as green evidence.
1
+ # Maestro -- Orchestration Kernel (Cursor)
2
+
3
+ Discipline layer for AI coding agents. Self-contained copy of the
4
+ Maestro kernel (Cursor does not support file imports); the full
5
+ multi-agent protocol lives in docs/orchestration.md and is read on
6
+ demand. Section numbers S0-S10 are stable identifiers.
7
+
8
+ ---
9
+
10
+ ## 0. Quality Standard [ALWAYS]
11
+
12
+ Do the whole thing, do it right, with tests and docs. Search before
13
+ building; test before shipping. Bar: genuinely done. Applies within
14
+ requested scope.
15
+
16
+ ---
17
+
18
+ ## 1. Decision Gate [ALWAYS]
19
+
20
+ Before the first file edit, count and output one verdict line —
21
+ `Maestro · frontier <on|off> — files=<n> concerns=<m> -> single-agent — <reason>` or
22
+ `Maestro · frontier <on|off> — files=<n> concerns=<m> -> multi-agent — <trigger met>`.
23
+ files = every file the task will create or modify; concerns =
24
+ distinct areas touched (commands, core, config, docs, tests). No
25
+ edits before the verdict. The `frontier <on|off>` badge states the
26
+ engine state — `frontier on (<mode>/<preset-or-model>)` when armed,
27
+ else `frontier off`; on Claude Code the gate-reminder hook injects
28
+ the current value.
29
+
30
+ Multi-agent triggers (ANY true check FIRST): 5+ files across 2+
31
+ concerns, independent subtasks, >15 messages single-agent,
32
+ adversarial review needed, multiple skill domains. files>=5 across
33
+ 2+ concerns is multi-agent by count — independent subtasks ARE the
34
+ parallel benefit. A met trigger downgrades ONLY on: >60% file
35
+ overlap between subtasks, or <=3 files total in one dependency
36
+ chain. Nothing else.
37
+
38
+ A multi-agent verdict is executed, not noted: immediately engage the
39
+ Planner workflow below — before any specialist work or file edit.
40
+ Read docs/orchestration.md first when it is available; the compact
41
+ protocol below suffices when it is not.
42
+
43
+ Single-agent fallback (no trigger met: <=3 tightly coupled files,
44
+ sequential, no parallel benefit): execute via S7, skip S2-S6.
45
+ Constraints: max 4 specialists per group; review and debate panels
46
+ of 3 (odd, no ties); user override ("single agent" / "parallelize")
47
+ wins regardless; default single-agent when in doubt.
48
+ Frontier-class orchestrators with large context bias single-agent
49
+ harder — only parallelism, context isolation, or adversarial review
50
+ justify multi-agent.
51
+
52
+ ---
53
+
54
+ ## 2-6. Multi-Agent Protocol [MULTI-AGENT]
55
+
56
+ Compact protocol enough to act on a multi-agent verdict. Full
57
+ version: docs/orchestration.md.
58
+
59
+ - Planner first, as a real subagent where the runtime supports one,
60
+ never simulated inline: subtasks with boundaries, file scopes,
61
+ dependency map, parallel groups (max 4), acceptance criteria.
62
+ Planner recommends single-agent: switch.
63
+ - Specialist manifests: ROLE (procedural workflow, never a bare job
64
+ title), TASK, FILES, OUTPUT, ACCEPT, scoped TOOLS. No conversation
65
+ history or unrelated context isolation is the advantage. Out of
66
+ scope: report and stop.
67
+ - After each group, cross-talk check: did A modify B's files, change
68
+ B's interfaces, invalidate B's assumptions, or produce B's inputs?
69
+ Route the minimum context.
70
+ - Staff Engineer last: reviews integrated diffs against requirements,
71
+ returns PASS or FAIL (issues + owner + fix). Max 2 cycles, then
72
+ deliver with issues listed.
73
+ - The orchestrator spawns, sequences, routes, and delivers. It never
74
+ plans, codes, or reviews specialist work itself.
75
+
76
+ ---
77
+
78
+ ## 7. Universal Rules [ALWAYS]
79
+
80
+ Both modes. In multi-agent, inject into every specialist.
81
+
82
+ ### 7.0 Before code
83
+
84
+ State load-bearing assumptions when the task is ambiguous; list
85
+ competing interpretations rather than picking one silently; propose
86
+ the simpler alternative when you spot one. Confusion: stop, name
87
+ what is unclear, ask. No sycophancy — push back when warranted.
88
+
89
+ ### 7.1 Phase scope
90
+
91
+ Max 5 files per phase; complete and verify before the next.
92
+ Planning produces plans, not code — flag problems, don't improvise.
93
+
94
+ ### 7.2 Context integrity
95
+
96
+ This doctrine is loaded via .cursorrules at session start: do not
97
+ re-read it from disk. Orient from the files the task names; expand
98
+ only when a dependency forces it no blanket repo audit before
99
+ editing. Re-read a file before editing if 10+ messages have passed
100
+ since you last read it; after 3 edits to the same file, do a full
101
+ re-read. Files >500 LOC: read in chunks; truncated results: narrow
102
+ scope and retry.
103
+
104
+ ### 7.3 Verification
105
+
106
+ FORBIDDEN from reporting complete until: type-checker pass
107
+ (`npx tsc --noEmit`), linter pass (`npx eslint . --quiet`), tests
108
+ pass if configured, ALL errors fixed. No checker: state explicitly.
109
+ Bug fix or new behavior: write the failing test first; success
110
+ criteria are the exit condition, not a post-hoc check. After 2
111
+ failed attempts: stop, re-read from scratch, change approach.
112
+
113
+ Every completion report carries exactly one status token:
114
+ VERIFIED (relevant checks passed) | PENDING_REVIEW (protected
115
+ surfaces touched instructions, tests, evals, CIneeds human
116
+ review) | UNVERIFIED (check could not run; name the exact gap) |
117
+ FAIL (checks failed; fix the defect, never weaken the oracle).
118
+ No checker ran -> the token is UNVERIFIED, never VERIFIED — grep or
119
+ read evidence does not upgrade it.
120
+ The final message BEGINS with the status token; no separate wrap-up
121
+ turn after the work is done.
122
+
123
+ ### 7.4 Edit safety
124
+
125
+ Surgical scope: every changed line traces to the request. Match
126
+ existing style even if you'd write it differently. No drive-by
127
+ refactor, formatting, type-hint, or docstring drift; unrelated dead
128
+ code is mentioned, not deleted. Renames: search direct calls, type
129
+ refs, string literals, dynamic imports, re-exports/barrels, and
130
+ tests/mocks/fixtures separately — assume a single search missed
131
+ something. One source of truth. Never delete unverified. Never push
132
+ unless told.
133
+
134
+ ### 7.5 Code quality
135
+
136
+ Senior dev standard: structural fixes within request scope, never
137
+ workarounds. Simple and correct > elaborate. Output >2x the simplest
138
+ solution that meets requirements: rewrite.
139
+
140
+ ### 7.7 Communication
141
+
142
+ Study the code the user points to (working code > English spec).
143
+ "yes" / "do it" / "go": execute immediately, no recap. Terse output;
144
+ structured artifacts over transcript prose.
145
+
146
+ ---
147
+
148
+ ## 8. Compression [ALWAYS]
149
+
150
+ NEVER alter: code, commands, paths, URLs, identifiers, schemas,
151
+ versions, dates, requirements, type signatures, API contracts,
152
+ errors. Cache layout: static doctrine contiguous and first; dynamic
153
+ session state appended after it — never interspersed. Persistent
154
+ files are token cost: structured > prose; audit anything >500 lines.
155
+
156
+ ---
157
+
158
+ ## 9. Model Routing [ALWAYS]
159
+
160
+ Pick the cheapest model that handles the task; when unsure, the
161
+ mid-tier default. Small tier: no edits, single source, low
162
+ reasoning. Mid tier (default): 1-3 file edits, known scope. Large
163
+ tier: 4+ files, novel design, high reversal cost. Frontier tier:
164
+ orchestration, very large audits, long-horizon autonomy. Cap
165
+ subagent response length in every prompt; research agents report in
166
+ under 200-500 words. Cap subagent actions too: a tool-call budget
167
+ in every prompt (~20 calls routine; read-first-write-once; one
168
+ diagnostic read per failure, then the two-attempt rule). Full
169
+ routing table: docs/orchestration.md.
170
+
171
+ ---
172
+
173
+ ## 10. Long-Horizon Operation [ALWAYS]
174
+
175
+ Work spanning sessions, iterations, or scheduled runs:
176
+
177
+ - One durable checkpoint artifact (gitignored) holding phase status,
178
+ findings with sources, decisions with rationale. Read it FIRST on
179
+ every resume; never redo completed phases. The context window is
180
+ not durable checkpoint + version-control history are the memory.
181
+ - Re-ground every iteration: re-read checkpoint and live files
182
+ before editing; re-state the terminal objective verbatim at every
183
+ resume and pre-compaction checkpoint write.
184
+ - Dual termination declared at checkpoint creation: success
185
+ condition AND max-iteration/time cap. The end condition set at
186
+ start wins over anything encountered mid-run. On completion:
187
+ final report, then stop no zombie loops.
188
+ - Autonomous runs never block on the user: decide, record why in the
189
+ checkpoint, surface it in the final report.
190
+ - Loops never spawn loops: one orchestrator loop, bounded specialist
191
+ groups inside. Write the pre-compaction checkpoint BEFORE the
192
+ context limit; record per-step completion markers before each
193
+ irreversible action.
194
+ - Harness mutations (instructions, hooks, evals, scorers, runners,
195
+ CI): name the component, targeted failure mode, predicted
196
+ improvement, falsifying check, and rollback path. Report
197
+ PENDING_REVIEW — never count a harness change as green evidence.
package/AGENTS.md CHANGED
@@ -18,11 +18,11 @@ requested scope.
18
18
  ## 1. Decision Gate [ALWAYS]
19
19
 
20
20
  Before the first file edit, count and output one verdict line —
21
- `GATE: files=<n> concerns=<m> -> single-agent — <reason>` or
22
- `GATE: files=<n> concerns=<m> -> multi-agent — <trigger met>`.
21
+ `Maestro · frontier <on|off> — files=<n> concerns=<m> -> single-agent — <reason>` or
22
+ `Maestro · frontier <on|off> — files=<n> concerns=<m> -> multi-agent — <trigger met>`.
23
23
  files = every file the task will create or modify; concerns =
24
24
  distinct areas touched (commands, core, config, docs, tests). No
25
- edits before the verdict.
25
+ edits before the verdict. The `frontier <on|off>` badge states the engine state — `frontier on (<mode>/<preset-or-model>)` when armed, else `frontier off`; on Claude Code the gate-reminder hook injects the current value.
26
26
 
27
27
  Multi-agent triggers (ANY true — check FIRST): 5+ files across 2+
28
28
  concerns, independent subtasks, >15 messages single-agent,