@fernado03/zoo-flow 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/README.md +105 -90
  2. package/bin/zoo-flow.js +405 -56
  3. package/docs/architecture.md +380 -0
  4. package/docs/bloat-control.md +49 -0
  5. package/docs/command-design.md +38 -0
  6. package/docs/command-flow.md +133 -0
  7. package/docs/comparison.md +86 -0
  8. package/docs/context-packs.md +35 -0
  9. package/docs/dogfood/01-small-library.md +28 -0
  10. package/docs/dogfood/02-web-app.md +29 -0
  11. package/docs/dogfood/03-mixed-monorepo.md +29 -0
  12. package/docs/mode-rules.md +86 -0
  13. package/docs/npm-publishing.md +79 -0
  14. package/docs/out-of-scope/mainstream-issue-trackers-only.md +25 -0
  15. package/docs/out-of-scope/question-limits.md +18 -0
  16. package/docs/out-of-scope/setup-skill-verify-mode.md +15 -0
  17. package/docs/overview.md +61 -0
  18. package/docs/philosophy.md +73 -0
  19. package/docs/quality-scorecard.md +23 -0
  20. package/docs/skill-maintenance.md +32 -0
  21. package/docs/skills-index.md +61 -0
  22. package/docs/team-mode.md +46 -0
  23. package/docs/token-budget.md +22 -0
  24. package/docs/troubleshooting.md +288 -0
  25. package/examples/demo-transcripts/01-small-tweak.md +37 -0
  26. package/examples/demo-transcripts/02-unknown-bug-fix.md +37 -0
  27. package/examples/demo-transcripts/03-new-feature.md +37 -0
  28. package/examples/demo-transcripts/04-refactor.md +37 -0
  29. package/examples/demo-transcripts/05-review-and-verify.md +37 -0
  30. package/examples/feature-flow.md +117 -0
  31. package/examples/fix-flow.md +139 -0
  32. package/package.json +16 -5
  33. package/quality/scorecard.json +88 -0
  34. package/quality/token-budget.exceptions.json +13 -0
  35. package/scripts/bundle.ps1 +135 -0
  36. package/scripts/check-golden-transcripts.js +69 -0
  37. package/scripts/check-package-links.js +72 -0
  38. package/scripts/check-package-manifest.js +70 -0
  39. package/scripts/eval-routing.js +149 -0
  40. package/scripts/score-quality.js +292 -0
  41. package/scripts/test-doctor.js +107 -0
  42. package/scripts/test-project-shapes.js +99 -0
  43. package/scripts/token-budget.js +105 -0
  44. package/templates/full/.roo/commands/caveman.md +1 -1
  45. package/templates/full/.roo/commands/diagnose.md +2 -1
  46. package/templates/full/.roo/commands/explore.md +13 -13
  47. package/templates/full/.roo/commands/feature.md +1 -1
  48. package/templates/full/.roo/commands/fix.md +1 -1
  49. package/templates/full/.roo/commands/grill-me.md +2 -1
  50. package/templates/full/.roo/commands/grill-with-docs.md +2 -1
  51. package/templates/full/.roo/commands/handoff.md +2 -1
  52. package/templates/full/.roo/commands/improve-codebase-architecture.md +2 -1
  53. package/templates/full/.roo/commands/prototype.md +1 -1
  54. package/templates/full/.roo/commands/refactor.md +1 -1
  55. package/templates/full/.roo/commands/review.md +11 -0
  56. package/templates/full/.roo/commands/scaffold-context.md +13 -13
  57. package/templates/full/.roo/commands/setup-matt-pocock-skills.md +8 -8
  58. package/templates/full/.roo/commands/tdd.md +1 -1
  59. package/templates/full/.roo/commands/to-issues.md +2 -1
  60. package/templates/full/.roo/commands/to-prd.md +2 -1
  61. package/templates/full/.roo/commands/triage.md +1 -1
  62. package/templates/full/.roo/commands/tweak.md +1 -1
  63. package/templates/full/.roo/commands/update-docs.md +22 -22
  64. package/templates/full/.roo/commands/verify.md +11 -0
  65. package/templates/full/.roo/commands/write-a-skill.md +2 -1
  66. package/templates/full/.roo/commands/zoom-out.md +2 -1
  67. package/templates/full/.roo/rules/01-command-protocol.md +1 -1
  68. package/templates/full/.roo/rules/04-context-economy.md +27 -29
  69. package/templates/full/.roo/rules-code-tweaker/01-completion.md +12 -8
  70. package/templates/full/.roo/rules-custom-orchestrator/00-routing.md +77 -63
  71. package/templates/full/.roo/rules-custom-orchestrator/01-delegation-message.md +59 -55
  72. package/templates/full/.roo/rules-system-architect/02-completion.md +6 -2
  73. package/templates/full/.roo/skills/engineering/README.md +2 -0
  74. package/templates/full/.roo/skills/engineering/commit-and-document/SKILL.md +1 -2
  75. package/templates/full/.roo/skills/engineering/grill-with-docs/ADR-FORMAT.md +1 -1
  76. package/templates/full/.roo/skills/engineering/grill-with-docs/CONTEXT-FORMAT.md +36 -61
  77. package/templates/full/.roo/skills/engineering/grill-with-docs/SKILL.md +1 -1
  78. package/templates/full/.roo/skills/engineering/improve-codebase-architecture/SKILL.md +3 -3
  79. package/templates/full/.roo/skills/engineering/prototype/SKILL.md +37 -37
  80. package/templates/full/.roo/skills/engineering/review/SKILL.md +111 -0
  81. package/templates/full/.roo/skills/engineering/scaffold-context/SKILL.md +218 -152
  82. package/templates/full/.roo/skills/engineering/scaffold-context/templates/writing-patterns.md +17 -0
  83. package/templates/full/.roo/skills/engineering/setup-matt-pocock-skills/SKILL.md +3 -3
  84. package/templates/full/.roo/skills/engineering/setup-matt-pocock-skills/domain.md +2 -3
  85. package/templates/full/.roo/skills/engineering/tdd/SKILL.md +2 -0
  86. package/templates/full/.roo/skills/engineering/to-prd/SKILL.md +57 -57
  87. package/templates/full/.roo/skills/engineering/tweak/SKILL.md +2 -1
  88. package/templates/full/.roo/skills/engineering/verify/SKILL.md +80 -0
  89. package/templates/full/.roo/skills/in-progress/README.md +0 -1
  90. package/templates/full/.roomodes +47 -47
  91. package/templates/full/.zoo-flow/CONTEXT.md +8 -8
  92. package/templates/full/.zoo-flow/START_HERE.md +61 -61
  93. package/templates/full/.zoo-flow/docs/adr/0001-record-architecture-decisions.md +22 -22
  94. package/templates/full/.zoo-flow/evals/no-regression-checklist.md +26 -24
  95. package/templates/full/.zoo-flow/evals/routing-cases.jsonl +20 -0
  96. package/templates/full/.zoo-flow/evals/routing-cases.md +213 -189
  97. package/templates/full/.zoo-flow/project-profile.json +24 -0
  98. package/tests/fixtures/bad-routing-cases/bad-json.jsonl +1 -0
  99. package/tests/fixtures/bad-routing-cases/bad-mode.jsonl +1 -0
  100. package/tests/fixtures/bad-routing-cases/missing-command.jsonl +1 -0
  101. package/tests/fixtures/doctor/bad-built-in-delegation/fixture.json +1 -0
  102. package/tests/fixtures/doctor/bad-mode-slug/fixture.json +1 -0
  103. package/tests/fixtures/doctor/bad-skill-wrapper/fixture.json +1 -0
  104. package/tests/fixtures/doctor/bad-zoo-path/fixture.json +1 -0
  105. package/tests/fixtures/doctor/helper-missing-mode/fixture.json +1 -0
  106. package/tests/fixtures/doctor/helper-not-permitted/fixture.json +1 -0
  107. package/tests/fixtures/doctor/manual-good-template/fixture.json +1 -0
  108. package/tests/fixtures/doctor/missing-command/fixture.json +1 -0
  109. package/tests/fixtures/doctor/missing-roomodes/fixture.json +1 -0
  110. package/tests/fixtures/doctor/missing-skill/fixture.json +1 -0
  111. package/tests/fixtures/project-shapes/cli-tool/cmd/root.go +1 -0
  112. package/tests/fixtures/project-shapes/cli-tool/fixture.json +1 -0
  113. package/tests/fixtures/project-shapes/cli-tool/package.json +1 -0
  114. package/tests/fixtures/project-shapes/data-pipeline/fixture.json +1 -0
  115. package/tests/fixtures/project-shapes/data-pipeline/pipelines/invoices.py +1 -0
  116. package/tests/fixtures/project-shapes/data-pipeline/pyproject.toml +2 -0
  117. package/tests/fixtures/project-shapes/library/fixture.json +1 -0
  118. package/tests/fixtures/project-shapes/library/package.json +1 -0
  119. package/tests/fixtures/project-shapes/library/src/index.ts +1 -0
  120. package/tests/fixtures/project-shapes/monorepo/fixture.json +1 -0
  121. package/tests/fixtures/project-shapes/monorepo/package.json +1 -0
  122. package/tests/fixtures/project-shapes/monorepo/packages/core/index.ts +1 -0
  123. package/tests/fixtures/project-shapes/monorepo/packages/web/index.ts +1 -0
  124. package/tests/fixtures/project-shapes/serverless/fixture.json +1 -0
  125. package/tests/fixtures/project-shapes/serverless/functions/webhook.ts +1 -0
  126. package/tests/fixtures/project-shapes/serverless/package.json +1 -0
  127. package/tests/fixtures/project-shapes/web-app/app/routes/index.tsx +1 -0
  128. package/tests/fixtures/project-shapes/web-app/fixture.json +1 -0
  129. package/tests/fixtures/project-shapes/web-app/package.json +1 -0
  130. package/tests/golden-transcripts/01-small-tweak-golden.md +21 -0
  131. package/tests/golden-transcripts/02-diagnosis-golden.md +26 -0
  132. package/tests/golden-transcripts/03-verification-golden.md +24 -0
  133. package/tests/golden-transcripts/04-review-golden.md +26 -0
  134. package/tests/golden-transcripts/05-feature-planning-golden.md +23 -0
  135. package/templates/full/.roo/skills/in-progress/review/SKILL.md +0 -39
@@ -1,203 +1,227 @@
1
- # Routing Eval Cases
2
-
3
- Use these cases to check whether the orchestrator chooses the expected workflow.
4
-
5
- In every case:
6
-
7
- - The user did **not** type a slash command.
8
- - A free-form request is never self-approving. The orchestrator proposes, then waits.
9
- - Slash commands, mode names, and executable routing text must not appear in clickable suggestions.
10
- - Slash commands are optional. The user should never be told to type one to use Zoo Flow.
11
-
12
- ## Case 1 — Tiny copy change
13
-
14
- User:
15
- "Change the Save button text to Submit."
16
-
17
- Expected:
18
- Recommend the small implementation workflow.
19
-
20
- Must not:
21
- - Route to feature.
22
- - Read architecture docs by default.
23
- - Ask the user to type a slash command.
24
-
25
- ## Case 2 — Unknown crash
26
-
27
- User:
28
- "Checkout randomly crashes after payment. It used to work."
29
-
30
- Expected:
31
- Recommend the diagnosis workflow.
32
-
33
- Must:
34
- - Reproduce before hypothesizing.
35
- - Present hypotheses before fix.
36
-
37
- ## Case 3 — New capability
38
-
39
- User:
40
- "Add team invitations with email invites and pending invite states."
41
-
42
- Expected:
43
- Recommend feature planning.
44
-
45
- Must:
46
- - Plan before implementation.
47
- - Use phase gates.
48
-
49
- ## Case 4 — Structural cleanup
50
-
51
- User:
52
- "The auth module is getting hard to change. I want to decouple provider-specific logic."
53
-
54
- Expected:
55
- Recommend refactor workflow.
56
-
57
- Must:
58
- - Preserve behavior.
59
- - Explore architecture candidates before implementation.
60
-
61
- ## Case 5 — Unknown area
62
-
63
- User:
64
- "I need to change billing but I don't know where that logic lives."
65
-
66
- Expected:
67
- Recommend exploration first.
68
-
69
- Must:
70
- - Produce a map before choosing feature/fix/refactor.
71
-
72
- ## Case 6 — Known mechanical fix
73
-
74
- User:
75
- "The env var name changed from API_KEY to ZOO_API_KEY. Update the config loader."
76
-
77
- Expected:
78
- Recommend small implementation workflow.
79
-
80
- Must not:
81
- - Route to diagnosis.
82
- - Route to feature.
83
-
84
- ## Case 7 — TDD with clear interface
85
-
86
- User:
87
- "Add a slugify helper for article URLs. I want it test-first."
88
-
89
- Expected:
90
- Recommend TDD workflow.
91
-
92
- Must:
93
- - Write the failing test first.
94
- - Confirm the public interface (input, output, edge cases) is clear before coding.
95
-
96
- ## Case 8 — Stale documentation
97
-
98
- User:
99
- "The ARCHITECTURE.md file describes a checkout flow we removed last quarter. Bring it in line with the code."
100
-
101
- Expected:
102
- Recommend the documentation update workflow.
103
-
104
- Must:
105
- - Audit code first, then make surgical doc edits.
106
- - Not rewrite the file wholesale.
107
-
108
- ## Case 9 — Ready to commit
109
-
110
- User:
111
- "I finished the small tweak. Please commit it and add a journal entry."
112
-
113
- Expected:
114
- Recommend the commit + journal workflow.
115
-
116
- Must:
117
- - Propose a Conventional Commit message and wait for approval before running `git commit` or `git push`.
1
+ # Routing Eval Cases
2
+
3
+ Use these cases to check whether the orchestrator chooses the expected workflow.
4
+
5
+ In every case:
6
+
7
+ - The user did **not** type a slash command.
8
+ - A free-form request is never self-approving. The orchestrator proposes, then waits.
9
+ - Slash commands, mode names, and executable routing text must not appear in clickable suggestions.
10
+ - Slash commands are optional. The user should never be told to type one to use Zoo Flow.
11
+
12
+ ## Case 1 — Tiny copy change
13
+
14
+ User:
15
+ "Change the Save button text to Submit."
16
+
17
+ Expected:
18
+ Recommend the small implementation workflow.
19
+
20
+ Must not:
21
+ - Route to feature.
22
+ - Read architecture docs by default.
23
+ - Ask the user to type a slash command.
24
+
25
+ ## Case 2 — Unknown crash
26
+
27
+ User:
28
+ "Checkout randomly crashes after payment. It used to work."
29
+
30
+ Expected:
31
+ Recommend the diagnosis workflow.
32
+
33
+ Must:
34
+ - Reproduce before hypothesizing.
35
+ - Present hypotheses before fix.
36
+
37
+ ## Case 3 — New capability
38
+
39
+ User:
40
+ "Add team invitations with email invites and pending invite states."
41
+
42
+ Expected:
43
+ Recommend feature planning.
44
+
45
+ Must:
46
+ - Plan before implementation.
47
+ - Use phase gates.
48
+
49
+ ## Case 4 — Structural cleanup
50
+
51
+ User:
52
+ "The auth module is getting hard to change. I want to decouple provider-specific logic."
53
+
54
+ Expected:
55
+ Recommend refactor workflow.
56
+
57
+ Must:
58
+ - Preserve behavior.
59
+ - Explore architecture candidates before implementation.
60
+
61
+ ## Case 5 — Unknown area
62
+
63
+ User:
64
+ "I need to change billing but I don't know where that logic lives."
65
+
66
+ Expected:
67
+ Recommend exploration first.
68
+
69
+ Must:
70
+ - Produce a map before choosing feature/fix/refactor.
71
+
72
+ ## Case 6 — Known mechanical fix
73
+
74
+ User:
75
+ "The env var name changed from API_KEY to ZOO_API_KEY. Update the config loader."
76
+
77
+ Expected:
78
+ Recommend small implementation workflow.
79
+
80
+ Must not:
81
+ - Route to diagnosis.
82
+ - Route to feature.
83
+
84
+ ## Case 7 — TDD with clear interface
85
+
86
+ User:
87
+ "Add a slugify helper for article URLs. I want it test-first."
88
+
89
+ Expected:
90
+ Recommend TDD workflow.
91
+
92
+ Must:
93
+ - Write the failing test first.
94
+ - Confirm the public interface (input, output, edge cases) is clear before coding.
95
+
96
+ ## Case 8 — Stale documentation
97
+
98
+ User:
99
+ "The ARCHITECTURE.md file describes a checkout flow we removed last quarter. Bring it in line with the code."
100
+
101
+ Expected:
102
+ Recommend the documentation update workflow.
103
+
104
+ Must:
105
+ - Audit code first, then make surgical doc edits.
106
+ - Do not rewrite the file wholesale.
107
+
108
+ ## Case 9 — Ready to commit
109
+
110
+ User:
111
+ "I finished the small tweak. Please commit it and add a journal entry."
112
+
113
+ Expected:
114
+ Recommend the commit + journal workflow.
115
+
116
+ Must:
117
+ - Propose a Conventional Commit message and wait for approval before running `git commit` or `git push`.
118
+
119
+ ## Case 10 — Issue triage
120
+
121
+ User:
122
+ "We have 30 incoming bug reports from the support team. Triage them into the issue tracker."
123
+
124
+ Expected:
125
+ Recommend the triage workflow.
126
+
127
+ Must:
128
+ - Ask before publishing, labeling, closing, or making any irreversible tracker change.
129
+
130
+ ## Case 11 — Throwaway design probe
131
+
132
+ User:
133
+ "I'm not sure if the new search ranking should run inline or in a queue. Can we try both and see?"
134
+
135
+ Expected:
136
+ Recommend a throwaway prototype.
137
+
138
+ Must:
139
+ - Keep the work on a prototype branch or `.scratch/prototypes/<slug>/` so it is clearly throwaway.
140
+ - Resolve the design question, not commit to a real implementation.
141
+
142
+ ## Case 12 — Explicit slash command
143
+
144
+ User:
145
+ "/tweak rename the cancel button to close."
146
+
147
+ Expected:
148
+ Route immediately. Do not second-guess the explicit command.
149
+
150
+ Must not:
151
+ - Repropose the workflow as a numbered choice.
152
+ - Treat the explicit command as if approval were still pending.
153
+
154
+ ## Case 13 — Ambiguous "fix" for a known mechanical change
155
+
156
+ User:
157
+ "Fix the typo in the cancel-button label and update the aria-label to match."
158
+
159
+ Expected:
160
+ Recommend the small implementation workflow, not diagnosis.
161
+
162
+ Must:
163
+ - Recognize the cause and target are known.
164
+ - Do not run a full diagnosis loop for a one-line copy fix.
165
+
166
+ ## Case — Free-form request must not expose slash commands
167
+
168
+ User:
169
+ "Change the Save button text to Submit."
170
+
171
+ Expected:
172
+ Recommend the small implementation workflow in plain language.
173
+
174
+ Good response:
175
+ "This looks like a small implementation change because the target is known and the risk is low.
176
+
177
+ 1. Make the small implementation change
178
+ 2. Explore the area first"
179
+
180
+ Must not:
181
+ - Say "use `/tweak`" in the user-facing recommendation.
182
+ - Offer `/tweak` as a selectable option.
183
+ - Tell the user to type a slash command.
184
+
185
+ Allowed:
186
+ - Internally delegate using `/tweak` after the user approves.
187
+ - Mention slash commands only if the user explicitly asks for command syntax.
188
+
189
+ ## Case — Deep inspection must not route to Ask mode
190
+
191
+ User:
192
+ "Do you think these changes are beneficial or not? Inspect deeply if it affects the system."
193
+
194
+ Expected:
195
+ Recommend analysis/review through the architecture/inspection workflow.
196
+
197
+ Delegation target after approval:
198
+ `system-architect`
199
+
200
+ Must not:
201
+ - Delegate to Ask mode.
202
+ - Delegate to default Architect mode.
203
+ - Use any mode other than `system-architect` or `code-tweaker`.
118
204
 
119
- ## Case 10 Issue triage
205
+ ## Case — Review
120
206
 
121
207
  User:
122
- "We have 30 incoming bug reports from the support team. Triage them into the issue tracker."
208
+ "Review this branch before I commit it."
123
209
 
124
210
  Expected:
125
- Recommend the triage workflow.
211
+ Recommend the review workflow.
126
212
 
127
213
  Must:
128
- - Ask before publishing, labeling, closing, or making any irreversible tracker change.
214
+ - Route to `system-architect` after approval.
215
+ - Report findings by severity.
129
216
 
130
- ## Case 11 Throwaway design probe
217
+ ## Case — Verification
131
218
 
132
219
  User:
133
- "I'm not sure if the new search ranking should run inline or in a queue. Can we try both and see?"
220
+ "Run tests for this change and make sure nothing broke."
134
221
 
135
222
  Expected:
136
- Recommend a throwaway prototype.
223
+ Recommend the verification workflow.
137
224
 
138
225
  Must:
139
- - Keep the work on a prototype branch or `.scratch/prototypes/<slug>/` so it is clearly throwaway.
140
- - Resolve the design question, not commit to a real implementation.
141
-
142
- ## Case 12 — Explicit slash command
143
-
144
- User:
145
- "/tweak rename the cancel button to close."
146
-
147
- Expected:
148
- Route immediately. Do not second-guess the explicit command.
149
-
150
- Must not:
151
- - Repropose the workflow as a numbered choice.
152
- - Treat the explicit command as if approval were still pending.
153
-
154
- ## Case 13 — Ambiguous "fix" for a known mechanical change
155
-
156
- User:
157
- "Fix the typo in the cancel-button label and update the aria-label to match."
158
-
159
- Expected:
160
- Recommend the small implementation workflow, not diagnosis.
161
-
162
- Must:
163
- - Recognize the cause and target are known.
164
- - Not run a full diagnosis loop for a one-line copy fix.
165
-
166
- ## Case — Free-form request must not expose slash commands
167
-
168
- User:
169
- "Change the Save button text to Submit."
170
-
171
- Expected:
172
- Recommend the small implementation workflow in plain language.
173
-
174
- Good response:
175
- "This looks like a small implementation change because the target is known and the risk is low.
176
-
177
- 1. Make the small implementation change
178
- 2. Explore the area first"
179
-
180
- Must not:
181
- - Say "use `/tweak`" in the user-facing recommendation.
182
- - Offer `/tweak` as a selectable option.
183
- - Tell the user to type a slash command.
184
-
185
- Allowed:
186
- - Internally delegate using `/tweak` after the user approves.
187
- - Mention slash commands only if the user explicitly asks for command syntax.
188
-
189
- ## Case — Deep inspection must not route to Ask mode
190
-
191
- User:
192
- "Do you think these changes are beneficial or not? Inspect deeply if it affects the system."
193
-
194
- Expected:
195
- Recommend analysis/review through the architecture/inspection workflow.
196
-
197
- Delegation target after approval:
198
- `system-architect`
199
-
200
- Must not:
201
- - Delegate to Ask mode.
202
- - Delegate to default Architect mode.
203
- - Use any mode other than `system-architect` or `code-tweaker`.
226
+ - Route to `code-tweaker` after approval.
227
+ - Report exact commands run and results.
@@ -0,0 +1,24 @@
1
+ {
2
+ "schemaVersion": 1,
3
+ "projectShape": null,
4
+ "packageManager": null,
5
+ "issueTracker": {
6
+ "kind": null,
7
+ "project": null
8
+ },
9
+ "verification": {
10
+ "targetedTest": null,
11
+ "typecheck": null,
12
+ "lint": null,
13
+ "build": null,
14
+ "fullTest": null
15
+ },
16
+ "docsPolicy": {
17
+ "localContext": ".zoo-flow/",
18
+ "sharedDocs": ["AGENTS.md", "docs/adr/", "docs/architecture/"]
19
+ },
20
+ "commitPolicy": {
21
+ "conventionalCommits": true,
22
+ "journal": "docs/journal/"
23
+ }
24
+ }
@@ -0,0 +1 @@
1
+ {"name":"bad json","user":"oops",
@@ -0,0 +1 @@
1
+ {"name":"bad mode","user":"Review this","expected_workflow":"review","expected_command":"/review","expected_mode":"architect","must_require_approval":true,"must_not_include":[]}
@@ -0,0 +1 @@
1
+ {"name":"missing command","user":"Do nonexistent thing","expected_workflow":"small implementation","expected_command":"/does-not-exist","expected_mode":"code-tweaker","must_require_approval":true,"must_not_include":[]}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"bad-built-in-delegation","message":"Built-in/default delegation target"}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"bad-mode-slug","message":"uses invalid mode: architect"}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"bad-skill-wrapper","message":"non-canonical skill wrapper"}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"bad-zoo-path","message":"Bad pattern \".zoo/\""}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"helper-missing-mode","message":"must declare mode: system-architect"}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"helper-not-permitted","message":"does not permit documented command /diagnose"}
@@ -0,0 +1 @@
1
+ {"expect":"pass","mutation":"none","message":"doctor passed"}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"missing-command","message":"missing command file"}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"missing-roomodes","message":"Missing .roomodes"}
@@ -0,0 +1 @@
1
+ {"expect":"fail","mutation":"missing-skill","message":"references missing skill"}
@@ -0,0 +1 @@
1
+ package main; func main() { println("hello") }
@@ -0,0 +1 @@
1
+ {"expected_shape": "cli-tool", "keywords": ["cli"]}
@@ -0,0 +1 @@
1
+ {"name": "test-cli", "keywords": ["cli"]}
@@ -0,0 +1 @@
1
+ {"expected_shape": "data-pipeline", "keywords": ["data-pipeline"]}
@@ -0,0 +1 @@
1
+ def run_invoice_pipeline(): pass
@@ -0,0 +1,2 @@
1
+ [project]
2
+ name = "test-pipeline"
@@ -0,0 +1 @@
1
+ {"expected_shape": "library", "keywords": ["library"]}
@@ -0,0 +1 @@
1
+ {"name": "test-lib", "keywords": ["library"]}
@@ -0,0 +1 @@
1
+ export const greet = (name: string) => `Hello, ${name}`;
@@ -0,0 +1 @@
1
+ {"expected_shape": "monorepo", "keywords": ["monorepo"]}
@@ -0,0 +1 @@
1
+ {"name": "test-monorepo", "keywords": ["monorepo"]}
@@ -0,0 +1 @@
1
+ export const core = "core";
@@ -0,0 +1 @@
1
+ export const web = "web";
@@ -0,0 +1 @@
1
+ {"expected_shape": "serverless", "keywords": ["serverless"]}
@@ -0,0 +1 @@
1
+ export const handler = async () => ({ statusCode: 200 });
@@ -0,0 +1 @@
1
+ {"name": "test-serverless", "keywords": ["serverless"]}
@@ -0,0 +1 @@
1
+ export default function Home() { return <div>Home</div>; }
@@ -0,0 +1 @@
1
+ {"expected_shape": "web-app", "keywords": ["web-app"]}
@@ -0,0 +1 @@
1
+ {"name": "test-webapp", "keywords": ["web-app"]}
@@ -0,0 +1,21 @@
1
+ # Golden: Small Tweak
2
+
3
+ ## User
4
+ Change the Save button text to Submit.
5
+
6
+ ## Workflow
7
+ small implementation -> /tweak -> code-tweaker
8
+
9
+ ## Expected structure
10
+ 1. Orchestrator proposes "small implementation" in plain language.
11
+ 2. User approves.
12
+ 3. Orchestrator delegates /tweak to code-tweaker.
13
+ 4. Code Tweaker reads the command, applies the change.
14
+ 5. Code Tweaker reports: files changed, what changed, status.
15
+ 6. Code Tweaker uses attempt_completion with evidence.
16
+
17
+ ## Must not include
18
+ - /tweak in user-facing options
19
+ - code-tweaker as a clickable choice
20
+ - Architecture doc reads
21
+ - Domain doc reads
@@ -0,0 +1,26 @@
1
+ # Golden: Diagnosis
2
+
3
+ ## User
4
+ Checkout randomly crashes after payment. It used to work.
5
+
6
+ ## Workflow
7
+ diagnosis -> /fix -> system-architect -> code-tweaker -> system-architect
8
+
9
+ ## Expected structure
10
+ 1. Orchestrator proposes "diagnosis" in plain language.
11
+ 2. User approves.
12
+ 3. Orchestrator delegates /fix to system-architect.
13
+ 4. System Architect reads domain docs, reproduces, minimizes, hypothesizes.
14
+ 5. System Architect hands to code-tweaker for fix.
15
+ 6. Code Tweaker implements, verifies.
16
+ 7. Control returns to system-architect for post-mortem.
17
+ 8. System Architect uses attempt_completion.
18
+
19
+ ## Must include
20
+ - Reproduction before hypothesis
21
+ - Hypothesis before fix
22
+ - Post-mortem after fix
23
+
24
+ ## Must not include
25
+ - Built-in mode delegation
26
+ - /fix in user-facing suggestions
@@ -0,0 +1,24 @@
1
+ # Golden: Verification
2
+
3
+ ## User
4
+ Run tests for this change and make sure nothing broke.
5
+
6
+ ## Workflow
7
+ verification -> /verify -> code-tweaker
8
+
9
+ ## Expected structure
10
+ 1. Orchestrator proposes "verification" in plain language.
11
+ 2. User approves.
12
+ 3. Orchestrator delegates /verify to code-tweaker.
13
+ 4. Code Tweaker reads verify skill.
14
+ 5. Code Tweaker inspects project type and changed files.
15
+ 6. Code Tweaker picks smallest useful checks.
16
+ 7. Code Tweaker runs checks, captures output.
17
+ 8. Code Tweaker reports: verification result with status, commands run, evidence, remaining risk.
18
+ 9. Code Tweaker uses attempt_completion.
19
+
20
+ ## Evidence format
21
+ - Status: pass | fail | partial | blocked
22
+ - Commands run with pass/fail per command
23
+ - Evidence: short summary of output
24
+ - Remaining risk: what was not checked
@@ -0,0 +1,26 @@
1
+ # Golden: Review
2
+
3
+ ## User
4
+ Review this branch before I commit it.
5
+
6
+ ## Workflow
7
+ review -> /review -> system-architect
8
+
9
+ ## Expected structure
10
+ 1. Orchestrator proposes "review" in plain language.
11
+ 2. User approves.
12
+ 3. Orchestrator delegates /review to system-architect.
13
+ 4. System Architect reads review skill.
14
+ 5. System Architect identifies target (branch diff).
15
+ 6. System Architect reads targeted diffs.
16
+ 7. System Architect evaluates axes: standards, spec, security/risk.
17
+ 8. System Architect reports findings by severity.
18
+ 9. System Architect ends with canonical result line.
19
+ 10. System Architect uses attempt_completion.
20
+
21
+ ## Result line
22
+ - Review result: approve | approve with nits | changes requested | blocked
23
+
24
+ ## Must include
25
+ - Severity-ordered findings
26
+ - Security/Risk axis when change touches auth/payments/PII/data