@kenkaiiii/ggcoder 4.3.212 → 4.3.213

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (312)
  1. package/README.md +5 -8
  2. package/dist/cli.d.ts +3 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +112 -61
  5. package/dist/cli.js.map +1 -1
  6. package/dist/core/continue-replay-inventory.test.d.ts +2 -0
  7. package/dist/core/continue-replay-inventory.test.d.ts.map +1 -0
  8. package/dist/core/continue-replay-inventory.test.js +42 -0
  9. package/dist/core/continue-replay-inventory.test.js.map +1 -0
  10. package/dist/core/goal-controller.d.ts +2 -0
  11. package/dist/core/goal-controller.d.ts.map +1 -1
  12. package/dist/core/goal-controller.js +283 -24
  13. package/dist/core/goal-controller.js.map +1 -1
  14. package/dist/core/goal-controller.test.js +413 -16
  15. package/dist/core/goal-controller.test.js.map +1 -1
  16. package/dist/core/goal-lifecycle-smoke.test.js +48 -6
  17. package/dist/core/goal-lifecycle-smoke.test.js.map +1 -1
  18. package/dist/core/goal-prerequisites.d.ts +5 -0
  19. package/dist/core/goal-prerequisites.d.ts.map +1 -1
  20. package/dist/core/goal-prerequisites.js +37 -0
  21. package/dist/core/goal-prerequisites.js.map +1 -1
  22. package/dist/core/goal-prerequisites.test.js +29 -1
  23. package/dist/core/goal-prerequisites.test.js.map +1 -1
  24. package/dist/core/goal-references.d.ts +14 -0
  25. package/dist/core/goal-references.d.ts.map +1 -0
  26. package/dist/core/goal-references.js +153 -0
  27. package/dist/core/goal-references.js.map +1 -0
  28. package/dist/core/goal-references.test.d.ts +2 -0
  29. package/dist/core/goal-references.test.d.ts.map +1 -0
  30. package/dist/core/goal-references.test.js +77 -0
  31. package/dist/core/goal-references.test.js.map +1 -0
  32. package/dist/core/goal-store.d.ts +25 -0
  33. package/dist/core/goal-store.d.ts.map +1 -1
  34. package/dist/core/goal-store.js +150 -36
  35. package/dist/core/goal-store.js.map +1 -1
  36. package/dist/core/goal-store.test.js +19 -2
  37. package/dist/core/goal-store.test.js.map +1 -1
  38. package/dist/core/goal-verifier.d.ts.map +1 -1
  39. package/dist/core/goal-verifier.js +4 -1
  40. package/dist/core/goal-verifier.js.map +1 -1
  41. package/dist/core/goal-verifier.test.js +43 -0
  42. package/dist/core/goal-verifier.test.js.map +1 -1
  43. package/dist/core/goal-worker.d.ts +2 -0
  44. package/dist/core/goal-worker.d.ts.map +1 -1
  45. package/dist/core/goal-worker.js +33 -9
  46. package/dist/core/goal-worker.js.map +1 -1
  47. package/dist/core/goal-worker.test.js +49 -1
  48. package/dist/core/goal-worker.test.js.map +1 -1
  49. package/dist/core/prompt-commands.d.ts.map +1 -1
  50. package/dist/core/prompt-commands.js +28 -846
  51. package/dist/core/prompt-commands.js.map +1 -1
  52. package/dist/core/prompt-commands.test.js +40 -78
  53. package/dist/core/prompt-commands.test.js.map +1 -1
  54. package/dist/core/runtime-mode.d.ts +14 -0
  55. package/dist/core/runtime-mode.d.ts.map +1 -0
  56. package/dist/core/runtime-mode.js +10 -0
  57. package/dist/core/runtime-mode.js.map +1 -0
  58. package/dist/core/session-restore-display.test.d.ts +2 -0
  59. package/dist/core/session-restore-display.test.d.ts.map +1 -0
  60. package/dist/core/session-restore-display.test.js +100 -0
  61. package/dist/core/session-restore-display.test.js.map +1 -0
  62. package/dist/core/verify-commands.js +4 -4
  63. package/dist/core/verify-commands.js.map +1 -1
  64. package/dist/system-prompt.d.ts +2 -1
  65. package/dist/system-prompt.d.ts.map +1 -1
  66. package/dist/system-prompt.js +51 -37
  67. package/dist/system-prompt.js.map +1 -1
  68. package/dist/system-prompt.test.js +147 -40
  69. package/dist/system-prompt.test.js.map +1 -1
  70. package/dist/tools/bash.d.ts +3 -2
  71. package/dist/tools/bash.d.ts.map +1 -1
  72. package/dist/tools/bash.js +11 -4
  73. package/dist/tools/bash.js.map +1 -1
  74. package/dist/tools/edit.d.ts +5 -3
  75. package/dist/tools/edit.d.ts.map +1 -1
  76. package/dist/tools/edit.js +14 -4
  77. package/dist/tools/edit.js.map +1 -1
  78. package/dist/tools/edit.test.js +0 -10
  79. package/dist/tools/edit.test.js.map +1 -1
  80. package/dist/tools/goal-mode.test.d.ts +2 -0
  81. package/dist/tools/goal-mode.test.d.ts.map +1 -0
  82. package/dist/tools/goal-mode.test.js +121 -0
  83. package/dist/tools/goal-mode.test.js.map +1 -0
  84. package/dist/tools/goals.d.ts +15 -3
  85. package/dist/tools/goals.d.ts.map +1 -1
  86. package/dist/tools/goals.js +336 -26
  87. package/dist/tools/goals.js.map +1 -1
  88. package/dist/tools/goals.test.js +346 -6
  89. package/dist/tools/goals.test.js.map +1 -1
  90. package/dist/tools/index.d.ts +7 -10
  91. package/dist/tools/index.d.ts.map +1 -1
  92. package/dist/tools/index.js +6 -19
  93. package/dist/tools/index.js.map +1 -1
  94. package/dist/tools/plan-mode.test.js +34 -224
  95. package/dist/tools/plan-mode.test.js.map +1 -1
  96. package/dist/tools/prompt-hints.d.ts.map +1 -1
  97. package/dist/tools/prompt-hints.js +2 -6
  98. package/dist/tools/prompt-hints.js.map +1 -1
  99. package/dist/tools/subagent.d.ts +3 -2
  100. package/dist/tools/subagent.d.ts.map +1 -1
  101. package/dist/tools/subagent.js +4 -9
  102. package/dist/tools/subagent.js.map +1 -1
  103. package/dist/tools/write.d.ts +5 -3
  104. package/dist/tools/write.d.ts.map +1 -1
  105. package/dist/tools/write.js +14 -13
  106. package/dist/tools/write.js.map +1 -1
  107. package/dist/tools/write.test.js +0 -16
  108. package/dist/tools/write.test.js.map +1 -1
  109. package/dist/ui/App.d.ts +144 -28
  110. package/dist/ui/App.d.ts.map +1 -1
  111. package/dist/ui/App.js +1143 -862
  112. package/dist/ui/App.js.map +1 -1
  113. package/dist/ui/activity-phrases.d.ts.map +1 -1
  114. package/dist/ui/activity-phrases.js +0 -2
  115. package/dist/ui/activity-phrases.js.map +1 -1
  116. package/dist/ui/app-state-persistence.test.js +173 -5
  117. package/dist/ui/app-state-persistence.test.js.map +1 -1
  118. package/dist/ui/chat-layout-pinning.test.d.ts +2 -0
  119. package/dist/ui/chat-layout-pinning.test.d.ts.map +1 -0
  120. package/dist/ui/chat-layout-pinning.test.js +407 -0
  121. package/dist/ui/chat-layout-pinning.test.js.map +1 -0
  122. package/dist/ui/components/ActivityIndicator.d.ts +1 -2
  123. package/dist/ui/components/ActivityIndicator.d.ts.map +1 -1
  124. package/dist/ui/components/ActivityIndicator.js +63 -94
  125. package/dist/ui/components/ActivityIndicator.js.map +1 -1
  126. package/dist/ui/components/AssistantMessage.d.ts +6 -2
  127. package/dist/ui/components/AssistantMessage.d.ts.map +1 -1
  128. package/dist/ui/components/AssistantMessage.js +9 -4
  129. package/dist/ui/components/AssistantMessage.js.map +1 -1
  130. package/dist/ui/components/AssistantMessage.test.d.ts +2 -0
  131. package/dist/ui/components/AssistantMessage.test.d.ts.map +1 -0
  132. package/dist/ui/components/AssistantMessage.test.js +369 -0
  133. package/dist/ui/components/AssistantMessage.test.js.map +1 -0
  134. package/dist/ui/components/BackgroundTasksBar.d.ts +1 -3
  135. package/dist/ui/components/BackgroundTasksBar.d.ts.map +1 -1
  136. package/dist/ui/components/BackgroundTasksBar.js +2 -4
  137. package/dist/ui/components/BackgroundTasksBar.js.map +1 -1
  138. package/dist/ui/components/Banner.d.ts +1 -3
  139. package/dist/ui/components/Banner.d.ts.map +1 -1
  140. package/dist/ui/components/Banner.js +7 -3
  141. package/dist/ui/components/Banner.js.map +1 -1
  142. package/dist/ui/components/Footer.d.ts +26 -4
  143. package/dist/ui/components/Footer.d.ts.map +1 -1
  144. package/dist/ui/components/Footer.js +73 -21
  145. package/dist/ui/components/Footer.js.map +1 -1
  146. package/dist/ui/components/GoalOverlay.d.ts +28 -20
  147. package/dist/ui/components/GoalOverlay.d.ts.map +1 -1
  148. package/dist/ui/components/GoalOverlay.js +283 -253
  149. package/dist/ui/components/GoalOverlay.js.map +1 -1
  150. package/dist/ui/components/InputArea.d.ts +2 -6
  151. package/dist/ui/components/InputArea.d.ts.map +1 -1
  152. package/dist/ui/components/InputArea.js +40 -32
  153. package/dist/ui/components/InputArea.js.map +1 -1
  154. package/dist/ui/components/InputArea.test.js +11 -1
  155. package/dist/ui/components/InputArea.test.js.map +1 -1
  156. package/dist/ui/components/Markdown.d.ts +11 -11
  157. package/dist/ui/components/Markdown.d.ts.map +1 -1
  158. package/dist/ui/components/Markdown.js +25 -198
  159. package/dist/ui/components/Markdown.js.map +1 -1
  160. package/dist/ui/components/PlanOverlay.d.ts.map +1 -1
  161. package/dist/ui/components/PlanOverlay.js +1 -1
  162. package/dist/ui/components/PlanOverlay.js.map +1 -1
  163. package/dist/ui/components/ServerToolExecution.d.ts.map +1 -1
  164. package/dist/ui/components/ServerToolExecution.js +3 -2
  165. package/dist/ui/components/ServerToolExecution.js.map +1 -1
  166. package/dist/ui/components/SlashCommandMenu.d.ts +4 -3
  167. package/dist/ui/components/SlashCommandMenu.d.ts.map +1 -1
  168. package/dist/ui/components/SlashCommandMenu.js +38 -26
  169. package/dist/ui/components/SlashCommandMenu.js.map +1 -1
  170. package/dist/ui/components/StreamingArea.d.ts +11 -2
  171. package/dist/ui/components/StreamingArea.d.ts.map +1 -1
  172. package/dist/ui/components/StreamingArea.js +20 -23
  173. package/dist/ui/components/StreamingArea.js.map +1 -1
  174. package/dist/ui/components/StreamingArea.test.d.ts +2 -0
  175. package/dist/ui/components/StreamingArea.test.d.ts.map +1 -0
  176. package/dist/ui/components/StreamingArea.test.js +18 -0
  177. package/dist/ui/components/StreamingArea.test.js.map +1 -0
  178. package/dist/ui/components/ToolExecution.d.ts.map +1 -1
  179. package/dist/ui/components/ToolExecution.js +11 -27
  180. package/dist/ui/components/ToolExecution.js.map +1 -1
  181. package/dist/ui/components/ToolGroupExecution.d.ts.map +1 -1
  182. package/dist/ui/components/ToolGroupExecution.js +9 -124
  183. package/dist/ui/components/ToolGroupExecution.js.map +1 -1
  184. package/dist/ui/components/UserMessage.d.ts.map +1 -1
  185. package/dist/ui/components/UserMessage.js +15 -10
  186. package/dist/ui/components/UserMessage.js.map +1 -1
  187. package/dist/ui/components/UserMessage.test.d.ts +2 -0
  188. package/dist/ui/components/UserMessage.test.d.ts.map +1 -0
  189. package/dist/ui/components/UserMessage.test.js +39 -0
  190. package/dist/ui/components/UserMessage.test.js.map +1 -0
  191. package/dist/ui/footer-status-layout.test.js +21 -7
  192. package/dist/ui/footer-status-layout.test.js.map +1 -1
  193. package/dist/ui/goal-events.d.ts +8 -0
  194. package/dist/ui/goal-events.d.ts.map +1 -1
  195. package/dist/ui/goal-events.js +28 -8
  196. package/dist/ui/goal-events.js.map +1 -1
  197. package/dist/ui/goal-events.test.js +40 -2
  198. package/dist/ui/goal-events.test.js.map +1 -1
  199. package/dist/ui/goal-lifecycle-orchestration.test.js +127 -34
  200. package/dist/ui/goal-lifecycle-orchestration.test.js.map +1 -1
  201. package/dist/ui/goal-overlay.test.js +121 -43
  202. package/dist/ui/goal-overlay.test.js.map +1 -1
  203. package/dist/ui/goal-summary.d.ts +14 -0
  204. package/dist/ui/goal-summary.d.ts.map +1 -0
  205. package/dist/ui/goal-summary.js +194 -0
  206. package/dist/ui/goal-summary.js.map +1 -0
  207. package/dist/ui/hooks/useAgentLoop.d.ts +8 -2
  208. package/dist/ui/hooks/useAgentLoop.d.ts.map +1 -1
  209. package/dist/ui/hooks/useAgentLoop.js +20 -9
  210. package/dist/ui/hooks/useAgentLoop.js.map +1 -1
  211. package/dist/ui/hooks/useAgentLoop.test.d.ts +2 -0
  212. package/dist/ui/hooks/useAgentLoop.test.d.ts.map +1 -0
  213. package/dist/ui/hooks/useAgentLoop.test.js +8 -0
  214. package/dist/ui/hooks/useAgentLoop.test.js.map +1 -0
  215. package/dist/ui/hooks/useTerminalSize.d.ts +5 -9
  216. package/dist/ui/hooks/useTerminalSize.d.ts.map +1 -1
  217. package/dist/ui/hooks/useTerminalSize.js +9 -14
  218. package/dist/ui/hooks/useTerminalSize.js.map +1 -1
  219. package/dist/ui/live-item-flush.d.ts +2 -2
  220. package/dist/ui/live-item-flush.d.ts.map +1 -1
  221. package/dist/ui/live-item-flush.js +8 -4
  222. package/dist/ui/live-item-flush.js.map +1 -1
  223. package/dist/ui/long-prompt-regression-harness.test.d.ts +2 -0
  224. package/dist/ui/long-prompt-regression-harness.test.d.ts.map +1 -0
  225. package/dist/ui/long-prompt-regression-harness.test.js +195 -0
  226. package/dist/ui/long-prompt-regression-harness.test.js.map +1 -0
  227. package/dist/ui/plan-overlay.test.js +7 -29
  228. package/dist/ui/plan-overlay.test.js.map +1 -1
  229. package/dist/ui/queued-message.test.d.ts.map +1 -1
  230. package/dist/ui/queued-message.test.js +76 -14
  231. package/dist/ui/queued-message.test.js.map +1 -1
  232. package/dist/ui/render.d.ts +21 -24
  233. package/dist/ui/render.d.ts.map +1 -1
  234. package/dist/ui/render.js +46 -28
  235. package/dist/ui/render.js.map +1 -1
  236. package/dist/ui/render.test.d.ts +2 -0
  237. package/dist/ui/render.test.d.ts.map +1 -0
  238. package/dist/ui/render.test.js +16 -0
  239. package/dist/ui/render.test.js.map +1 -0
  240. package/dist/ui/scroll-stabilization.test.js +1 -1
  241. package/dist/ui/scroll-stabilization.test.js.map +1 -1
  242. package/dist/ui/slash-command-images.test.js +79 -4
  243. package/dist/ui/slash-command-images.test.js.map +1 -1
  244. package/dist/ui/terminal-history.d.ts +26 -0
  245. package/dist/ui/terminal-history.d.ts.map +1 -0
  246. package/dist/ui/terminal-history.js +910 -0
  247. package/dist/ui/terminal-history.js.map +1 -0
  248. package/dist/ui/terminal-history.test.d.ts +2 -0
  249. package/dist/ui/terminal-history.test.d.ts.map +1 -0
  250. package/dist/ui/terminal-history.test.js +314 -0
  251. package/dist/ui/terminal-history.test.js.map +1 -0
  252. package/dist/ui/tool-group-summary.d.ts +16 -0
  253. package/dist/ui/tool-group-summary.d.ts.map +1 -0
  254. package/dist/ui/tool-group-summary.js +123 -0
  255. package/dist/ui/tool-group-summary.js.map +1 -0
  256. package/dist/ui/tui-history-parity.test.d.ts +2 -0
  257. package/dist/ui/tui-history-parity.test.d.ts.map +1 -0
  258. package/dist/ui/tui-history-parity.test.js +243 -0
  259. package/dist/ui/tui-history-parity.test.js.map +1 -0
  260. package/dist/ui/utils/assistant-stream-split.d.ts +6 -0
  261. package/dist/ui/utils/assistant-stream-split.d.ts.map +1 -0
  262. package/dist/ui/utils/assistant-stream-split.js +37 -0
  263. package/dist/ui/utils/assistant-stream-split.js.map +1 -0
  264. package/dist/ui/utils/assistant-stream-split.test.d.ts +2 -0
  265. package/dist/ui/utils/assistant-stream-split.test.d.ts.map +1 -0
  266. package/dist/ui/utils/assistant-stream-split.test.js +58 -0
  267. package/dist/ui/utils/assistant-stream-split.test.js.map +1 -0
  268. package/dist/ui/utils/latex-to-unicode.d.ts +22 -0
  269. package/dist/ui/utils/latex-to-unicode.d.ts.map +1 -0
  270. package/dist/ui/utils/latex-to-unicode.js +538 -0
  271. package/dist/ui/utils/latex-to-unicode.js.map +1 -0
  272. package/dist/ui/utils/markdown-renderer.d.ts +20 -0
  273. package/dist/ui/utils/markdown-renderer.d.ts.map +1 -0
  274. package/dist/ui/utils/markdown-renderer.js +327 -0
  275. package/dist/ui/utils/markdown-renderer.js.map +1 -0
  276. package/dist/ui/utils/markdown-table.d.ts +9 -0
  277. package/dist/ui/utils/markdown-table.d.ts.map +1 -0
  278. package/dist/ui/utils/markdown-table.js +95 -0
  279. package/dist/ui/utils/markdown-table.js.map +1 -0
  280. package/dist/ui/utils/text-utils.d.ts +8 -0
  281. package/dist/ui/utils/text-utils.d.ts.map +1 -0
  282. package/dist/ui/utils/text-utils.js +16 -0
  283. package/dist/ui/utils/text-utils.js.map +1 -0
  284. package/dist/ui/utils/token-to-ansi.js +19 -9
  285. package/dist/ui/utils/token-to-ansi.js.map +1 -1
  286. package/dist/ui/utils/user-message-display.d.ts +7 -0
  287. package/dist/ui/utils/user-message-display.d.ts.map +1 -0
  288. package/dist/ui/utils/user-message-display.js +26 -0
  289. package/dist/ui/utils/user-message-display.js.map +1 -0
  290. package/dist/utils/format.js +0 -9
  291. package/dist/utils/format.js.map +1 -1
  292. package/package.json +9 -5
  293. package/dist/tools/enter-plan.d.ts +0 -8
  294. package/dist/tools/enter-plan.d.ts.map +0 -1
  295. package/dist/tools/enter-plan.js +0 -30
  296. package/dist/tools/enter-plan.js.map +0 -1
  297. package/dist/tools/exit-plan.d.ts +0 -8
  298. package/dist/tools/exit-plan.d.ts.map +0 -1
  299. package/dist/tools/exit-plan.js +0 -36
  300. package/dist/tools/exit-plan.js.map +0 -1
  301. package/dist/tools/tasks.d.ts +0 -16
  302. package/dist/tools/tasks.d.ts.map +0 -1
  303. package/dist/tools/tasks.js +0 -133
  304. package/dist/tools/tasks.js.map +0 -1
  305. package/dist/ui/components/EyesOverlay.d.ts +0 -10
  306. package/dist/ui/components/EyesOverlay.d.ts.map +0 -1
  307. package/dist/ui/components/EyesOverlay.js +0 -220
  308. package/dist/ui/components/EyesOverlay.js.map +0 -1
  309. package/dist/ui/components/TaskOverlay.d.ts +0 -10
  310. package/dist/ui/components/TaskOverlay.d.ts.map +0 -1
  311. package/dist/ui/components/TaskOverlay.js +0 -267
  312. package/dist/ui/components/TaskOverlay.js.map +0 -1
@@ -6,237 +6,15 @@ export const PROMPT_COMMANDS = [
6
6
  {
7
7
  name: "goal",
8
8
  aliases: ["g"],
9
- description: "Create a programmatic goal loop",
10
- prompt: `# Goal: Programmatic Goal Loop
11
-
12
- You are creating a durable Goal run: a programmatic control loop that lets the user rely on the agent while they are not watching. The run should keep the main orchestrator focused on the objective while workers build, instrument, diagnose, and gather evidence.
13
-
14
- ## User objective
15
-
16
- The user's objective is in the command arguments. If the arguments are absent or too vague to identify an actionable objective, ask exactly one concise clarifying question and do not create a Goal run yet.
17
-
18
- ## Non-negotiable boundary: /goal creates a run, it does not do the work
19
-
20
- The initial /goal invocation is setup/orchestration only. During this turn:
21
-
22
- - Create or update the durable run and Goal tasks, then stop.
23
- - Do not implement, fix, refactor, edit, or generate project artifacts for the objective yourself.
24
- - Do not call subagent, the normal tasks tool, goals resume, or any action that starts workers, verifiers, or auto-continuation.
25
- - Do not run the verifier or "just start" any task. Worker agents do implementation after the user explicitly starts the Goal from the Goal pane with (R).
26
- - You MUST run every cheap local prerequisite check you identify before creating or updating the Goal. Do not leave a locally checkable prerequisite as unknown, and do not mark any prerequisite met unless you have checked it or have concrete non-secret evidence. If a check would mutate files, start a service, run a long process, launch a worker, or begin implementation, make it a Goal task or a blocked external prerequisite instead.
27
-
28
- ## Core mindset: goal-specific sensory proof
29
-
30
- Do not default to ordinary tests, generic scripts, or broad simulations. First model what must be experienced for this specific goal to be trusted without the human present.
31
-
32
- For each Goal, identify:
33
-
34
- 1. Intended experience — who or what must experience the result: user, customer, operator, developer, attacker, browser, device, API client, database, model, downstream system, or another relevant perspective.
35
- 2. Failure imagination — the goal-specific ways the result could appear done while still failing in reality.
36
- 3. Required senses/signals — the observations needed to detect those failures. Think in capabilities, not fixed tools: perception of rendered output, interaction, timing, persistence/state, external boundaries, adversarial/social pressure, generated artifacts, traces, comparisons, or other signals relevant to this objective.
37
- 4. Proportional instruments — local/free ways workers can capture those signals. The evidence portfolio should be as small as possible while still removing the important assumptions; do not simulate, script, screenshot, benchmark, or red-team anything unless that signal is relevant to this goal.
38
- 5. Completion rule — why the planned evidence would be enough to claim success, and what remains unproven or blocked.
39
-
40
- Any examples you consider are inspiration, not a checklist. Borrow verification ideas from any domain when useful, but choose only the senses/signals that fit the user's actual objective.
41
-
42
- ## Orchestrator responsibilities
43
-
44
- 1. Translate the user's objective into:
45
- - a short title,
46
- - the original goal text,
47
- - concrete success criteria that can be verified,
48
- - prerequisite checks,
49
- - an evidence plan describing the goal-specific sensory proof required,
50
- - harness or observability items that workers may need to build,
51
- - a verifier command when already obvious, otherwise a verifier description or task to define one.
52
- 2. Plan first; do not build during initial Goal creation. The orchestrator must perform cheap local prerequisite checks needed to determine whether the Goal is blocked, but worker agents should build instruments, implementation changes, harnesses, diagnostics, and verifier commands after the user starts the Goal. If implementation work is needed, capture it as a Goal task instead of doing it yourself.
53
- 3. Before creating or updating the run, identify every prerequisite and check each one that can be checked locally with the available tools. Examples are non-exhaustive and should not anchor the plan: required credentials or permissions, local capabilities, app/runtime availability, fixture/assets/test data, devices/emulators, network or service access, or domain-specific inputs. Record checked prerequisites as \`met\` only with concise non-secret evidence, record failed local checks as \`missing\` with exact remediation, and leave \`unknown\` only for true external inputs that cannot be checked locally in this setup turn.
54
- 4. Prefer local/free capabilities already available in the project or environment. Do not require paid services, signups, new external accounts, private assets, or physical access unless unavoidable for this specific objective.
55
- 5. Only ask the user for true external blockers after checking what you can do yourself. If a missing input cannot be generated or verified locally, record the exact minimal prerequisite and ask once in chat; do not ask for broad lists of things you could inspect or create yourself.
56
- 6. Treat user-provided prerequisites as the first Goal item, named "User prerequisites" in the pane. The user may provide the missing value or instructions in chat. After they do, verify it locally without revealing secrets, then update the matching prerequisite to \`met\` with short evidence before any worker task runs.
57
- 7. Persist the run with the goals tool:
58
- - call \`goals({ action: "create", ... })\` once the objective is understood,
59
- - include success criteria, prerequisites, evidence_plan items, harness items, and verifier info,
60
- - the goals tool will also run each provided \`check_command\` before persisting; still do not rely on that as a substitute for thinking through and checking available prerequisites yourself.
61
- - if any prerequisite is missing, lacks check evidence, or is unknown because it cannot be checked locally, persist the run as blocked and ask the user for the exact missing thing once.
62
- 8. Add Goal tasks with \`goals({ action: "task", ... })\`. Do not use the normal tasks tool for this workflow. Each Goal task prompt must be standalone, mention the same project cwd, the specific goal slice, the sensory signals or evidence it must produce, any existing instruments it should reuse, and verification expectations. Avoid pure "investigate and report" tasks unless their prompt explicitly requires persisting concrete findings with \`goals({ action: "evidence", ... })\` and creating or updating the next implementation task from those findings.
63
- 9. Persist evidence with \`goals({ action: "evidence", ... })\` whenever workers create diagnostics, build or run instruments, capture artifacts, record controller decisions, attach verifier output, or learn a blocker.
64
- 10. Completion means verifier evidence satisfies the original success criteria and the required sensory proof. Do not call \`goals({ action: "complete" })\` merely because tasks are done; only complete after verification passes.
65
- 11. When the Goal reaches a terminal state, give the user a specific final summary in chat. Do not collapse the outcome into one generic row or say only that it "verified." Use a compact 3–4 column table with one row per substantive Goal task, evidence path, success criterion, verifier result, blocker, or decision. For bug/fix/audit goals, include the problem, how it was proven real or wrong, what fixed it, and the exact verification. For creation/improvement/non-problem goals, substitute the requested outcome or gap, what was delivered or decided, and the exact proof that the intended experience now exists. Include small snippets when useful: file:line references, command names and exit codes, short before/after text, log excerpts, artifact paths, or verifier output summaries. Do not dump worker logs; quote only the few details needed to make the conclusion auditable.
66
-
67
- ## Loop semantics
68
-
69
- Initial /goal turn order: understand intended experience → imagine relevant failures → choose required senses/signals → plan proportional instruments → persist the run/tasks/evidence plan → stop.
70
-
71
- After the user starts a Goal from the Goal pane with (R), worker and verifier completions are sent back to you as hidden synthetic events. On each event, call \`goals({ action: "status", run_id })\`, inspect current state, briefly say what the orchestrator is doing so the chat shows progress, and take the next durable control-loop action rather than merely narrating. The UI keeps auto-continuing until the run is passed, blocked, paused, or failed. Even during auto-continuation, do not switch into hands-on implementation; if work is needed, create or update Goal tasks and let workers/verifiers do it.
72
-
73
- If no verifier command exists yet, create a task to define one. If an evidence path or harness is only planned, create a worker task to build the missing instrument, then later workers can reuse that instrument for subsequent slices. If the verifier fails, persist the failure evidence and add the next Goal task that addresses the failure. Cap runaway loops by pausing and recording evidence when repeated attempts stop making progress.
74
-
75
- ## Final response
76
-
77
- When initially creating the Goal, keep the response short: say whether the Goal was created, ready, or blocked; mention the exact missing prerequisite if blocked; and tell the user they can press Ctrl+G to view it. Then stop. Do not continue into implementation, worker startup, verifier execution, or Goal resume. If they ask how to start it, tell them the Goal pane keybind is (r) to run it. When auto-continuation eventually passes, fails, blocks, or pauses the Goal, provide the specific multi-row final summary table described above, with concrete proof snippets instead of a generic "verified" claim.`,
78
- },
79
- {
80
- name: "scan",
81
- aliases: [],
82
- description: "Find confirmed dead code only",
83
- prompt: `# Scan: Confirmed Dead Code Review
84
-
85
- Find dead code in this codebase. Do not look for bugs, security issues, performance issues, style issues, or refactors. This command is report-first: do not edit or delete anything until the user chooses an option at the end.
86
-
87
- ## Phase 1: Parallel dead-code search
88
-
89
- Spawn exactly 3 sub-agents in parallel using the subagent tool (call the subagent tool 3 times in a single response), each with a different validation angle:
90
-
91
- **Agent 1 - Static Reachability**: Check exports, imports, call sites, route registration, command registration, component usage, tests, package entrypoints, and public API surfaces. Identify candidates only when references appear absent or unreachable.
92
-
93
- **Agent 2 - Runtime & Dynamic Usage**: Check dynamic loading, reflection, string-based references, plugin systems, CLI commands, routes, config keys, generated-code hooks, framework conventions, side-effect imports, and files used outside TypeScript import graphs.
94
-
95
- **Agent 3 - Historical & Boundary Safety**: Check git history, package manifests, build configs, docs, examples, scripts, CI, release artifacts, and external-facing filenames/API names that may be consumed by users even if unused internally.
96
-
97
- Each sub-agent must return only candidates with file:line ranges, estimated line counts, validation evidence, and reasons removal may be unsafe. Finding nothing is valid.
98
-
99
- ## Phase 2: Main-agent validation
100
-
101
- For every candidate, validate it yourself before reporting it:
102
-
103
- 1. Search for references with grep/find and language-aware patterns where possible, including exact symbol names, filenames, route names, config keys, CLI command names, test names, and documented examples.
104
- 2. Check exports and package/public entrypoints before marking anything removable.
105
- 3. Check framework conventions and dynamic lookup risks before marking anything removable. Use official docs when a framework/tool convention could imply usage without direct imports.
106
- 4. Check whether removing it would change public API, CLI behavior, routes, config support, migration behavior, generated artifacts, docs examples, tests, or side effects.
107
- 5. For code-level removal tasks, kencode search is secondary: use it only to verify framework/tool conventions or common generated-code patterns that could make code appear unused locally. Do not treat absence from public code search as proof that local code is dead.
108
- 6. If evidence is incomplete, mark safety as Low or drop the finding.
109
-
110
- ## What counts as dead code
111
-
112
- Report only code that is validated as one of:
113
-
114
- - **Unused file**: no imports, no entrypoint references, no dynamic/framework usage, no public/exported contract.
115
- - **Unused export**: exported but not referenced internally or by package entrypoints, and not part of documented/public API.
116
- - **Unreachable branch**: condition/path cannot execute based on current code and config.
117
- - **Obsolete artifact**: stale script/config/example/generated artifact no longer referenced by build, docs, package manifests, or CI.
118
- - **No-op code**: code executes but has no observable effect and no intentional placeholder/documentation purpose.
119
-
120
- Do not report:
121
- - Public APIs, package exports, CLI commands, routes, config keys, migrations, docs examples, tests, generated-code integration points, or plugin hooks unless you can prove they are obsolete.
122
- - Code only unused in the current test suite.
123
- - Code that might be used through strings, framework conventions, side effects, or external consumers.
124
- - Anything you are not confident is safe to remove.
125
-
126
- ## Safety labels
127
-
128
- - **High**: Strong evidence from static references, entrypoints, configs, docs, tests, and dynamic-use checks; removal is likely safe.
129
- - **Medium**: Probably dead, but one boundary or dynamic-use risk remains; remove only with targeted verification.
130
- - **Low**: Suspicious but not proven; do not remove without more investigation.
131
-
132
- ## Final output
133
-
134
- Output one concise table, prioritized by safety and impact. No prose before the table.
135
-
136
- | Priority | Location | Lines | Dead-code type | Evidence | Safety to remove | Recommended action |
137
- |---|---|---:|---|---|---|---|
138
- | P0/P1/P2/P3 | file:line-line | N | unused file/export/branch/artifact/no-op | one sentence | High/Medium/Low | Remove / Investigate / Keep |
139
-
140
- Priority guide:
141
- - **P0**: High-safety removal with meaningful line or complexity reduction.
142
- - **P1**: High-safety small removal, or Medium-safety meaningful cleanup.
143
- - **P2**: Medium-safety small cleanup; needs targeted verification.
144
- - **P3**: Low-safety candidate; keep unless user wants deeper investigation.
145
-
146
- Rules:
147
- - Put High safety rows first, then Medium, then Low.
148
- - Keep each table cell short.
149
- - If no confirmed dead code is found, output one row saying none found and set action to \`Keep\`.
150
- - Do not recommend deletion for Low-safety rows.
151
-
152
- After the table, ask exactly:
153
-
154
- What should I do?
155
- A) Create tasks to remove all High-safety dead code
156
- B) Create tasks to remove only top priorities
157
- C) Skip
158
-
159
- Do not start deleting or editing until the user chooses.
160
-
161
- If the user chooses A or B, do not remove code directly. Instead, use the tasks tool to create one task per selected removal or tightly coupled removal group, ordered by dependency and risk. Each task prompt must be standalone and include the exact locations, safety evidence, reference-search requirements, removal instructions, project verification commands, and instructions to prove the removal did not delete used code before marking the task complete. That proof must include fresh local reference searches after editing, relevant project checks/tests, and official-docs or kencode comparison only where framework/tool conventions or generated-code patterns could imply hidden usage. After creating tasks, tell the user exactly: "Tasks created. Press CTRL + T to open the Tasks Pane and press R to run all tasks." Do not begin executing them unless the user explicitly starts a task.`,
162
- },
163
- {
164
- name: "verify",
165
- aliases: [],
166
- description: "Review this codebase against real-world implementations",
167
- prompt: `# Verify: Codebase Real-World Check
168
-
169
- Review this codebase's implementation against real-world code, not opinions. Start with changes from this conversation or \`git diff\` / \`git status\`; if there are no relevant changes, choose the most important implemented feature or module in the current project and review that.
170
-
171
- ## Phase 1: Parallel codebase review
172
-
173
- Spawn exactly 3 sub-agents in parallel using the subagent tool (call the subagent tool 3 times in a single response), each with a different focus:
174
-
175
- **Agent 1 - Implementation Shape**: Identify the main APIs, components, functions, file structure, state flow, and integration points. Return only concrete search anchors and candidate concerns.
176
-
177
- **Agent 2 - Completeness**: Check whether the implementation appears to miss expected pieces: edge cases, cleanup, error states, validation, tests, configuration, accessibility, migrations, docs, or lifecycle handling. Return only concrete candidate gaps.
178
-
179
- **Agent 3 - Divergence**: Look for unusual patterns, over-custom code, reinvented utilities, brittle abstractions, or choices that may differ from how mature projects solve the same problem. Return only concrete candidate divergences.
180
-
181
- Each sub-agent must include file:line references and suggested literal search anchors for kencode search, such as imports, function names, hooks, props, config keys, or API calls. Do not report subjective style preferences.
182
-
183
- ## Phase 2: Real-world comparison with kencode search
184
-
185
- After the 3 agents return, use \`mcp__kencode-search__searchCode\` yourself to verify or reject their candidates.
186
-
187
- Search rules:
188
- - Use literal code tokens, not conceptual phrases.
189
- - Prefer imports, framework identifiers, config keys, hook names, component names, and API calls from this codebase.
190
- - Use \`peek: true\` first when exploring, then fetch narrowed examples with repo/path filters when useful.
191
- - Compare against multiple real repositories when possible; one repo is weak evidence unless it is an official or canonical implementation.
192
- - If kencode search is unavailable or returns insufficient evidence, say that in the Evidence column and lower confidence.
193
-
194
- ## What to classify
195
-
196
- Report only findings that fit one of these:
197
-
198
- 1. **Aligned** - The implementation matches consistent real-world practice. No action needed.
199
- 2. **Missing** - Real-world implementations consistently include something this code lacks.
200
- 3. **Divergent** - This code differs from common implementations in a way that likely matters.
201
- 4. **Better Elsewhere** - Real-world implementations solve the same problem more robustly or simply, with evidence.
202
-
203
- Drop anything that is only taste, personal preference, or unsupported by code evidence.
204
-
205
- ## Final output
206
-
207
- Output one concise table, prioritized by impact. No prose before the table.
208
-
209
- | Priority | Type | Location | Finding | Evidence | Recommended action |
210
- |---|---|---|---|---|---|
211
- | P0/P1/P2/P3 | Missing/Divergent/Better Elsewhere/Aligned | file:line | one sentence | kencode evidence in one sentence | concrete action or \`None\` |
212
-
213
- Priority guide:
214
- - **P0**: likely bug, data loss, security risk, or broken integration.
215
- - **P1**: important missing behavior or maintainability risk.
216
- - **P2**: useful improvement backed by real-world evidence.
217
- - **P3**: aligned/no-action observations.
218
-
219
- Rules:
220
- - Keep each table cell short.
221
- - Put action-taking findings before aligned findings.
222
- - If everything is aligned, output only aligned rows and set every action to \`None\`.
223
- - If there is not enough evidence for any finding, output one row explaining that verification was inconclusive.
224
-
225
- After the table, ask exactly:
226
-
227
- Which should I do?
228
- A) Create tasks to refine and adjust all
229
- B) Create tasks for just top priorities
230
- C) Skip
231
-
232
- Do not start fixing until the user chooses.
233
-
234
- If the user chooses A or B, do not fix directly. Instead, use the tasks tool to create one task per selected finding or tightly coupled finding group, ordered by dependency and priority. Each task prompt must be standalone and include the finding, affected local files/anchors, kencode evidence from the report, instructions to compare the approach with kencode search before editing, implementation instructions, project verification commands, and instructions to compare the final implementation with kencode search again before marking the task complete. After creating tasks, tell the user exactly: "Tasks created. Press CTRL + T to open the Tasks Pane and press R to run all tasks." Do not begin executing them unless the user explicitly starts a task.`,
9
+ description: "Create a durable programmatic goal loop",
10
+ // Contract anchors for the audit verifier: /goal setup is setup-only.
11
+ // Do not implement; plan/research as needed first, then define success criteria, evidence_plan, verifier, and goals metadata, then stop.
12
+ prompt: `Create a Goal run for the following objective. First plan/research only if needed; Goal setup will consume that plan and create durable Goal state.`,
235
13
  },
236
14
  {
237
15
  name: "expand",
238
16
  aliases: [],
239
- description: "Find high-value gaps by comparing this project to current alternatives",
17
+ description: "Find high-value project gaps",
240
18
  prompt: `# Expand: Current Competitive Gap Review
241
19
 
242
20
  Find high-value gaps by comparing this project to similar, adjacent, and best-in-class repositories/tools/websites/services. This command is project-agnostic: infer what THIS project is before choosing comparisons. This command is report-first: do not edit, install, or implement anything until the user chooses an option at the end.
@@ -331,29 +109,29 @@ Rules:
331
109
  After the tables, ask exactly:
332
110
 
333
111
  What should I do?
334
- A) Create tasks for all P0/P1 gaps
335
- B) Create tasks for only the top priority gap from each category
112
+ A) Create a Goal for all P0/P1 gaps
113
+ B) Create a Goal for only the top priority gap from each category
336
114
  C) Skip
337
115
 
338
116
  Do not start implementing until the user chooses.
339
117
 
340
- If the user chooses A or B, do not implement gaps directly. Instead, use the tasks tool to create one implementation task per selected gap, ordered by dependency and priority.
118
+ If the user chooses A or B, do not implement gaps directly. Instead, create one durable Goal with one implementation worker task per selected gap, ordered by dependency and priority.
341
119
 
342
- Each task prompt must be standalone and include:
120
+ Each worker prompt must be standalone and include:
343
121
 
344
122
  1. The specific gap, including relevant local files/anchors and source evidence from the /expand report.
345
123
  2. Instructions to compare the implementation approach with kencode search before editing, using literal code tokens and current real-world examples.
346
124
  3. Instructions to implement the gap in the local codebase.
347
- 4. Instructions to verify correctness after implementation by running project checks and by comparing the final implementation with kencode search again before marking the task complete.
125
+ 4. Instructions to verify correctness after implementation by running project checks and by comparing the final implementation with kencode search again before marking the Goal task complete.
348
126
 
349
- Do not create planning tasks, do not instruct tasks to use planning-only workflows, and do not create or write implementation plans from /expand selections.
127
+ Do not create planning-only Goal tasks, do not instruct workers to use planning-only workflows, and do not create or write implementation plans from /expand selections.
350
128
 
351
- After creating tasks, tell the user exactly: "Tasks created. Press CTRL + T to open the Tasks Pane and press R to run all tasks." Do not begin executing them unless the user explicitly starts a task.`,
129
+ After creating the Goal, tell the user exactly: "Goal created. Press CTRL + G to open the Goal pane and run it." Do not begin executing it unless the user explicitly starts the Goal.`,
352
130
  },
353
131
  {
354
132
  name: "bullet-proof",
355
133
  aliases: ["bp"],
356
- description: "Defensive security review — audit the project for exploitable weaknesses",
134
+ description: "Audit exploitable weaknesses",
357
135
  prompt: `# Bullet-Proof: Defensive Security Review
358
136
 
359
137
  You are a defensive security auditor reviewing this codebase to identify exploitable weaknesses so they can be patched before the project ships. Think rigorously about realistic threat scenarios — boundary checks, edge cases, race conditions, trust assumptions, supply-chain risks, agent-mediated paths.
@@ -485,14 +263,14 @@ Threat model: [from recon]
485
263
  After the report, ask:
486
264
 
487
265
  > Which (if any) should I fix? Options:
488
- > - A) Create tasks for all Critical + High
489
- > - B) Create tasks for specific findings (give IDs, e.g. "BP-001, BP-004")
490
- > - C) Create tasks for a category (auth, supply chain, secrets, …)
266
+ > - A) Create a Goal for all Critical + High
267
+ > - B) Create a Goal for specific findings (give IDs, e.g. "BP-001, BP-004")
268
+ > - C) Create a Goal for a category (auth, supply chain, secrets, …)
491
269
  > - D) None — report only
492
270
 
493
271
  **Do not start fixing until the user picks.**
494
272
 
495
- If the user chooses A, B, or C, do not fix directly. Instead, use the tasks tool to create one task per selected finding or tightly coupled finding group, ordered by severity, exploitability, and dependency. Each task prompt must be standalone and include the finding ID, vulnerability scenario, affected local files/anchors, concrete remediation, instructions to compare security-sensitive implementation details with kencode search or authoritative docs before editing, project verification commands, and instructions to compare the final fix with kencode search or authoritative docs again before marking the task complete. After creating tasks, tell the user exactly: "Tasks created. Press CTRL + T to open the Tasks Pane and press R to run all tasks." Do not begin executing them unless the user explicitly starts a task.
273
+ If the user chooses A, B, or C, do not fix directly. Instead, create one durable Goal with one worker task per selected finding or tightly coupled finding group, ordered by severity, exploitability, and dependency. Each worker prompt must include the finding ID, vulnerability scenario, affected local files/anchors, concrete remediation, instructions to compare security-sensitive implementation details with kencode search or authoritative docs before editing, project verification commands, and instructions to compare the final fix with kencode search or authoritative docs again before marking the Goal task complete. After creating the Goal, tell the user exactly: "Goal created. Press CTRL + G to open the Goal pane and run it." Do not begin executing it unless the user explicitly starts the Goal.
496
274
 
497
275
  ## Threat reference (May 2026)
498
276
 
@@ -537,147 +315,6 @@ Cite these as needed per audit. Do not dump them into the report — use them to
537
315
  - **Strict confidence gate (≥0.8).** Drop everything else, even if it looks suspicious.
538
316
  - **Adapt to the stack, always.** The audit catalog and threat reference above are guidance, not a checklist to apply uniformly.
539
317
  - **Report only.** Wait for the user to pick what to fix in Phase 6.`,
540
- },
541
- {
542
- name: "source",
543
- aliases: ["depcheck", "depsource"],
544
- description: "Plan, source-check, adjust, and verify dependency-aligned code",
545
- prompt: `# Source: Plan → Research → Adjust → Verify
546
-
547
- Use exact installed dependency source as the source of truth, then align this project end-to-end. This command is action-oriented like /verify and /compare: plan the investigation, research with source_path, adjust the code, and verify everything before finishing.
548
-
549
- ## Phase 1: Plan the source check
550
-
551
- Do a short, private plan before tool-heavy work:
552
-
553
- 1. Identify the dependency surface to check.
554
- - If the user passed args, treat them as the package/repo/spec plus optional focus area.
555
- - If no args were passed, inspect recent changes, changed files, imports, manifests, and current conversation context to pick the 1-3 dependencies most likely to matter.
556
- 2. Decide what “aligned” means for this run: APIs/types, exports, CLI flags, config schema, runtime behavior, lifecycle/cleanup, error handling, package subpaths, tests, docs examples, or UI/tool wording.
557
- 3. Decide the parallel research slices. Use up to 3 sub-agents; use fewer when the scope is obvious. Do not pad.
558
-
559
- Do not ask the user for confirmation. Proceed unless the focus is impossible to infer.
560
-
561
- ## Phase 2: Research exact dependency source
562
-
563
- For every in-scope dependency, call \`source_path\` before making claims about APIs, types, flags, config, exports, or runtime behavior.
564
-
565
- Inspect the returned absolute source path with \`read\`, \`grep\`, \`find\`, and \`ls\`. Prefer dependency source files, package manifests, type definitions, exports, tests, examples, changelogs, and README sections inside that source checkout. Use web docs only when source alone is ambiguous.
566
-
567
- Spawn the research sub-agents in parallel in one response when useful:
568
-
569
- - **Local Usage Agent**: find local imports, wrappers, tool calls, config keys, CLI flags, tests, docs, and assumptions tied to the dependency. Return exact file:line anchors.
570
- - **Dependency Source Agent**: inspect the exact source_path checkout. Return exact source file paths and authoritative facts about APIs, types, exports, lifecycle, errors, config, and gotchas.
571
- - **Alignment Agent**: compare local assumptions to dependency facts. Return concrete mismatches, missing handling, stale usage, brittle assumptions, or simplifications backed by exact source evidence.
572
-
573
- Every finding must include both local file paths and dependency-source file paths. Mark unproven items as \`aligned\` or \`inconclusive\`; do not turn them into fixes.
574
-
575
- ## Phase 3: Adjust the code
576
-
577
- Validate every candidate yourself, then fix all confirmed issues directly.
578
-
579
- Valid adjustments include:
580
-
581
- - Correct wrong/stale API or type usage for the installed version
582
- - Fix import/export/package-subpath usage
583
- - Fix config keys, option shapes, CLI flags, or tool schemas
584
- - Add missing lifecycle cleanup, abort handling, error handling, or edge-case handling proven by source
585
- - Align local tests/docs/examples with the installed dependency source
586
- - Align local tool prompts/TUI wording when they misrepresent dependency behavior
587
- - Remove small custom workarounds when the installed dependency source shows a supported built-in path
588
-
589
- Rules:
590
-
591
- - Read each local file before editing it.
592
- - Match neighboring local patterns and tone.
593
- - Keep edits minimal and focused; no broad refactors.
594
- - Do not upgrade dependencies unless the user explicitly asked for an upgrade.
595
- - Do not edit just because upstream source uses a different style.
596
- - If a formatter, codegen, or autofix mutates files, re-read before more edits.
597
-
598
- ## Phase 4: Verify everything
599
-
600
- Run the relevant project checks for changed files. If this project specifies commands, use them. Otherwise infer from manifests. For TypeScript, run lint, typecheck, format check, and tests when available.
601
-
602
- If verification fails, read the failure, fix it, and rerun. Do not report success with failing or unrun checks.
603
-
604
- ## Final response
605
-
606
- Keep it short:
607
-
608
- - Dependencies/source paths checked
609
- - Adjustments made, or \`No changes needed — local usage aligns with installed source\`
610
- - Verification commands run
611
-
612
- Do not ask what to do next unless blocked by missing information or an external failure.`,
613
- },
614
- {
615
- name: "research",
616
- aliases: [],
617
- description: "Research best tools, deps, and patterns",
618
- prompt: `Research the best tools, dependencies, and architecture for this project.
619
-
620
- First, if it's not clear what the project is building, ask me to describe the features, target platform, and any constraints. If you can infer this from the codebase, proceed directly.
621
-
622
- Then spawn 6 sub-agents in parallel using the subagent tool (call the subagent tool 6 times in a single response, each with a different task). Every agent must verify ALL recommendations with current official docs, package registries, releases, or maintained source repositories - no training-data assumptions allowed. Use kencode search for architecture and implementation-shape comparisons where real code examples matter.
623
-
624
- **Agent 1 - Project Scan**: Read the current working directory. Catalog what already exists: config files, installed deps, directory structure, language/framework already chosen. Report exactly what's in place.
625
-
626
- **Agent 2 - Stack Validation**: Research whether the current framework/language is the best choice for this project. Compare top 2-3 alternatives on performance, ecosystem, and developer experience. Pick ONE winner with evidence.
627
-
628
- **Agent 3 - Core Dependencies**: For EACH feature, find the single best library for this stack. Confirm latest stable versions. No outdated packages. Output: package name, version, one-line purpose.
629
-
630
- **Agent 4 - Dev Tooling**: Research the best dev tooling for this stack: package manager, bundler, linter, formatter, test framework, type checker. Pick ONE per category with exact versions.
631
-
632
- **Agent 5 - Architecture**: Find how real projects of this type structure their code. Look for directory layouts, file naming conventions, and key patterns. Output a concrete directory tree and list of patterns.
633
-
634
- **Agent 6 - Config & Integration**: Research required config files for the chosen stack and tools. Cover: linter config, formatter config, TS/type config, env setup, CI/CD basics.
635
-
636
- ## Agent Rules
637
-
638
- 1. Every recommendation MUST be verified with a source URL/date - no guessing
639
- 2. Confirm latest stable versions from official registries or release pages - do not assume version numbers
640
- 3. Verify CLI flags, config keys, and file formats against official docs before recommending them
641
- 4. Pick ONE best option per category - no "you could also use X"
642
- 5. No prose, no hedging, no alternatives lists - decisive answers only
643
-
644
- ## Output
645
-
646
- After all agents complete, synthesize findings into a single RESEARCH.md file:
647
-
648
- \`\`\`markdown
649
- # RESEARCH: [short project description]
650
- Generated: [today's date]
651
- Stack: [framework + language + runtime]
652
-
653
- ## INSTALL
654
- [exact shell commands - copy-paste ready]
655
-
656
- ## DEPENDENCIES
657
- | package | version | purpose |
658
- [each purpose max 5 words]
659
-
660
- ## DEV DEPENDENCIES
661
- | package | version | purpose |
662
-
663
- ## CONFIG FILES TO CREATE
664
- ### [filename]
665
- [exact file contents or key settings]
666
-
667
- ## PROJECT STRUCTURE
668
- [tree showing recommended directories]
669
-
670
- ## SETUP STEPS
671
- 1. [concrete action]
672
-
673
- ## KEY PATTERNS
674
- [brief list of architectural patterns]
675
-
676
- ## SOURCES
677
- [URLs used for verification]
678
- \`\`\`
679
-
680
- Write the file, then summarize what was researched and list the verification sources used. If any recommendation could not be verified from current official sources or maintained repos, omit it rather than guessing.`,
681
318
  },
682
319
  {
683
320
  name: "init",
@@ -685,7 +322,7 @@ Write the file, then summarize what was researched and list the verification sou
685
322
  description: "Generate or update CLAUDE.md for this project",
686
323
  prompt: `Generate or update a minimal CLAUDE.md with project-specific context only: what this project is, how it is structured, and commands/workflows that are unique to it.
687
324
 
688
- Do NOT add generic agent behavior already covered by the system prompt, including: read before edit/write, re-read after formatters, ask before destructive actions, no fake verification, generic code-quality advice, single-responsibility rules, one-file-per-component rules, or language-style conventions. Include only project-specific overrides or stricter local requirements.
325
+ Do NOT add generic agent behavior already covered by the system prompt, including: read before edit/write, re-read after formatters, ask before destructive actions, no fake verification, generic code-quality advice, single-responsibility rules, one-file-per-component rules, or language-style conventions. Never add guidance that requires running checks, builds, or the full quality suite after every edit or every file change. Include only project-specific overrides or stricter local requirements.
689
326
 
690
327
  ## Step 1: Check if CLAUDE.md Exists
691
328
 
@@ -716,7 +353,7 @@ Check for config files:
716
353
  - go.mod -> Go
717
354
  - Cargo.toml -> Rust
718
355
 
719
- Extract exact commands that are useful project facts. Verify commands against local package scripts, manifests, Makefiles, CI, or documented project workflows; do not invent commands from convention alone. Do not restate generic "run checks after edits" behavior unless this project requires a stricter command sequence than the system prompt's Verification section.
356
+ Extract exact commands that are useful project facts. Verify commands against local package scripts, manifests, Makefiles, CI, or documented project workflows; do not invent commands from convention alone. Do not restate generic "run checks after edits" behavior, and do not turn discovered commands into mandatory after-every-edit requirements unless local docs or CI explicitly require that stricter sequence.
720
357
 
721
358
  ## Step 4: Summarize Stable Structure
722
359
 
@@ -732,7 +369,7 @@ Create CLAUDE.md with only sections that add project-specific value. Prefer this
732
369
  - Exact local commands (install/build/check/test/dev/publish/deploy) when they are not obvious from package scripts alone
733
370
  - Project-specific constraints that override defaults (for example required publish order, generated-file workflow, auth/secrets storage, deployment caveats)
734
371
 
735
- Avoid generic sections named "Code Quality", "Organization Rules", or "How to Work" unless every bullet is specific to this project. Do not duplicate language style packs or generic verification rules. Do not add generated repo maps, symbol indexes, exhaustive file indexes, or auto-generated project inventories; CLAUDE.md must remain durable, agent-focused project context.
372
+ Avoid generic sections named "Code Quality", "Organization Rules", or "How to Work" unless every bullet is specific to this project. Do not duplicate language style packs, generic verification rules, or boilerplate quality gates such as "After editing ANY file" / "Code Quality — Zero Tolerance". Do not add generated repo maps, symbol indexes, exhaustive file indexes, or auto-generated project inventories; CLAUDE.md must remain durable, agent-focused project context.
736
373
 
737
374
  Keep total file under 100 lines. If updating, preserve any custom sections the user added. After writing, re-read CLAUDE.md and confirm it contains only project-specific facts supported by local files.
738
375
 
@@ -741,79 +378,11 @@ Keep total file under 100 lines. If updating, preserve any custom sections the u
741
378
  End your reply with this exact notice so the user doesn't miss it:
742
379
 
743
380
  > ⚠️ CLAUDE.md was created/updated. ggcoder loads it at startup, so **exit and restart ggcoder** (\`/quit\` then run \`ggcoder\` again) before continuing. Without a restart, I won't see the new context.`,
744
- },
745
- {
746
- name: "setup-lint",
747
- aliases: [],
748
- description: "Generate a /fix command for linting and typechecking",
749
- prompt: `Detect the project type and generate a /fix command for linting and typechecking.
750
-
751
- ## Step 1: Detect Project Type
752
-
753
- Check for config files:
754
- - package.json -> JavaScript/TypeScript
755
- - pyproject.toml or requirements.txt -> Python
756
- - go.mod -> Go
757
- - Cargo.toml -> Rust
758
- - composer.json -> PHP
759
-
760
- Read the relevant config file to understand the project structure.
761
-
762
- ## Step 2: Check Existing Tools
763
-
764
- Based on the project type, check if linting/typechecking tools are already configured:
765
-
766
- - **JS/TS**: eslint, prettier, typescript — check package.json scripts and config files
767
- - **Python**: mypy, pylint, black, ruff — check dependencies and config files
768
- - **Go**: go vet, gofmt, staticcheck
769
- - **Rust**: clippy, rustfmt
770
-
771
- ## Step 3: Install Missing Tools (if needed)
772
-
773
- Only install what's missing. Use the detected package manager. Before installing or writing config, verify current recommended setup, CLI flags, and config filenames against official docs for the selected tools.
774
-
775
- ## Step 4: Generate /fix Command
776
-
777
- Create the directory \`.gg/commands/\` if it doesn't exist, then write \`.gg/commands/fix.md\`:
778
-
779
- \`\`\`markdown
780
- ---
781
- name: fix
782
- description: Run typechecking and linting, then spawn parallel agents to fix all issues
783
- ---
784
-
785
- Run all linting and typechecking tools, collect errors, group them by domain, and use the subagent tool to spawn parallel sub-agents to fix them.
786
-
787
- ## Step 1: Run Checks
788
-
789
- [INSERT PROJECT-SPECIFIC COMMANDS — e.g. npm run lint, npm run typecheck, etc.]
790
-
791
- ## Step 2: Collect and Group Errors
792
-
793
- Parse the output. Group errors by domain:
794
- - **Type errors**: Issues from TypeScript, mypy, etc.
795
- - **Lint errors**: Issues from eslint, pylint, ruff, clippy, etc.
796
- - **Format errors**: Issues from prettier, black, rustfmt, gofmt
797
-
798
- ## Step 3: Spawn Parallel Agents
799
-
800
- For each domain with issues, use the subagent tool to spawn a sub-agent to fix all errors in that domain.
801
-
802
- ## Step 4: Verify
803
-
804
- After all agents complete, re-run all checks to verify all issues are resolved.
805
- \`\`\`
806
-
807
- Replace [INSERT PROJECT-SPECIFIC COMMANDS] with the actual commands for the detected project.
808
-
809
- ## Step 5: Confirm
810
-
811
- Report what was detected, what official docs or local configs were used to verify it, what was installed, and that /fix is now available.`,
812
381
  },
813
382
  {
814
383
  name: "setup-commit",
815
384
  aliases: [],
816
- description: "Generate a /commit command with quality checks",
385
+ description: "Generate a /commit command",
817
386
  prompt: `Detect the project type and generate a /commit command that enforces quality checks before committing.
818
387
 
819
388
  ## Step 1: Detect Project and Extract Commands
@@ -860,398 +429,11 @@ Keep the command file under 20 lines.
860
429
  ## Step 3: Confirm
861
430
 
862
431
  Report that /commit is now available with quality checks and AI-generated commit messages, and mention which local scripts/docs verified the commands.`,
863
- },
864
- {
865
- name: "setup-tests",
866
- aliases: [],
867
- description: "Set up testing and generate a /test command",
868
- prompt: `Set up comprehensive testing for this project and generate a /test command.
869
-
870
- ## Step 1: Analyze Project
871
-
872
- Detect the project type, framework, and architecture. Identify all critical business logic that needs testing.
873
-
874
- ## Step 2: Determine Testing Strategy
875
-
876
- Use these tools based on project type (2025-2026 best practices), but verify current versions, install commands, config files, and runner flags against official docs before installing anything:
877
-
878
- | Language | Unit/Integration | E2E | Notes |
879
- |----------|------------------|-----|-------|
880
- | JS/TS | Vitest (not Jest) | Playwright | Vitest is faster, native ESM/TS. Use Testing Library for components. |
881
- | Python | pytest | Playwright | pytest-django for Django, httpx+pytest-asyncio for FastAPI. |
882
- | Go | testing + testify | httptest | testcontainers-go for integration. Table-driven tests. |
883
- | Rust | #[test] + rstest | axum-test | assert_cmd for CLI, proptest for property-based. |
884
- | PHP | Pest 4 (Laravel) / PHPUnit 12 | Laravel Dusk | Pest preferred for Laravel. |
885
-
886
- ## Step 3: Set Up Testing Infrastructure
887
-
888
- Spawn 4 sub-agents in parallel using the subagent tool (call the subagent tool 4 times in a single response):
889
-
890
- **Agent 1 - Dependencies & Config**: Install test frameworks and create config files
891
- **Agent 2 - Unit Tests**: Create comprehensive unit tests for all business logic, utilities, and core functions
892
- **Agent 3 - Integration Tests**: Create integration tests for APIs, database operations, and service interactions
893
- **Agent 4 - E2E Tests** (if applicable): Create end-to-end tests for critical user flows
894
-
895
- Each agent should create COMPREHENSIVE tests covering all critical code paths - not just samples. Each agent must verify test framework APIs and helper patterns against official docs or current maintained examples before adding tests.
896
-
897
- ## Step 4: Verify and Generate /test Command
898
-
899
- Run the tests to verify everything works. Fix any issues.
900
-
901
- Then create the directory \`.gg/commands/\` if it doesn't exist and write \`.gg/commands/test.md\` with:
902
-
903
- \`\`\`markdown
904
- ---
905
- name: test
906
- description: Run tests, then spawn parallel agents to fix failures
907
- ---
908
-
909
- Run all tests for this project, collect failures, and use the subagent tool to spawn parallel sub-agents to fix them.
910
-
911
- ## Step 1: Run Tests
912
-
913
- [PROJECT-SPECIFIC TEST COMMANDS with options for watch mode, coverage, filtering]
914
-
915
- ## Step 2: If Failures
916
-
917
- For each failing test, use the subagent tool to spawn a sub-agent to fix the underlying issue (not the test).
918
-
919
- ## Step 3: Re-run
920
-
921
- Re-run tests to verify all fixes.
922
- \`\`\`
923
-
924
- Replace placeholders with the actual test commands for this project.
925
-
926
- ## Step 5: Report
927
-
928
- Summarize what was set up, how many tests were created, what official docs/current examples verified the setup, and that /test is now available.`,
929
- },
930
- {
931
- name: "setup-update",
932
- aliases: [],
933
- description: "Generate an /update command for dependency updates",
934
- prompt: `Detect the project type and generate an /update command for dependency updates and deprecation fixes.
935
-
936
- ## Step 1: Detect Project Type & Package Manager
937
-
938
- Check for config files and lock files:
939
- - package.json + package-lock.json -> npm
940
- - package.json + yarn.lock -> yarn
941
- - package.json + pnpm-lock.yaml -> pnpm
942
- - pyproject.toml + poetry.lock -> poetry
943
- - requirements.txt -> pip
944
- - go.mod -> Go
945
- - Cargo.toml -> Rust
946
-
947
- ## Step 2: Generate /update Command
948
-
949
- Create the directory \`.gg/commands/\` if it doesn't exist, then write \`.gg/commands/update.md\`:
950
-
951
- \`\`\`markdown
952
- ---
953
- name: update
954
- description: Update dependencies, fix deprecations and warnings
955
- ---
956
-
957
- ## Step 1: Check for Updates
958
-
959
- [OUTDATED CHECK COMMAND for detected package manager]
960
-
961
- ## Step 2: Update Dependencies
962
-
963
- [UPDATE COMMAND + SECURITY AUDIT]
964
-
965
- ## Step 3: Check for Deprecations & Warnings
966
-
967
- Run a clean install and read ALL output carefully. Look for:
968
- - Deprecation warnings
969
- - Security vulnerabilities
970
- - Peer dependency warnings
971
- - Breaking changes
972
-
973
- ## Step 4: Fix Issues
974
-
975
- For each warning/deprecation:
976
- 1. Research the recommended replacement or fix using official changelogs, migration guides, advisories, or package docs
977
- 2. Update code/dependencies accordingly
978
- 3. Re-run installation
979
- 4. Verify no warnings remain
980
-
981
- ## Step 5: Run Quality Checks
982
-
983
- [PROJECT-SPECIFIC LINT/TYPECHECK COMMANDS]
984
-
985
- Fix all errors before completing.
986
-
987
- ## Step 6: Verify Clean Install
988
-
989
- Delete dependency folders/caches, run a fresh install, verify ZERO warnings/errors.
990
- \`\`\`
991
-
992
- Replace all placeholders with the actual commands for the detected project type and package manager.
993
-
994
- ## Step 3: Confirm
995
-
996
- Report that /update is now available with dependency updates, security audits, and deprecation fixes, and mention that generated update steps require official changelog/migration-guide verification before applying changes.`,
997
- },
998
- {
999
- name: "setup-eyes",
1000
- aliases: [],
1001
- description: "Set up project perception probes and document them",
1002
- prompt: `# Eyes: Set Up or Expand Project Perception
1003
-
1004
- Build the perception probes this project needs and document them in CLAUDE.md so any future agent can use them. The \`ggcoder eyes\` CLI does the mechanical work (detect, install, verify); your job is **judgment** (which capabilities matter for THIS project) and **prose** (the project-specific triggers in CLAUDE.md). Re-run this command anytime to add or fix probes.
1005
-
1006
- ## Steps
1007
-
1008
- 1. \`ggcoder eyes list\` — see what's already installed/verified. **Resume**, don't restart. Skip verified probes; re-run failed ones.
1009
- 2. \`ggcoder eyes detect\` — emits JSON of \`{capability: {candidates, primary}}\` for this project.
1010
- 3. **Pick 3–8 capabilities to install this run.** Verify any capability assumptions against \`ggcoder eyes\` help output or official/local CLI docs before installing. Heuristics:
1011
- - Universal: \`http\` for any API/backend, \`runtime_logs\` for anything with a server.
1012
- - UI: \`visual\` — for multi-stack projects (e.g. React Native), install all primary candidates with distinct names: \`install visual --impl playwright --as visual-web\`, \`install visual --impl adb --as visual-android\`, \`install visual --impl simctl --as visual-ios\`.
1013
- - Backend with email/webhooks: \`capture_email\`, \`capture_webhook\`.
1014
- - **Always defer** opt-ins: \`load\`, \`chaos\`, \`remote\`, \`apm\` — unless the user explicitly asked.
1015
- 4. For each pick: \`ggcoder eyes install <cap> [--impl <name>] [--as <name>]\`. On failure: retry once, then mark and continue — don't abort the whole run.
1016
- 5. \`ggcoder eyes verify\` — runs every installed probe's self-test. Some failures (\`adb\` no device, \`simctl\` no booted simulator) are expected; they get recorded.
1017
- 6. **Write/update the \`## Eyes\` section in CLAUDE.md** (create CLAUDE.md if missing; do NOT clobber other sections). Use the template below. The triggers are the load-bearing piece — make them project-specific and actionable.
1018
- 7. **Report**: list verified ✓ / failed ✗ / deferred, and note which probe self-tests or docs verified the setup. End with the restart notice.
1019
-
1020
- ## CLAUDE.md \`## Eyes\` template
1021
-
1022
- \`\`\`markdown
1023
- ## Eyes
1024
-
1025
- Perception probes live in \`.gg/eyes/\`. All headless. Artifacts → \`.gg/eyes/out/\` (gitignored). Invoke probes yourself; don't ask the user to verify what you can verify.
1026
-
1027
- ### Available probes
1028
-
1029
- | Need | Run | Then |
1030
- |---|---|---|
1031
- | <one-line need> | \`.gg/eyes/<id>.sh <args>\` | <how to consume the output> |
1032
- | ... | ... | ... |
1033
-
1034
- ### When to use these eyes (automatically, without being asked)
1035
-
1036
- Reach for probes ON YOUR OWN INITIATIVE when any of these apply:
1037
-
1038
- - <project-specific trigger 1, e.g. "After editing any \`.tsx\` file under \`src/components/\`, screenshot the affected page with \`.gg/eyes/visual.sh http://localhost:3000/<path>\`.">
1039
- - <trigger 2, e.g. "After adding/modifying a route under \`src/routes/\`, hit it with \`.gg/eyes/http.sh\` and confirm the response shape.">
1040
- - <trigger 3>
1041
-
1042
- If a probe fails or returns unexpected results, investigate the artifact directly before assuming the probe itself is broken.
1043
-
1044
- ### When NOT to use
1045
-
1046
- - Docs-only changes, comments, formatting.
1047
- - Refactors covered by tests.
1048
- - Dev server / simulator / sink isn't up AND the task doesn't require runtime verification.
1049
- - Same probe already ran this turn on the same artifact — reuse the output.
1050
-
1051
- ### When to escalate a capability gap (the self-improvement loop)
1052
-
1053
- If you're about to **guess**, **skip verification**, or **hand-wave** about something a better probe would show you — STOP and surface the tradeoff inline. Phrasing like:
1054
-
1055
- > "I tried screenshotting but the failure is a JS error I can only see in the browser console — and there's no \`browser_console\` probe. Two paths: (a) ~3 min to add it, then I can diagnose properly. (b) Workaround: I'd guess from the DOM state. Your call?"
1056
-
1057
- Wait for the user's choice. **Don't escalate more than once per request** — if the user picked the workaround, don't re-ask in the same turn.
1058
-
1059
- For minor friction (worked around it but wished it were better), don't interrupt — log it for later review:
1060
- - \`ggcoder eyes log rough "<reason>" [--probe <name>]\` — minor friction, you handled it
1061
- - \`ggcoder eyes log wish "<gap>"\` — capability you wished existed
1062
- - \`ggcoder eyes log blocked "<reason>"\` — call this AFTER the user approves an inline-escalation fix, for the audit trail
1063
-
1064
- These accumulate quietly. The user reviews them periodically. Open signals will appear in your context on future turns until they're acked.
1065
- \`\`\`
1066
-
1067
- ## Trigger writing rules
1068
-
1069
- The "When to use" triggers are project-specific and the load-bearing piece — without them the agent has probes but no instinct to use them. Rules:
1070
- - For each verified probe, write at least one trigger that names a real **file pattern** or **task type** the agent will recognize ("after editing \`*.tsx\` under \`src/ui/\`", not "after UI changes").
1071
- - Be **actionable** ("screenshot the page", "hit the endpoint") not **vague** ("verify it works").
1072
- - Match density to the project: a UI-heavy app warrants strong visual triggers; a pure backend library does not.
1073
-
1074
- ## Restart notice
1075
-
1076
- End your report with:
1077
-
1078
- > ⚠ CLAUDE.md was updated. ggcoder loads CLAUDE.md at startup, so **exit and restart ggcoder** (\`/quit\` then \`ggcoder\` again) before asking me to use these probes. Without a restart, I won't see the new instructions in my context.`,
1079
- },
1080
- {
1081
- name: "eyes-improve",
1082
- aliases: [],
1083
- description: "Triage eyes signals and apply approved probe fixes",
1084
- prompt: `# Eyes Improve: Triage Accumulated Signals
1085
-
1086
- Read the open signals in \`.gg/eyes/journal.jsonl\`, group related ones, propose concrete fixes, and apply what the user approves. This isn't unbounded refactoring — it's incremental probe improvement driven by real use.
1087
-
1088
- ## Steps
1089
-
1090
- 1. \`ggcoder eyes log list --status open\` — if zero entries, say "nothing to triage" and stop.
1091
- 2. **Group** signals by likely fix:
1092
- - Multiple \`rough\` entries naming the same probe / same frustration → one patch to that probe.
1093
- - \`wish\` entries naming a capability not installed → one \`ggcoder eyes install <cap>\` proposal.
1094
- - \`blocked\` entries are historical (user already resolved inline) → ack them, no new work.
1095
- 3. **Cap at 5 proposals this run.** If more would apply, mention them and stop — they'll resurface next run.
1096
- 4. For each group, propose ONE concrete change:
1097
- - **Probe tweak**: read \`.gg/eyes/<name>.sh\`, show a diff, explain what it fixes.
1098
- - **New probe**: \`ggcoder eyes install <cap>\` with a one-line justification.
1099
- - **New/updated trigger**: bullet added under \`## Eyes → When to use\` in CLAUDE.md.
1100
- 5. Present all proposals as a numbered list with diffs inline. Ask: **"Accept which? Reply with numbers (e.g. '1, 3') or 'none'."**
1101
- 6. On user reply:
1102
- - For accepted: apply the change. Then run the relevant probe self-test or a focused command that exercises the changed probe/trigger. Then \`ggcoder eyes log ack <id>\` for every journal entry the proposal covers.
1103
- - For unmentioned / rejected: \`ggcoder eyes log defer <id>\` so they stop appearing in context every turn. The user can resurrect deferred entries later.
1104
- 7. **Report**: applied changes (one line each), verification run, entries acked, entries deferred.
1105
-
1106
- ## Rules
1107
-
1108
- - **No fishing.** Only act on entries already in the journal. Don't scan the repo for hypothetical gaps.
1109
- - **No scope creep.** "Add a \`--wait-for-selector\` flag to the visual probe" is in scope. "Rewrite the probe in TypeScript" is not.
1110
- - **Preserve user edits.** If \`.gg/eyes/<name>.sh\` has diverged from the shipped impl (user hand-edited), point this out and ask before overwriting.
1111
- - **Be honest about tradeoffs.** If a proposed fix might break existing invocations, say so in the proposal.
1112
- - **Decline when appropriate.** If open signals are all vague or low-value, say so and defer them — don't manufacture fixes.`,
1113
- },
1114
- {
1115
- name: "simplify",
1116
- aliases: [],
1117
- description: "Review changed code and fix issues found",
1118
- prompt: `# Simplify: Code Review and Cleanup
1119
-
1120
- Review all changed files for reuse, quality, and efficiency. Fix any issues found.
1121
-
1122
- ## Phase 1: Identify Changes
1123
-
1124
- Run \`git diff\` (or \`git diff HEAD\` if there are staged changes) to see what changed. If there are no git changes, review the most recently modified files that the user mentioned or that you edited earlier in this conversation.
1125
-
1126
- ## Phase 2: Launch Three Review Agents in Parallel
1127
-
1128
- Use the subagent tool to launch all three agents concurrently in a single response (call the subagent tool 3 times in one message). Pass each agent the full diff so it has the complete context.
1129
-
1130
- ### Agent 1: Code Reuse Review
1131
-
1132
- For each change:
1133
-
1134
- 1. **Search for existing utilities and helpers** that could replace newly written code. Look for similar patterns elsewhere in the codebase — common locations are utility directories, shared modules, and files adjacent to the changed ones.
1135
- 2. **Flag any new function that duplicates existing functionality.** Suggest the existing function to use instead.
1136
- 3. **Flag any inline logic that could use an existing utility** — hand-rolled string manipulation, manual path handling, custom environment checks, ad-hoc type guards, and similar patterns are common candidates.
1137
-
1138
- ### Agent 2: Code Quality Review
1139
-
1140
- Review the same changes for hacky patterns:
1141
-
1142
- 1. **Redundant state**: state that duplicates existing state, cached values that could be derived, observers/effects that could be direct calls
1143
- 2. **Parameter sprawl**: adding new parameters to a function instead of generalizing or restructuring existing ones
1144
- 3. **Copy-paste with slight variation**: near-duplicate code blocks that should be unified with a shared abstraction
1145
- 4. **Leaky abstractions**: exposing internal details that should be encapsulated, or breaking existing abstraction boundaries
1146
- 5. **Stringly-typed code**: using raw strings where constants, enums (string unions), or branded types already exist in the codebase
1147
- 6. **Unnecessary JSX nesting**: wrapper Boxes/elements that add no layout value — check if inner component props (flexShrink, alignItems, etc.) already provide the needed behavior
1148
- 7. **Unnecessary comments**: comments explaining WHAT the code does (well-named identifiers already do that), narrating the change, or referencing the task/caller — delete; keep only non-obvious WHY (hidden constraints, subtle invariants, workarounds)
1149
-
1150
- ### Agent 3: Efficiency Review
1151
-
1152
- Review the same changes for efficiency:
1153
-
1154
- 1. **Unnecessary work**: redundant computations, repeated file reads, duplicate network/API calls, N+1 patterns
1155
- 2. **Missed concurrency**: independent operations run sequentially when they could run in parallel
1156
- 3. **Hot-path bloat**: new blocking work added to startup or per-request/per-render hot paths
1157
- 4. **Recurring no-op updates**: state/store updates inside polling loops, intervals, or event handlers that fire unconditionally — add a change-detection guard so downstream consumers aren't notified when nothing changed. Also: if a wrapper function takes an updater/reducer callback, verify it honors same-reference returns (or whatever the "no change" signal is) — otherwise callers' early-return no-ops are silently defeated
1158
- 5. **Unnecessary existence checks**: pre-checking file/resource existence before operating (TOCTOU anti-pattern) — operate directly and handle the error
1159
- 6. **Memory**: unbounded data structures, missing cleanup, event listener leaks
1160
- 7. **Overly broad operations**: reading entire files when only a portion is needed, loading all items when filtering for one
1161
-
1162
- ## Phase 3: Fix Issues
1163
-
1164
- Wait for all three agents to complete. Aggregate their findings and fix each issue directly. If a finding is a false positive or not worth addressing, note it and move on — do not argue with the finding, just skip it.
1165
-
1166
- Before making any non-trivial pattern/API change, verify the intended approach against local neighboring code first; use kencode search or official docs when the change touches framework APIs, lifecycle behavior, concurrency, cleanup, or other conventions where real-world practice matters.
1167
-
1168
- When done, run relevant project checks/tests, then briefly summarize what was fixed (or confirm the code was already clean) and what verification ran.`,
1169
- },
1170
- {
1171
- name: "batch",
1172
- aliases: [],
1173
- description: "Plan a large change, execute in parallel PRs",
1174
- prompt: `# Batch: Parallel Work Orchestration
1175
-
1176
- You are orchestrating a large, parallelizable change across this codebase.
1177
-
1178
- ## Phase 1: Research
1179
-
1180
- Launch one or more subagents using the subagent tool with \`agent: "researcher"\` to deeply research what this instruction touches. You need their results before proceeding, so wait for them to complete. Have them:
1181
-
1182
- - Find ALL files, patterns, and call sites that need to change
1183
- - Understand existing conventions so the migration is consistent
1184
- - Quantify the surface area (how many files, how many call sites)
1185
- - Note any risks or complications
1186
-
1187
- ## Phase 2: Plan
1188
-
1189
- After research completes, call the enter_plan tool to enter plan mode. Using the research findings:
1190
-
1191
- 1. **Decompose into independent units.** Break the work into 5–30 self-contained units. Each unit must:
1192
- - Be independently implementable on its own git branch (no shared state with sibling units)
1193
- - Be mergeable on its own without depending on another unit's PR landing first
1194
- - Be roughly uniform in size (split large units, merge trivial ones)
1195
-
1196
- Scale the count to the actual work: few files → closer to 5; hundreds of files → closer to 30. Prefer per-directory or per-module slicing over arbitrary file lists.
1197
-
1198
- 2. **Determine the test recipe.** Figure out how a worker can verify its change actually works — not just that unit tests pass. Look for:
1199
- - An existing e2e/integration test suite the worker can run
1200
- - A dev-server + curl pattern (for API changes)
1201
- - A CLI verification pattern (for CLI changes)
1202
-
1203
- If you cannot find a concrete verification path, ask the user how to verify. Offer 2–3 specific options based on what the researcher found. Do not skip this — the workers cannot ask the user themselves.
1204
-
1205
- 3. **Write the plan** to \`.gg/plans/batch.md\` with:
1206
- - Summary of research findings
1207
- - Numbered list of work units — each with: title, file list, one-line description
1208
- - The test recipe (or "skip e2e because …")
1209
- - Note that each worker will use the \`worker\` agent (branch-isolated)
1210
-
1211
- 4. Call exit_plan to present the plan for approval.
1212
-
1213
- ## Phase 3: Spawn Workers (After Plan Approval)
1214
-
1215
- Record the current branch name first: \`git branch --show-current\`.
1216
-
1217
- Spawn one subagent per work unit using the subagent tool with \`agent: "worker"\`. **Launch them all in a single message block so they run in parallel.**
1218
-
1219
- For each worker, the task must be fully self-contained. Include:
1220
- - The overall goal (the user's instruction)
1221
- - The starting branch to branch from (the branch name you recorded above)
1222
- - This unit's specific task (title, file list, change description — copied verbatim from your plan)
1223
- - Any codebase conventions discovered during research
1224
- - The test recipe from your plan (or "skip e2e because …")
1225
- - These additional instructions, copied verbatim:
1226
-
1227
- \`\`\`
1228
- After you finish implementing the change:
1229
- 1. Self-review your diff for code reuse, quality, and efficiency. Search the codebase for existing utilities that could replace new code. Fix any issues found.
1230
- 2. For framework/API/config changes, compare the approach with official docs or kencode search examples before finalizing. Do not use kencode for purely local renames or mechanical edits.
1231
- 3. Run the project's test suite (check for package.json scripts, Makefile targets, or common commands like npm test, pnpm test, pytest, go test). If tests fail, fix them.
1232
- 4. Follow the e2e test recipe above. If it says to skip e2e, skip it.
1233
- 5. Commit all changes with a clear message, push the branch, and create a PR with gh pr create. Use a descriptive title.
1234
- 6. Switch back to the original branch by running: git checkout - (the lone dash selects the previously checked-out branch).
1235
- 7. End with exactly: PR: <url> or PR: none — <reason>
1236
- \`\`\`
1237
-
1238
- ## Phase 4: Track Results
1239
-
1240
- After launching all workers, render an initial status table:
1241
-
1242
- | # | Unit | Status | PR |
1243
- |---|------|--------|----|
1244
- | 1 | <title> | running | — |
1245
- | 2 | <title> | running | — |
1246
-
1247
- As workers complete, parse the \`PR: <url>\` line from each result and re-render the table with updated status (\`done\` / \`failed\`) and PR links. Keep a brief failure note for any worker that did not produce a PR.
1248
-
1249
- When all workers have reported, render the final table and a one-line summary (e.g., "22/24 units landed as PRs").`,
1250
432
  },
1251
433
  {
1252
434
  name: "compare",
1253
435
  aliases: [],
1254
- description: "Compare code against real-world implementations via kencode-search",
436
+ description: "Compare real-world code",
1255
437
  prompt: `Compare the code you just created or modified in this conversation against real-world implementations using the \`mcp__kencode-search__searchCode\` tool.
1256
438
 
1257
439
  You already know what you just built. For each file you created or modified, use \`mcp__kencode-search__searchCode\` to search for how real projects implement the same patterns. Look at the specific APIs, hooks, functions, and architecture you used.
@@ -1272,7 +454,7 @@ If the code aligns well with real-world patterns, say so. That's a good outcome.
1272
454
  {
1273
455
  name: "setup-skills",
1274
456
  aliases: [],
1275
- description: "Audit project, recommend skills ranked by impact",
457
+ description: "Recommend useful skills",
1276
458
  prompt: `# Skills Audit: Find useful skills for this project
1277
459
 
1278
460
  Analyze this project and recommend skills from the open ecosystem that would make **working on this project more efficient, easier, and safer**. That is the goal, full stop. Every recommendation must pass the test: does this skill save real time, lower real cognitive load, or prevent real mistakes for someone working on THIS project, repeatedly?
@@ -1371,7 +553,7 @@ After presenting the list, ask which (if any) to install. Install nothing withou
1371
553
  {
1372
554
  name: "setup",
1373
555
  aliases: ["setup-project"],
1374
- description: "Audit project hygiene, tooling, verify pipeline, and style-pack alignment",
556
+ description: "Audit project setup",
1375
557
  prompt: `Audit this project across six categories and report gaps. **Do not fix anything yet.** Wait for me to choose what to address after the report.
1376
558
 
1377
559
  Language-agnostic and project-agnostic — adapt findings to the languages and stack actually present. Ignore categories that don't apply (e.g. skip CI for a local-only scratchpad).
@@ -1446,16 +628,16 @@ At the end:
1446
628
  <N> gaps in hygiene, <N> in tooling, <N> in verify pipeline, <N> in style-pack alignment.
1447
629
 
1448
630
  Which (if any) would you like me to fix? Options:
1449
- - A) Create tasks for all [GAP] items that are safe + additive (no overwrites)
1450
- - B) Create tasks for a category: hygiene / tooling / verify / style-pack alignment
1451
- - C) Create tasks for specific items — tell me which
631
+ - A) Create a Goal for all [GAP] items that are safe + additive (no overwrites)
632
+ - B) Create a Goal for a category: hygiene / tooling / verify / style-pack alignment
633
+ - C) Create a Goal for specific items — tell me which
1452
634
  - D) None — just the report
1453
635
  \`\`\`
1454
636
 
1455
637
  ## Rules
1456
638
 
1457
639
  - **Report only.** No edits, no installs, no commits without explicit user confirmation after the report.
1458
- - **Task handoff for fixes.** If the user chooses A, B, or C, do not fix directly. Use the tasks tool to create one standalone task per selected gap or tightly coupled gap group. Each task must include the gap, affected files/configs, safe-additive constraints, implementation instructions, project verification commands, and instructions to verify relevant tool/config semantics against official docs before marking the task complete. Use kencode search only for code-level examples, not as proof of scaffolding requirements. After creating tasks, tell the user exactly: "Tasks created. Press CTRL + T to open the Tasks Pane and press R to run all tasks." Do not begin executing them unless the user explicitly starts a task.
640
+ - **Goal handoff for fixes.** If the user chooses A, B, or C, do not fix directly. Create one durable Goal with one standalone worker task per selected gap or tightly coupled gap group. Each worker prompt must include the gap, affected files/configs, safe-additive constraints, implementation instructions, project verification commands, and instructions to verify relevant tool/config semantics against official docs before marking the Goal task complete. Use kencode search only for code-level examples, not as proof of scaffolding requirements. After creating the Goal, tell the user exactly: "Goal created. Press CTRL + G to open the Goal pane and run it." Do not begin executing it unless the user explicitly starts the Goal.
1459
641
  - **No code refactors recommended.** This audit is about scaffolding/tooling, not code review. Use \`/scan\` or \`/verify\` for code-level findings.
1460
642
  - **No dependency installations in the report.** Listing them as observations is fine; recommending installation is not — that's the user's call.
1461
643
  - **Skip empty categories.** If a category has no findings, omit it.