supipowers 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/README.md +118 -56
  2. package/bin/install.ts +48 -128
  3. package/package.json +11 -3
  4. package/skills/code-review/SKILL.md +137 -40
  5. package/skills/context-mode/SKILL.md +67 -52
  6. package/skills/creating-supi-agents/SKILL.md +204 -0
  7. package/skills/debugging/SKILL.md +86 -40
  8. package/skills/fix-pr/SKILL.md +96 -65
  9. package/skills/planning/SKILL.md +103 -46
  10. package/skills/qa-strategy/SKILL.md +68 -46
  11. package/skills/receiving-code-review/SKILL.md +60 -53
  12. package/skills/release/SKILL.md +111 -39
  13. package/skills/tdd/SKILL.md +118 -67
  14. package/skills/verification/SKILL.md +71 -37
  15. package/src/bootstrap.ts +24 -5
  16. package/src/commands/agents.ts +249 -0
  17. package/src/commands/ai-review.ts +1113 -0
  18. package/src/commands/config.ts +224 -95
  19. package/src/commands/doctor.ts +19 -13
  20. package/src/commands/fix-pr.ts +8 -11
  21. package/src/commands/generate.ts +200 -0
  22. package/src/commands/model-picker.ts +5 -15
  23. package/src/commands/model.ts +4 -5
  24. package/src/commands/plan.ts +148 -92
  25. package/src/commands/qa.ts +14 -23
  26. package/src/commands/release.ts +504 -275
  27. package/src/commands/review.ts +643 -86
  28. package/src/commands/status.ts +44 -17
  29. package/src/commands/supi.ts +69 -42
  30. package/src/commands/update.ts +57 -2
  31. package/src/config/defaults.ts +6 -39
  32. package/src/config/loader.ts +388 -40
  33. package/src/config/model-resolver.ts +26 -22
  34. package/src/config/schema.ts +113 -48
  35. package/src/context/analyzer.ts +4 -2
  36. package/src/context-mode/detector.ts +16 -54
  37. package/src/context-mode/hooks.ts +135 -17
  38. package/src/context-mode/knowledge/chunker.ts +235 -0
  39. package/src/context-mode/knowledge/store.ts +187 -0
  40. package/src/context-mode/routing.ts +3 -9
  41. package/src/context-mode/sandbox/executor.ts +183 -0
  42. package/src/context-mode/sandbox/runners.ts +40 -0
  43. package/src/context-mode/snapshot-builder.ts +2 -2
  44. package/src/context-mode/tools.ts +440 -0
  45. package/src/context-mode/web/fetcher.ts +117 -0
  46. package/src/context-mode/web/html-to-md.ts +293 -0
  47. package/src/debug/logger.ts +107 -0
  48. package/src/deps/registry.ts +0 -20
  49. package/src/docs/drift.ts +454 -0
  50. package/src/fix-pr/fetch-comments.ts +66 -0
  51. package/src/git/commit-msg.ts +2 -1
  52. package/src/git/commit.ts +123 -141
  53. package/src/git/conventions.ts +2 -2
  54. package/src/git/status.ts +4 -1
  55. package/src/lsp/bridge.ts +138 -12
  56. package/src/planning/approval-flow.ts +125 -19
  57. package/src/planning/plan-writer-prompt.ts +4 -11
  58. package/src/planning/planning-ask-tool.ts +81 -0
  59. package/src/planning/prompt-builder.ts +9 -169
  60. package/src/planning/system-prompt.ts +290 -0
  61. package/src/platform/omp.ts +50 -4
  62. package/src/platform/progress.ts +182 -0
  63. package/src/platform/test-utils.ts +4 -1
  64. package/src/platform/tui-colors.ts +30 -0
  65. package/src/platform/types.ts +1 -0
  66. package/src/qa/detect-app-type.ts +102 -0
  67. package/src/qa/discover-routes.ts +353 -0
  68. package/src/quality/ai-session.ts +96 -0
  69. package/src/quality/ai-setup.ts +86 -0
  70. package/src/quality/gates/ai-review.ts +129 -0
  71. package/src/quality/gates/build.ts +8 -0
  72. package/src/quality/gates/command.ts +150 -0
  73. package/src/quality/gates/format.ts +28 -0
  74. package/src/quality/gates/lint.ts +22 -0
  75. package/src/quality/gates/lsp-diagnostics.ts +84 -0
  76. package/src/quality/gates/test-suite.ts +8 -0
  77. package/src/quality/gates/typecheck.ts +22 -0
  78. package/src/quality/registry.ts +25 -0
  79. package/src/quality/review-gates.ts +33 -0
  80. package/src/quality/runner.ts +268 -0
  81. package/src/quality/schemas.ts +48 -0
  82. package/src/quality/setup.ts +227 -0
  83. package/src/release/changelog.ts +7 -3
  84. package/src/release/channels/custom.ts +43 -0
  85. package/src/release/channels/gitea.ts +35 -0
  86. package/src/release/channels/github.ts +35 -0
  87. package/src/release/channels/gitlab.ts +35 -0
  88. package/src/release/channels/registry.ts +52 -0
  89. package/src/release/channels/types.ts +27 -0
  90. package/src/release/detector.ts +10 -63
  91. package/src/release/executor.ts +61 -51
  92. package/src/release/prompt.ts +38 -38
  93. package/src/release/version.ts +129 -10
  94. package/src/review/agent-loader.ts +331 -0
  95. package/src/review/consolidator.ts +180 -0
  96. package/src/review/default-agents/correctness.md +72 -0
  97. package/src/review/default-agents/maintainability.md +64 -0
  98. package/src/review/default-agents/security.md +67 -0
  99. package/src/review/fixer.ts +219 -0
  100. package/src/review/multi-agent-runner.ts +135 -0
  101. package/src/review/output.ts +147 -0
  102. package/src/review/prompts/agent-review-wrapper.md +36 -0
  103. package/src/review/prompts/fix-findings.md +32 -0
  104. package/src/review/prompts/fix-output-schema.md +18 -0
  105. package/src/review/prompts/invalid-output-retry.md +22 -0
  106. package/src/review/prompts/output-instructions.md +14 -0
  107. package/src/review/prompts/review-output-schema.md +38 -0
  108. package/src/review/prompts/single-review.md +53 -0
  109. package/src/review/prompts/validation-review.md +30 -0
  110. package/src/review/runner.ts +128 -0
  111. package/src/review/scope.ts +353 -0
  112. package/src/review/template.ts +15 -0
  113. package/src/review/types.ts +296 -0
  114. package/src/review/validator.ts +160 -0
  115. package/src/storage/plans.ts +5 -3
  116. package/src/storage/reports.ts +50 -7
  117. package/src/storage/review-sessions.ts +117 -0
  118. package/src/text.ts +19 -0
  119. package/src/types.ts +336 -26
  120. package/src/utils/paths.ts +39 -0
  121. package/src/visual/companion.ts +5 -3
  122. package/src/visual/start-server.ts +101 -0
  123. package/src/visual/stop-server.ts +39 -0
  124. package/bin/ctx-mode-wrapper.mjs +0 -66
  125. package/src/config/profiles.ts +0 -64
  126. package/src/context-mode/installer.ts +0 -38
  127. package/src/quality/ai-review-gate.ts +0 -43
  128. package/src/quality/gate-runner.ts +0 -67
  129. package/src/quality/lsp-gate.ts +0 -24
  130. package/src/quality/test-gate.ts +0 -39
  131. package/src/visual/scripts/start-server.sh +0 -98
  132. package/src/visual/scripts/stop-server.sh +0 -21
@@ -1,73 +1,88 @@
1
- # context-mode — MANDATORY routing rules
1
+ # supi-context-mode
2
2
 
3
- You have context-mode MCP tools available. These rules are NOT optional — they protect your context window from flooding. A single unrouted command can dump 56 KB into context and waste the entire session.
3
+ Route high-output tool calls through sandboxed execution to protect the context window.
4
4
 
5
- ## BLOCKED commands — do NOT attempt these
5
+ | Scope | Tool routing rules for supi-context-mode |
6
+ |-------|-----------------------------------------------------|
7
+ | Trigger | Always active when supi-context-mode tools are available |
8
+ | Goal | Prevent context flooding — a single unrouted command can dump 56 KB into context |
9
+ | Key rule | Blocked tools return errors; use sandbox equivalents instead |
6
10
 
7
- ### curl / wget — BLOCKED
8
- Any Bash command containing `curl` or `wget` is intercepted and replaced with an error message. Do NOT retry.
9
- Instead use:
10
- - `ctx_fetch_and_index(url, source)` to fetch and index web pages
11
- - `ctx_execute(language: "javascript", code: "const r = await fetch(...)")` to run HTTP calls in sandbox
11
+ ## Tool Selection Hierarchy
12
12
 
13
- ### Inline HTTP — BLOCKED
14
- Any Bash command containing `fetch('http`, `requests.get(`, `requests.post(`, `http.get(`, or `http.request(` is intercepted and replaced with an error message. Do NOT retry with Bash.
15
- Instead use:
16
- - `ctx_execute(language, code)` to run HTTP calls in sandbox — only stdout enters context
13
+ Pick the highest-priority tool that fits the task:
17
14
 
18
- ### WebFetch / Fetch — BLOCKED
19
- WebFetch and Fetch calls are denied entirely.
20
- Instead use:
21
- - `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` to query the indexed content
15
+ | Priority | Tool | Use for |
16
+ |----------|------|---------|
17
+ | 1 — GATHER | `ctx_batch_execute(commands, queries)` | Primary tool. Runs all commands, auto-indexes, returns search results. ONE call replaces 30+ individual calls. |
18
+ | 2 — FOLLOW-UP | `ctx_search(queries: ["q1", "q2", ...])` | Query already-indexed content. Pass ALL questions as array in ONE call. |
19
+ | 3 — PROCESSING | `ctx_execute(language, code)` / `ctx_execute_file(path, language, code)` | Sandbox execution. Only stdout enters context. |
20
+ | 4 — WEB | `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` | Fetch, chunk, index, query. Raw HTML never enters context. |
21
+ | 5 — INDEX | `ctx_index(content, source)` | Store content in FTS5 knowledge base for later search. |
22
22
 
23
- ### Grep — BLOCKED
24
- Grep calls are intercepted and blocked. Do NOT retry with Grep.
25
- Instead use:
26
- - `ctx_search(queries: ["<pattern>"])` to search indexed content
27
- - `ctx_batch_execute(commands, queries)` to run searches and return compressed results
28
- - `ctx_execute(language: "shell", code: "grep ...")` to run searches in sandbox
23
+ ## Blocked Commands
29
24
 
30
- ### Find / Glob — BLOCKED
31
- Find/Glob calls are intercepted and blocked. Do NOT retry with Find/Glob.
32
- Instead use:
33
- - `ctx_execute(language: "shell", code: "find ...")` to run in sandbox
34
- - `ctx_batch_execute(commands, queries)` for multiple searches
25
+ Blocked commands are intercepted and replaced with an error. Do NOT retry via Bash.
35
26
 
27
+ | Blocked tool | Replacement |
28
+ |---|---|
29
+ | `curl` / `wget` in Bash | `ctx_fetch_and_index(url, source)` or `ctx_execute` with `fetch()` |
30
+ | Inline HTTP (`fetch('http`, `requests.get(`, etc.) in Bash | `ctx_execute(language, code)` — only stdout enters context |
31
+ | WebFetch / Fetch tool | `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` |
32
+ | Grep tool | `ctx_search(queries)`, `ctx_batch_execute(commands, queries)`, or `ctx_execute(language: "shell", code: "grep ...")` |
33
+ | Find / Glob tool | `ctx_execute(language: "shell", code: "find ...")` or `ctx_batch_execute(commands, queries)` |
36
34
 
37
- ## REDIRECTED tools — use sandbox equivalents
35
+ ### Example: routing a grep call
38
36
 
39
- ### Bash (>20 lines output)
40
- Bash is ONLY for: `git`, `mkdir`, `rm`, `mv`, `cd`, `ls`, `npm install`, `pip install`, and other short-output commands.
41
- For everything else, use:
42
- - `ctx_batch_execute(commands, queries)` — run multiple commands + search in ONE call
43
- - `ctx_execute(language: "shell", code: "...")` — run in sandbox, only stdout enters context
37
+ ```
38
+ // WRONG — blocked, returns error
39
+ grep(pattern: "TODO", path: "src/")
44
40
 
45
- ### Read (large files)
46
- Reads are never blocked — they always go through OMP's native read tool so hashline anchors (`N#XX`) are preserved for the edit contract. Large file reads (>110 lines) are automatically compressed to head (80 lines) + tail (30 lines) with a `sel` hint for the omitted section.
47
- For analysis-only reads where hashlines aren't needed, `ctx_execute_file(path, language, code)` remains more efficient — only your printed summary enters context.
41
+ // CORRECT — runs in sandbox, only printed summary enters context
42
+ ctx_execute(language: "shell", code: "grep -rn TODO src/")
48
43
 
49
- ## Tool selection hierarchy
44
+ // BEST — indexes output and returns search results in one call
45
+ ctx_batch_execute(
46
+ commands: [{ label: "TODOs", command: "grep -rn TODO src/" }],
47
+ queries: ["TODO fixme priority"]
48
+ )
49
+ ```
50
50
 
51
- 1. **GATHER**: `ctx_batch_execute(commands, queries)` — Primary tool. Runs all commands, auto-indexes output, returns search results. ONE call replaces 30+ individual calls.
52
- 2. **FOLLOW-UP**: `ctx_search(queries: ["q1", "q2", ...])` — Query indexed content. Pass ALL questions as array in ONE call.
53
- 3. **PROCESSING**: `ctx_execute(language, code)` | `ctx_execute_file(path, language, code)` — Sandbox execution. Only stdout enters context.
54
- 4. **WEB**: `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` — Fetch, chunk, index, query. Raw HTML never enters context.
55
- 5. **INDEX**: `ctx_index(content, source)` — Store content in FTS5 knowledge base for later search.
51
+ ## Redirected Tools
56
52
 
57
- ## Subagent routing
53
+ ### Bash
58
54
 
59
- When spawning subagents (Agent/Task tool), the routing block is automatically injected into their prompt. Bash-type subagents are upgraded to general-purpose so they have access to MCP tools. You do NOT need to manually instruct subagents about context-mode.
55
+ Bash is for commands producing <20 lines: `git`, `mkdir`, `rm`, `mv`, `ls`, `npm install`, `pip install`.
60
56
 
61
- ## Output constraints
57
+ For everything else:
58
+ - `ctx_batch_execute(commands, queries)` — multiple commands + search in ONE call
59
+ - `ctx_execute(language: "shell", code: "...")` — sandbox, only stdout enters context
62
60
 
63
- - Keep responses under 500 words.
64
- - Write artifacts (code, configs, PRDs) to FILES — never return them as inline text. Return only: file path + 1-line description.
65
- - When indexing content, use descriptive source labels so others can `ctx_search(source: "label")` later.
61
+ ### Read
66
62
 
67
- ## ctx commands
63
+ Reads are never blocked — OMP's native read tool preserves hashline anchors (`N#XX`) for the edit contract. Large reads (>110 lines) are auto-compressed to head (80) + tail (30) with a `sel` hint.
64
+
65
+ For analysis-only reads where anchors are not needed, prefer `ctx_execute_file(path, language, code)` — only your printed summary enters context.
66
+
67
+ ## Subagent Routing
68
+
69
+ The routing block is automatically injected into subagent prompts. Bash-type subagents are upgraded to general-purpose for tool access. You do NOT need to manually instruct subagents about context-mode.
70
+
71
+ ## Output Constraints
72
+
73
+ - Write artifacts (code, configs, PRDs) to files — never inline. Return only: file path + 1-line description.
74
+ - When indexing, use descriptive `source` labels so others can `ctx_search(source: "label")` later.
75
+
76
+ ## `ctx` Commands
68
77
 
69
78
  | Command | Action |
70
79
  |---------|--------|
71
- | `ctx stats` | Call the `ctx_stats` MCP tool and display the full output verbatim |
72
- | `ctx doctor` | Call the `ctx_doctor` MCP tool, run the returned shell command, display as checklist |
73
- | `ctx upgrade` | Call the `ctx_upgrade` MCP tool, run the returned shell command, display as checklist |
80
+ | `ctx stats` | Call the `ctx_stats` tool, display full output verbatim |
81
+ | `ctx purge` | Call the `ctx_purge` tool to clear all indexed content |
82
+
83
+ ## Checklist
84
+
85
+ - [ ] Used tool hierarchy (batch_execute > search > execute > fetch) — not raw Bash/Grep/Find
86
+ - [ ] No blocked tool calls attempted
87
+ - [ ] Artifacts written to files, not returned inline
88
+ - [ ] Source labels are descriptive for later search
@@ -0,0 +1,204 @@
1
+ ---
2
+ name: creating-supi-agents
3
+ description: Interactive guide for creating a new supipowers review agent from scratch
4
+ ---
5
+
6
+ # Creating a Review Agent
7
+
8
+ Guide the user through creating a specialized code review agent for supipowers' multi-agent `/supi:review` pipeline.
9
+
10
+ ## Quick Reference
11
+
12
+ | Aspect | Detail |
13
+ |--------|--------|
14
+ | **Input** | User's description of what the agent should review |
15
+ | **Output** | Agent file saved to `.omp/agents/<agent-name>.md` |
16
+ | **File format** | YAML frontmatter (`name`, `description`, `focus`) + prompt body + `{output_instructions}` |
17
+ | **Hard constraint** | Prompt body **MUST** end with `{output_instructions}` on its own line — the pipeline replaces it with the output schema at review time |
18
+ | **Process** | Goal → Research → Present → Refine → Save |
19
+
20
+ ## Agent File Format
21
+
22
+ ```markdown
23
+ ---
24
+ name: <kebab-case-name>
25
+ description: <one-line summary>
26
+ focus: <comma-separated areas>
27
+ ---
28
+
29
+ <prompt body>
30
+
31
+ {output_instructions}
32
+ ```
33
+
34
+ ## Process
35
+
36
+ ### Step 1: Understand the Goal
37
+
38
+ Ask what kind of reviewer the user wants. Common archetypes:
39
+
40
+ | Archetype | Focus areas |
41
+ |-----------|-------------|
42
+ | Performance | algorithmic complexity, memory, caching, lazy loading |
43
+ | Accessibility | ARIA, semantic HTML, screen reader support, WCAG |
44
+ | API design | REST conventions, error contracts, versioning |
45
+ | Test quality | coverage gaps, flaky patterns, missing edge cases |
46
+ | Security | injection, auth, secrets, OWASP Top 10 |
47
+ | Documentation | JSDoc, README accuracy, changelog updates |
48
+
49
+ ### Step 2: Research
50
+
51
+ Research established checklists and best practices for the focus area (e.g., OWASP for security, WCAG for accessibility). Look for language/framework-specific patterns relevant to the user's stack.
52
+
53
+ ### Step 3: Present Overview
54
+
55
+ Present a structured proposal:
56
+ - **Name**: suggested kebab-case name
57
+ - **Description**: one-line summary
58
+ - **Focus areas**: comma-separated specializations
59
+ - **Review criteria**: bulleted list of what the agent will check
60
+ - **Example findings**: 2–3 examples of what this agent would flag
61
+
62
+ ### Step 4: Refine with User
63
+
64
+ Ask if they want to adjust:
65
+ - Focus areas or review criteria
66
+ - Tone — **strict** (flags aggressively, treats ambiguity as an issue) vs. **advisory** (flags only clear problems, uses softer language)
67
+ - Project-specific conventions to enforce
68
+
69
+ Iterate until the user approves.
70
+
71
+ ### Step 5: Save the Agent
72
+
73
+ Generate the final agent file and save to `.omp/agents/<agent-name>.md`.
74
+
75
+ ## Agent Prompt Guidelines
76
+
77
+ ### What makes a good agent prompt
78
+
79
+ 1. **State the role** clearly (e.g., "You are a performance-focused code reviewer")
80
+ 2. **List specific check items** as concrete, actionable criteria (not vague categories)
81
+ 3. **Provide severity guidance** — define what warrants `error` vs. `warning` vs. `info`
82
+ 4. **Define scope boundaries** — state what is NOT in scope to prevent overlap with other agents
83
+ 5. **End with `{output_instructions}`** — mandatory, on its own line
84
+
85
+ ### Before / After: Check Item Quality
86
+
87
+ ```markdown
88
+ # BEFORE — vague
89
+ ## What to Check
90
+ - Look for performance issues
91
+ - Check if things could be faster
92
+ - Make sure the code is efficient
93
+
94
+ # AFTER — concrete and actionable
95
+ ## What to Check
96
+ - **Algorithmic complexity**: O(n²) or worse loops, unnecessary nested iterations
97
+ - **Memory allocation**: Large object creation in hot paths, missing cleanup
98
+ - **Caching opportunities**: Repeated expensive computations that could be memoized
99
+ ```
100
+
101
+ ### Before / After: Severity Guidance
102
+
103
+ ```markdown
104
+ # BEFORE — missing severity
105
+ Flag any issues you find in the code.
106
+
107
+ # AFTER — calibrated severity
108
+ ## Severity Guide
109
+ - **error**: Will cause visible degradation in production (e.g., O(n²) on large datasets)
110
+ - **warning**: Potential issue that depends on scale (e.g., missing memoization)
111
+ - **info**: Optimization opportunity, not a current problem
112
+ ```
113
+
114
+ ## Example: Performance Agent
115
+
116
+ ```markdown
117
+ ---
118
+ name: performance
119
+ description: Reviews code for performance issues and optimization opportunities
120
+ focus: algorithmic complexity, memory allocation, caching, lazy loading
121
+ ---
122
+
123
+ You are a performance-focused code reviewer. Analyze the provided code diff for performance issues.
124
+
125
+ ## What to Check
126
+
127
+ - **Algorithmic complexity**: O(n²) or worse loops, unnecessary nested iterations
128
+ - **Memory allocation**: Large object creation in hot paths, missing cleanup
129
+ - **Caching opportunities**: Repeated expensive computations that could be memoized
130
+ - **Lazy loading**: Resources loaded eagerly that could be deferred
131
+ - **Bundle size**: Unnecessary imports, tree-shaking blockers
132
+ - **Database queries**: N+1 queries, missing indexes, unbounded result sets
133
+
134
+ ## Severity Guide
135
+
136
+ - **error**: Will cause visible performance degradation in production (e.g., O(n²) on large datasets)
137
+ - **warning**: Potential issue that depends on scale (e.g., missing memoization)
138
+ - **info**: Optimization opportunity, not a current problem
139
+
140
+ ## Out of Scope
141
+
142
+ - Correctness issues (handled by correctness agent)
143
+ - Style/formatting (handled by linter)
144
+ - Security concerns (handled by security agent)
145
+
146
+ {output_instructions}
147
+ ```
148
+
149
+ ## Example: Accessibility Agent
150
+
151
+ ```markdown
152
+ ---
153
+ name: accessibility
154
+ description: Reviews UI code for accessibility violations and WCAG compliance
155
+ focus: ARIA attributes, semantic HTML, keyboard navigation, color contrast
156
+ ---
157
+
158
+ You are an accessibility-focused code reviewer. Analyze the provided code diff for accessibility issues using WCAG 2.1 AA as the baseline.
159
+
160
+ ## What to Check
161
+
162
+ - **Semantic HTML**: `<div>` or `<span>` used where `<button>`, `<nav>`, `<main>`, `<section>` belongs
163
+ - **ARIA attributes**: Missing `aria-label` on icon-only buttons, incorrect `role` values
164
+ - **Keyboard navigation**: Interactive elements not reachable via Tab, missing focus indicators
165
+ - **Color contrast**: Text/background combinations below 4.5:1 ratio (normal text) or 3:1 (large text)
166
+ - **Form labels**: Inputs without associated `<label>` or `aria-labelledby`
167
+ - **Image alt text**: Missing or non-descriptive `alt` attributes on `<img>` tags
168
+
169
+ ## Severity Guide
170
+
171
+ - **error**: Blocks assistive technology users entirely (e.g., button with no accessible name)
172
+ - **warning**: Degraded experience for assistive technology users (e.g., missing focus indicator)
173
+ - **info**: Best-practice improvement (e.g., prefer `<nav>` over `<div role="navigation">`)
174
+
175
+ ## Out of Scope
176
+
177
+ - Visual design preferences (handled by design review)
178
+ - Performance (handled by performance agent)
179
+ - Business logic correctness (handled by correctness agent)
180
+
181
+ {output_instructions}
182
+ ```
183
+
184
+ ## MUST DO / MUST NOT DO
185
+
186
+ | MUST DO | MUST NOT DO |
187
+ |---------|-------------|
188
+ | End every agent prompt with `{output_instructions}` on its own line | Omit `{output_instructions}` — the pipeline will fail |
189
+ | Include a severity guide (`error` / `warning` / `info`) | Leave severity undefined — agents produce inconsistent ratings |
190
+ | Define "Out of Scope" to prevent overlap with other agents | Let scope overlap — produces duplicate findings across agents |
191
+ | Use concrete check items with specific patterns to look for | Use vague criteria like "check for issues" or "ensure quality" |
192
+ | Save to `.omp/agents/<agent-name>.md` | Save anywhere else or leave unsaved |
193
+
194
+ ## Pre-Save Checklist
195
+
196
+ Before saving the agent file, verify:
197
+
198
+ - [ ] YAML frontmatter has `name`, `description`, and `focus`
199
+ - [ ] Prompt body states the agent's role in the first sentence
200
+ - [ ] At least 3 concrete, actionable check items
201
+ - [ ] Severity guide defines `error`, `warning`, and `info` thresholds
202
+ - [ ] "Out of Scope" section present
203
+ - [ ] `{output_instructions}` is the last line of the prompt body
204
+ - [ ] File saved to `.omp/agents/<agent-name>.md`
@@ -5,58 +5,104 @@ description: Systematic debugging — find root cause before attempting fixes, 4
5
5
 
6
6
  # Systematic Debugging
7
7
 
8
- ## Iron Law
8
+ Find the root cause before touching the code. Every fix without a verified root cause is a coin flip.
9
9
 
10
- **NO FIXES WITHOUT ROOT CAUSE INVESTIGATION FIRST.**
10
+ ## Quick Reference
11
11
 
12
- Symptom fixes are failure. If you haven't completed Phase 1, you cannot propose fixes.
12
+ | Aspect | Detail |
13
+ |--------|--------|
14
+ | **Trigger** | Bug report, failing test, unexpected behavior, error message |
15
+ | **Input** | Error output, stack trace, user report, failing test, or observed misbehavior |
16
+ | **Output** | Root-cause statement, minimal fix, regression test, verification evidence |
17
+ | **Gate rule** | You **MUST** complete Phase 1 before proposing any fix |
18
+ | **Escalation** | After 3 failed fix attempts → stop, reassess architecture with human partner |
19
+
20
+ ## Phases
21
+
22
+ | Phase | Goal | Gate (exit when true) |
23
+ |-------|------|-----------------------|
24
+ | 1. Investigate | Identify root cause | Root cause stated as a falsifiable claim |
25
+ | 2. Analyze | Confirm via pattern comparison | Difference between working and broken code documented |
26
+ | 3. Hypothesize | Single testable prediction | Hypothesis written as "Changing [X] produces [Y] because [Z]" |
27
+ | 4. Fix | Minimal correct change | Failing test passes, no regressions |
28
+
29
+ ---
13
30
 
14
31
  ## Phase 1: Root Cause Investigation
15
32
 
16
- Complete this phase before proposing any fix.
33
+ 1. **Read the full error message and stack trace.** Extract: error type, file/line location, triggering input.
34
+ 2. **Reproduce consistently.** Write exact steps. If it doesn't reproduce, you don't understand it yet.
35
+ 3. **Check recent changes.** `git diff`, new dependencies, config changes — narrow the blast radius.
36
+ 4. **Log at each boundary** in multi-component systems. Capture: timestamps, payloads, status codes.
37
+ 5. **Trace data flow** backward through the call stack to the original trigger.
38
+
39
+ **Gate:** State the root cause as a single sentence before moving on.
40
+
41
+ ### Example — Phase 1
42
+
43
+ ```
44
+ BAD (skipping investigation):
45
+ "TypeError: Cannot read property 'id' of undefined"
46
+ → "I'll add a null check on line 42."
17
47
 
18
- 1. **Read error messages carefully.** Don't skip; they often contain solutions.
19
- 2. **Reproduce consistently.** Exact steps, every time.
20
- 3. **Check recent changes.** `git diff`, new dependencies, config changes.
21
- 4. **Gather evidence** in multi-component systems: diagnostic instrumentation at each boundary.
22
- 5. **Trace data flow** backward through call stack to find original trigger.
48
+ GOOD (investigating):
49
+ "TypeError: Cannot read property 'id' of undefined at UserService.getProfile:42"
50
+ git diff shows fetchUser was changed yesterday to return { data: user } instead of user
51
+ Line 42 reads `user.id` but now receives the wrapper object
52
+ Root cause: fetchUser response shape changed; callers were not updated
53
+ ```
23
54
 
24
55
  ## Phase 2: Pattern Analysis
25
56
 
26
- 1. Find working examples in codebase.
27
- 2. Compare against references completely (not skimming).
28
- 3. Identify differences between working and broken.
29
- 4. Understand dependencies and assumptions.
57
+ 1. Find a **working example** of the same pattern in the codebase.
58
+ 2. **Diff working vs broken** line-by-line. Document each difference.
59
+ 3. **List the assumptions** the broken code makes about its inputs, environment, and call order.
30
60
 
31
61
  ## Phase 3: Hypothesis and Testing
32
62
 
33
- 1. Form a single, specific hypothesis (not vague).
34
- 2. Test minimally: smallest possible change, one variable at a time.
35
- 3. Verify before continuing. If wrong — form NEW hypothesis, not more fixes.
36
- 4. Admit uncertainty. Don't pretend to know.
63
+ 1. **Write the hypothesis** in this format: "Changing [X] will produce [Y] because [Z]."
64
+ 2. **Test one variable** at a time — smallest possible change.
65
+ 3. If the hypothesis is wrong, return to Phase 1 with the new evidence. Do not stack guesses.
66
+ 4. If confidence is not high, state: "I'm uncertain because [reason]" before proceeding.
67
+
68
+ ### Example — Hypothesis
69
+
70
+ ```
71
+ BAD:
72
+ "Something is wrong with the config."
73
+
74
+ GOOD:
75
+ "Changing `loadConfig` to parseInt(env.TIMEOUT) will fix the 'NaN' comparison
76
+ because env vars are strings and the timeout check uses numeric comparison."
77
+ ```
37
78
 
38
79
  ## Phase 4: Implementation
39
80
 
40
- 1. Create failing test case first.
41
- 2. Implement single fix addressing root cause only.
42
- 3. Verify: test passes, no other tests broken.
43
- 4. **If fix doesn't work:**
44
- - < 3 attempts: Return to Phase 1 with new information
45
- - 3 attempts: **STOP** and question the architecture. Discuss with human partner.
46
-
47
- ## Red Flags — STOP and Follow the Process
48
-
49
- - "Quick fix for now, investigate later"
50
- - "Just try changing X and see if it works"
51
- - "Skip the test, I'll manually verify"
52
- - "It's probably X, let me fix that"
53
- - "I don't fully understand but this might work"
54
- - "One more fix attempt" (when already tried 2+)
55
- - Each fix reveals new problem in different place
56
-
57
- ## When to Use (Especially)
58
-
59
- - Under time pressure (emergencies make guessing tempting)
60
- - "Just one quick fix" seems obvious
61
- - Already tried multiple fixes
62
- - Don't fully understand the issue
81
+ 1. **Write a failing test** that reproduces the bug.
82
+ 2. **Implement a single fix** addressing the root cause only.
83
+ 3. **Verify:** test passes, no other tests broken.
84
+ 4. If fix fails:
85
+ - < 3 attempts → return to Phase 1 with new information.
86
+ - >= 3 attempts → **STOP.** Reassess the architecture. Discuss with human partner.
87
+
88
+ ---
89
+
90
+ ## MUST DO / MUST NOT DO
91
+
92
+ | MUST DO | MUST NOT DO |
93
+ |---------|-------------|
94
+ | Complete Phase 1 before proposing any fix | Skip to a fix from a stack trace alone |
95
+ | State root cause as a falsifiable claim | Propose a vague cause ("something in config") |
96
+ | Write a failing test before fixing | Skip the test and manually verify |
97
+ | Change one variable at a time | Stack multiple speculative changes |
98
+ | Escalate after 3 failed attempts | Say "one more fix attempt" after 2+ failures |
99
+
100
+ ## Final Checklist
101
+
102
+ - [ ] Root cause identified and stated as a single sentence
103
+ - [ ] Working vs broken difference documented
104
+ - [ ] Hypothesis written as "Changing [X] produces [Y] because [Z]"
105
+ - [ ] Failing test written before fix applied
106
+ - [ ] Fix addresses root cause only — no speculative side-fixes
107
+ - [ ] All existing tests still pass
108
+ - [ ] After 3 failed attempts: stopped and escalated