supipowers 1.3.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/README.md +130 -57
  2. package/bin/install.ts +48 -128
  3. package/package.json +25 -3
  4. package/skills/code-review/SKILL.md +137 -40
  5. package/skills/context-mode/SKILL.md +67 -52
  6. package/skills/creating-supi-agents/SKILL.md +204 -0
  7. package/skills/debugging/SKILL.md +86 -40
  8. package/skills/fix-pr/SKILL.md +96 -65
  9. package/skills/planning/SKILL.md +105 -46
  10. package/skills/qa-strategy/SKILL.md +68 -46
  11. package/skills/receiving-code-review/SKILL.md +60 -53
  12. package/skills/release/SKILL.md +111 -39
  13. package/skills/tdd/SKILL.md +118 -67
  14. package/skills/verification/SKILL.md +71 -37
  15. package/src/bootstrap.ts +24 -5
  16. package/src/commands/agents.ts +249 -0
  17. package/src/commands/ai-review.ts +1113 -0
  18. package/src/commands/config.ts +224 -95
  19. package/src/commands/doctor.ts +19 -13
  20. package/src/commands/fix-pr.ts +8 -11
  21. package/src/commands/generate.ts +200 -0
  22. package/src/commands/model-picker.ts +5 -15
  23. package/src/commands/model.ts +4 -5
  24. package/src/commands/plan.ts +148 -92
  25. package/src/commands/qa.ts +14 -23
  26. package/src/commands/release.ts +523 -282
  27. package/src/commands/review.ts +643 -86
  28. package/src/commands/status.ts +44 -17
  29. package/src/commands/supi.ts +69 -42
  30. package/src/commands/update.ts +57 -2
  31. package/src/config/defaults.ts +6 -39
  32. package/src/config/loader.ts +388 -40
  33. package/src/config/model-resolver.ts +26 -22
  34. package/src/config/schema.ts +113 -48
  35. package/src/context/analyzer.ts +4 -2
  36. package/src/context-mode/detector.ts +16 -54
  37. package/src/context-mode/hooks.ts +135 -17
  38. package/src/context-mode/knowledge/chunker.ts +274 -0
  39. package/src/context-mode/knowledge/store.ts +187 -0
  40. package/src/context-mode/routing.ts +3 -9
  41. package/src/context-mode/sandbox/executor.ts +183 -0
  42. package/src/context-mode/sandbox/runners.ts +40 -0
  43. package/src/context-mode/snapshot-builder.ts +2 -2
  44. package/src/context-mode/tools.ts +459 -0
  45. package/src/context-mode/web/fetcher.ts +117 -0
  46. package/src/context-mode/web/html-to-md.ts +293 -0
  47. package/src/debug/logger.ts +107 -0
  48. package/src/deps/registry.ts +0 -20
  49. package/src/docs/drift.ts +454 -0
  50. package/src/fix-pr/fetch-comments.ts +66 -0
  51. package/src/git/commit-msg.ts +2 -1
  52. package/src/git/commit.ts +123 -141
  53. package/src/git/conventions.ts +2 -2
  54. package/src/git/status.ts +4 -1
  55. package/src/lsp/bridge.ts +138 -12
  56. package/src/planning/approval-flow.ts +125 -19
  57. package/src/planning/plan-content-policy.ts +78 -0
  58. package/src/planning/plan-reviewer.ts +8 -8
  59. package/src/planning/plan-writer-prompt.ts +15 -34
  60. package/src/planning/planning-ask-tool.ts +81 -0
  61. package/src/planning/prompt-builder.ts +9 -169
  62. package/src/planning/system-prompt.ts +293 -0
  63. package/src/platform/omp.ts +50 -4
  64. package/src/platform/progress.ts +182 -0
  65. package/src/platform/test-utils.ts +4 -1
  66. package/src/platform/tui-colors.ts +30 -0
  67. package/src/platform/types.ts +1 -0
  68. package/src/qa/detect-app-type.ts +102 -0
  69. package/src/qa/discover-routes.ts +353 -0
  70. package/src/quality/ai-session.ts +96 -0
  71. package/src/quality/ai-setup.ts +86 -0
  72. package/src/quality/gates/ai-review.ts +129 -0
  73. package/src/quality/gates/build.ts +8 -0
  74. package/src/quality/gates/command.ts +150 -0
  75. package/src/quality/gates/format.ts +28 -0
  76. package/src/quality/gates/lint.ts +22 -0
  77. package/src/quality/gates/lsp-diagnostics.ts +84 -0
  78. package/src/quality/gates/test-suite.ts +8 -0
  79. package/src/quality/gates/typecheck.ts +22 -0
  80. package/src/quality/registry.ts +25 -0
  81. package/src/quality/review-gates.ts +33 -0
  82. package/src/quality/runner.ts +268 -0
  83. package/src/quality/schemas.ts +48 -0
  84. package/src/quality/setup.ts +227 -0
  85. package/src/release/changelog.ts +72 -3
  86. package/src/release/channels/custom.ts +43 -0
  87. package/src/release/channels/gitea.ts +35 -0
  88. package/src/release/channels/github.ts +35 -0
  89. package/src/release/channels/gitlab.ts +35 -0
  90. package/src/release/channels/registry.ts +52 -0
  91. package/src/release/channels/types.ts +27 -0
  92. package/src/release/detector.ts +10 -63
  93. package/src/release/executor.ts +61 -51
  94. package/src/release/prompt.ts +38 -38
  95. package/src/release/version.ts +163 -15
  96. package/src/review/agent-loader.ts +335 -0
  97. package/src/review/consolidator.ts +180 -0
  98. package/src/review/default-agents/correctness.md +72 -0
  99. package/src/review/default-agents/maintainability.md +64 -0
  100. package/src/review/default-agents/security.md +67 -0
  101. package/src/review/fixer.ts +219 -0
  102. package/src/review/multi-agent-runner.ts +135 -0
  103. package/src/review/output.ts +147 -0
  104. package/src/review/prompts/agent-review-wrapper.md +36 -0
  105. package/src/review/prompts/fix-findings.md +32 -0
  106. package/src/review/prompts/fix-output-schema.md +18 -0
  107. package/src/review/prompts/invalid-output-retry.md +22 -0
  108. package/src/review/prompts/output-instructions.md +14 -0
  109. package/src/review/prompts/review-output-schema.md +38 -0
  110. package/src/review/prompts/single-review.md +53 -0
  111. package/src/review/prompts/validation-review.md +30 -0
  112. package/src/review/runner.ts +128 -0
  113. package/src/review/scope.ts +353 -0
  114. package/src/review/template.ts +15 -0
  115. package/src/review/types.ts +296 -0
  116. package/src/review/validator.ts +160 -0
  117. package/src/storage/plans.ts +5 -3
  118. package/src/storage/reports.ts +50 -7
  119. package/src/storage/review-sessions.ts +117 -0
  120. package/src/text.ts +19 -0
  121. package/src/types.ts +336 -26
  122. package/src/utils/paths.ts +39 -0
  123. package/src/visual/companion.ts +5 -3
  124. package/src/visual/start-server.ts +101 -0
  125. package/src/visual/stop-server.ts +39 -0
  126. package/bin/ctx-mode-wrapper.mjs +0 -66
  127. package/src/config/profiles.ts +0 -64
  128. package/src/context-mode/installer.ts +0 -38
  129. package/src/quality/ai-review-gate.ts +0 -43
  130. package/src/quality/gate-runner.ts +0 -67
  131. package/src/quality/lsp-gate.ts +0 -24
  132. package/src/quality/test-gate.ts +0 -39
  133. package/src/visual/scripts/start-server.sh +0 -98
  134. package/src/visual/scripts/stop-server.sh +0 -21
@@ -3,43 +3,140 @@ name: code-review
3
3
  description: Deep code review methodology for thorough quality assessment
4
4
  ---
5
5
 
6
- # Code Review Skill
7
-
8
- Systematic approach to reviewing code changes.
9
-
10
- ## Review Checklist
11
-
12
- ### Correctness
13
- - Does the code do what it claims?
14
- - Are edge cases handled?
15
- - Are error conditions handled?
16
-
17
- ### Security
18
- - Input validation at system boundaries?
19
- - SQL injection, XSS, command injection risks?
20
- - Secrets in code or logs?
21
- - Authentication/authorization checks?
22
-
23
- ### Performance
24
- - Unnecessary loops or allocations?
25
- - N+1 query patterns?
26
- - Missing indexes for frequent queries?
27
- - Large payloads or unbounded lists?
28
-
29
- ### Maintainability
30
- - Clear naming (functions, variables, files)?
31
- - Single responsibility per unit?
32
- - Unnecessary abstractions or premature optimization?
33
- - Comments where logic isn't self-evident?
34
-
35
- ### Testing
36
- - Tests cover the happy path?
37
- - Tests cover error/edge cases?
38
- - Tests are deterministic (no flaky tests)?
39
- - Test names describe the behavior?
40
-
41
- ## Severity Levels
42
-
43
- - **error**: Must fix before merge. Bugs, security issues, data loss risks.
44
- - **warning**: Should fix. Code quality, maintainability, minor issues.
45
- - **info**: Nice to have. Style, naming suggestions, minor improvements.
6
+ # Code Review
7
+
8
+ Identify defects, security risks, and maintainability problems in code changes before they merge.
9
+
10
+ ## Quick Reference
11
+
12
+ | Aspect | Detail |
13
+ |---|---|
14
+ | **Input** | PR diff, file contents, PR title/description |
15
+ | **Output** | Structured findings (see Finding Format below) |
16
+ | **Scope** | Changed lines + immediate context; follow references 1 level deep when a change touches a public API |
17
+ | **Skip** | Formatting, import order, whitespace — defer to linters |
18
+ | **Depth** | Read every changed line; skim unchanged context for broken assumptions |
19
+
20
+ ## Finding Format
21
+
22
+ Each finding MUST follow this structure:
23
+
24
+ ```
25
+ **[severity]** `file:line` — Description of the issue.
26
+ Suggestion: concrete fix or direction.
27
+ ```
28
+
29
+ **Severity levels:**
30
+
31
+ | Level | Meaning | Gate |
32
+ |---|---|---|
33
+ | `error` | Bugs, security holes, data loss, crashes | MUST fix before merge |
34
+ | `warning` | Wrong abstraction, missing validation, performance trap | SHOULD fix |
35
+ | `info` | Naming, style, minor simplification | Nice to have |
36
+
37
+ ## Review Procedure
38
+
39
+ Execute these phases in order. Each phase produces findings or nothing.
40
+
41
+ ### Phase 1 — Understand Intent
42
+ Read the PR title, description, and linked issues. Determine what the change is supposed to do. If intent is unclear, report as `warning` before proceeding.
43
+
44
+ ### Phase 2 — Correctness
45
+ For each changed function/block:
46
+ - Trace inputs through the logic. Identify domain boundaries (null, empty, zero, negative, max-length).
47
+ - For each boundary, verify the code handles or explicitly rejects it. Unhandled → `error`.
48
+ - Check return values: can a caller confuse a failure return with a success? Silent failures → `error`.
49
+
50
+ ### Phase 3 — Security
51
+ At every system boundary (user input, HTTP params, DB queries, shell commands, file paths):
52
+ - Verify input is validated or sanitized before use. Missing → `error`.
53
+ - Check for secrets in code, logs, or error messages. Present → `error`.
54
+ - Verify auth checks exist for protected operations. Missing → `error`.
55
+
56
+ ### Phase 4 — Performance
57
+ - Identify loops over collections: is there work inside the loop that could be batched or hoisted? Report as `warning`.
58
+ - Look for N+1 patterns: a query inside a loop that iterates query results. Report as `warning`.
59
+ - Flag unbounded lists or payloads with no pagination/limit. Report as `warning`.
60
+
61
+ ### Phase 5 — Maintainability
62
+ - Flag functions doing more than one job (needs "and" to describe) → `warning`.
63
+ - Flag duplicated logic across the diff (same pattern 2+ times) → `info`.
64
+ - Flag misleading names (function name promises X, body does Y) → `warning`.
65
+
66
+ ### Phase 6 — Tests
67
+ - If the change adds behavior, verify a test covers the happy path. Missing → `warning`.
68
+ - If the change fixes a bug, verify a regression test exists. Missing → `warning`.
69
+ - Flag non-deterministic tests (time-dependent, random, order-dependent) → `warning`.
70
+
71
+ ## Examples
72
+
73
+ ### Bug: unhandled null at domain boundary
74
+
75
+ ```ts
76
+ // PR diff
77
+ function getUser(id: string) {
78
+ const row = db.query("SELECT * FROM users WHERE id = ?", [id]);
79
+ return { name: row.name, email: row.email };
80
+ }
81
+ ```
82
+
83
+ **Finding:**
84
+ ```
85
+ **[error]** `src/users.ts:3` — `db.query` returns `null` when no row matches,
86
+ but the next line unconditionally accesses `.name` on the result.
87
+ Suggestion: Guard with `if (!row) return null` or throw a NotFoundError.
88
+ ```
89
+
90
+ ### Security: unsanitized input in shell command
91
+
92
+ ```python
93
+ # PR diff
94
+ def export_report(filename):
95
+ os.system(f"tar czf /tmp/{filename}.tar.gz /data/reports")
96
+ ```
97
+
98
+ **Finding:**
99
+ ```
100
+ **[error]** `reports/export.py:3` — `filename` is interpolated into a shell
101
+ command without sanitization. An attacker passing `; rm -rf /` exploits this.
102
+ Suggestion: Use `subprocess.run(["tar", "czf", ...])` with a list to avoid shell injection,
103
+ and validate `filename` against an allowlist pattern.
104
+ ```
105
+
106
+ ### N+1 query in loop
107
+
108
+ ```ts
109
+ // PR diff
110
+ const orders = await db.orders.findMany({ where: { status: "open" } });
111
+ for (const order of orders) {
112
+ const customer = await db.customers.findUnique({ where: { id: order.customerId } });
113
+ order.customerName = customer.name;
114
+ }
115
+ ```
116
+
117
+ **Finding:**
118
+ ```
119
+ **[warning]** `src/orders.ts:2-5` — Each loop iteration issues a separate
120
+ DB query for the customer. With N open orders this is N+1 queries.
121
+ Suggestion: Use `include: { customer: true }` in the initial query, or
122
+ batch-fetch customers with `findMany({ where: { id: { in: customerIds } } })`.
123
+ ```
124
+
125
+ ## MUST DO / MUST NOT DO
126
+
127
+ | MUST DO | MUST NOT DO |
128
+ |---|---|
129
+ | Report every finding with file, line, severity, and suggestion | Report vague findings without location or fix direction |
130
+ | Prioritize errors first, then warnings, then info | Bury a critical bug under 10 style nits |
131
+ | Read the full diff before writing findings | Review only the first file and stop |
132
+ | Verify claims by reading the referenced code | Assume a pattern is wrong without checking the implementation |
133
+ | Limit info-level findings to 5 max | Flood the review with cosmetic suggestions |
134
+
135
+ ## Final Checklist
136
+
137
+ Before submitting your review, verify:
138
+ - [ ] Every `error` finding includes a concrete reproduction scenario or input
139
+ - [ ] Every finding has `file:line`, severity, description, and suggestion
140
+ - [ ] Findings are grouped by severity (errors first)
141
+ - [ ] No duplicate findings (same root cause reported once, not per-occurrence)
142
+ - [ ] If zero findings: explicitly state "No issues found" — do not return empty output
@@ -1,73 +1,88 @@
1
- # context-mode — MANDATORY routing rules
1
+ # supi-context-mode
2
2
 
3
- You have context-mode MCP tools available. These rules are NOT optional — they protect your context window from flooding. A single unrouted command can dump 56 KB into context and waste the entire session.
3
+ Route high-output tool calls through sandboxed execution to protect the context window.
4
4
 
5
- ## BLOCKED commands — do NOT attempt these
5
+ | Scope | Tool routing rules for supi-context-mode |
6
+ |-------|-----------------------------------------------------|
7
+ | Trigger | Always active when supi-context-mode tools are available |
8
+ | Goal | Prevent context flooding — a single unrouted command can dump 56 KB into context |
9
+ | Key rule | Blocked tools return errors; use sandbox equivalents instead |
6
10
 
7
- ### curl / wget — BLOCKED
8
- Any Bash command containing `curl` or `wget` is intercepted and replaced with an error message. Do NOT retry.
9
- Instead use:
10
- - `ctx_fetch_and_index(url, source)` to fetch and index web pages
11
- - `ctx_execute(language: "javascript", code: "const r = await fetch(...)")` to run HTTP calls in sandbox
11
+ ## Tool Selection Hierarchy
12
12
 
13
- ### Inline HTTP — BLOCKED
14
- Any Bash command containing `fetch('http`, `requests.get(`, `requests.post(`, `http.get(`, or `http.request(` is intercepted and replaced with an error message. Do NOT retry with Bash.
15
- Instead use:
16
- - `ctx_execute(language, code)` to run HTTP calls in sandbox — only stdout enters context
13
+ Pick the highest-priority tool that fits the task:
17
14
 
18
- ### WebFetch / Fetch — BLOCKED
19
- WebFetch and Fetch calls are denied entirely.
20
- Instead use:
21
- - `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` to query the indexed content
15
+ | Priority | Tool | Use for |
16
+ |----------|------|---------|
17
+ | 1 — GATHER | `ctx_batch_execute(commands, queries)` | Primary tool. Runs all commands, auto-indexes, returns search results. ONE call replaces 30+ individual calls. |
18
+ | 2 — FOLLOW-UP | `ctx_search(queries: ["q1", "q2", ...])` | Query already-indexed content. Pass ALL questions as array in ONE call. |
19
+ | 3 — PROCESSING | `ctx_execute(language, code)` / `ctx_execute_file(path, language, code)` | Sandbox execution. Only stdout enters context. |
20
+ | 4 — WEB | `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` | Fetch, chunk, index, query. Raw HTML never enters context. |
21
+ | 5 — INDEX | `ctx_index(content, source)` | Store content in FTS5 knowledge base for later search. |
22
22
 
23
- ### Grep — BLOCKED
24
- Grep calls are intercepted and blocked. Do NOT retry with Grep.
25
- Instead use:
26
- - `ctx_search(queries: ["<pattern>"])` to search indexed content
27
- - `ctx_batch_execute(commands, queries)` to run searches and return compressed results
28
- - `ctx_execute(language: "shell", code: "grep ...")` to run searches in sandbox
23
+ ## Blocked Commands
29
24
 
30
- ### Find / Glob — BLOCKED
31
- Find/Glob calls are intercepted and blocked. Do NOT retry with Find/Glob.
32
- Instead use:
33
- - `ctx_execute(language: "shell", code: "find ...")` to run in sandbox
34
- - `ctx_batch_execute(commands, queries)` for multiple searches
25
+ Blocked commands are intercepted and replaced with an error. Do NOT retry via Bash.
35
26
 
27
+ | Blocked tool | Replacement |
28
+ |---|---|
29
+ | `curl` / `wget` in Bash | `ctx_fetch_and_index(url, source)` or `ctx_execute` with `fetch()` |
30
+ | Inline HTTP (`fetch('http`, `requests.get(`, etc.) in Bash | `ctx_execute(language, code)` — only stdout enters context |
31
+ | WebFetch / Fetch tool | `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` |
32
+ | Grep tool | `ctx_search(queries)`, `ctx_batch_execute(commands, queries)`, or `ctx_execute(language: "shell", code: "grep ...")` |
33
+ | Find / Glob tool | `ctx_execute(language: "shell", code: "find ...")` or `ctx_batch_execute(commands, queries)` |
36
34
 
37
- ## REDIRECTED tools — use sandbox equivalents
35
+ ### Example: routing a grep call
38
36
 
39
- ### Bash (>20 lines output)
40
- Bash is ONLY for: `git`, `mkdir`, `rm`, `mv`, `cd`, `ls`, `npm install`, `pip install`, and other short-output commands.
41
- For everything else, use:
42
- - `ctx_batch_execute(commands, queries)` — run multiple commands + search in ONE call
43
- - `ctx_execute(language: "shell", code: "...")` — run in sandbox, only stdout enters context
37
+ ```
38
+ // WRONG — blocked, returns error
39
+ grep(pattern: "TODO", path: "src/")
44
40
 
45
- ### Read (large files)
46
- Reads are never blocked — they always go through OMP's native read tool so hashline anchors (`N#XX`) are preserved for the edit contract. Large file reads (>110 lines) are automatically compressed to head (80 lines) + tail (30 lines) with a `sel` hint for the omitted section.
47
- For analysis-only reads where hashlines aren't needed, `ctx_execute_file(path, language, code)` remains more efficient — only your printed summary enters context.
41
+ // CORRECT — runs in sandbox, only printed summary enters context
42
+ ctx_execute(language: "shell", code: "grep -rn TODO src/")
48
43
 
49
- ## Tool selection hierarchy
44
+ // BEST — indexes output and returns search results in one call
45
+ ctx_batch_execute(
46
+ commands: [{ label: "TODOs", command: "grep -rn TODO src/" }],
47
+ queries: ["TODO fixme priority"]
48
+ )
49
+ ```
50
50
 
51
- 1. **GATHER**: `ctx_batch_execute(commands, queries)` — Primary tool. Runs all commands, auto-indexes output, returns search results. ONE call replaces 30+ individual calls.
52
- 2. **FOLLOW-UP**: `ctx_search(queries: ["q1", "q2", ...])` — Query indexed content. Pass ALL questions as array in ONE call.
53
- 3. **PROCESSING**: `ctx_execute(language, code)` | `ctx_execute_file(path, language, code)` — Sandbox execution. Only stdout enters context.
54
- 4. **WEB**: `ctx_fetch_and_index(url, source)` then `ctx_search(queries)` — Fetch, chunk, index, query. Raw HTML never enters context.
55
- 5. **INDEX**: `ctx_index(content, source)` — Store content in FTS5 knowledge base for later search.
51
+ ## Redirected Tools
56
52
 
57
- ## Subagent routing
53
+ ### Bash
58
54
 
59
- When spawning subagents (Agent/Task tool), the routing block is automatically injected into their prompt. Bash-type subagents are upgraded to general-purpose so they have access to MCP tools. You do NOT need to manually instruct subagents about context-mode.
55
+ Bash is for commands producing <20 lines: `git`, `mkdir`, `rm`, `mv`, `ls`, `npm install`, `pip install`.
60
56
 
61
- ## Output constraints
57
+ For everything else:
58
+ - `ctx_batch_execute(commands, queries)` — multiple commands + search in ONE call
59
+ - `ctx_execute(language: "shell", code: "...")` — sandbox, only stdout enters context
62
60
 
63
- - Keep responses under 500 words.
64
- - Write artifacts (code, configs, PRDs) to FILES — never return them as inline text. Return only: file path + 1-line description.
65
- - When indexing content, use descriptive source labels so others can `ctx_search(source: "label")` later.
61
+ ### Read
66
62
 
67
- ## ctx commands
63
+ Reads are never blocked — OMP's native read tool preserves hashline anchors (`N#XX`) for the edit contract. Large reads (>110 lines) are auto-compressed to head (80) + tail (30) with a `sel` hint.
64
+
65
+ For analysis-only reads where anchors are not needed, prefer `ctx_execute_file(path, language, code)` — only your printed summary enters context.
66
+
67
+ ## Subagent Routing
68
+
69
+ The routing block is automatically injected into subagent prompts. Bash-type subagents are upgraded to general-purpose for tool access. You do NOT need to manually instruct subagents about context-mode.
70
+
71
+ ## Output Constraints
72
+
73
+ - Write artifacts (code, configs, PRDs) to files — never inline. Return only: file path + 1-line description.
74
+ - When indexing, use descriptive `source` labels so others can `ctx_search(source: "label")` later.
75
+
76
+ ## `ctx` Commands
68
77
 
69
78
  | Command | Action |
70
79
  |---------|--------|
71
- | `ctx stats` | Call the `ctx_stats` MCP tool and display the full output verbatim |
72
- | `ctx doctor` | Call the `ctx_doctor` MCP tool, run the returned shell command, display as checklist |
73
- | `ctx upgrade` | Call the `ctx_upgrade` MCP tool, run the returned shell command, display as checklist |
80
+ | `ctx stats` | Call the `ctx_stats` tool, display full output verbatim |
81
+ | `ctx purge` | Call the `ctx_purge` tool to clear all indexed content |
82
+
83
+ ## Checklist
84
+
85
+ - [ ] Used tool hierarchy (batch_execute > search > execute > fetch) — not raw Bash/Grep/Find
86
+ - [ ] No blocked tool calls attempted
87
+ - [ ] Artifacts written to files, not returned inline
88
+ - [ ] Source labels are descriptive for later search
@@ -0,0 +1,204 @@
1
+ ---
2
+ name: creating-supi-agents
3
+ description: Interactive guide for creating a new supipowers review agent from scratch
4
+ ---
5
+
6
+ # Creating a Review Agent
7
+
8
+ Guide the user through creating a specialized code review agent for supipowers' multi-agent `/supi:review` pipeline.
9
+
10
+ ## Quick Reference
11
+
12
+ | Aspect | Detail |
13
+ |--------|--------|
14
+ | **Input** | User's description of what the agent should review |
15
+ | **Output** | Agent file saved to `.omp/agents/<agent-name>.md` |
16
+ | **File format** | YAML frontmatter (`name`, `description`, `focus`) + prompt body + `{output_instructions}` |
17
+ | **Hard constraint** | Prompt body **MUST** end with `{output_instructions}` on its own line — the pipeline replaces it with the output schema at review time |
18
+ | **Process** | Goal → Research → Present → Refine → Save |
19
+
20
+ ## Agent File Format
21
+
22
+ ```markdown
23
+ ---
24
+ name: <kebab-case-name>
25
+ description: <one-line summary>
26
+ focus: <comma-separated areas>
27
+ ---
28
+
29
+ <prompt body>
30
+
31
+ {output_instructions}
32
+ ```
33
+
34
+ ## Process
35
+
36
+ ### Step 1: Understand the Goal
37
+
38
+ Ask what kind of reviewer the user wants. Common archetypes:
39
+
40
+ | Archetype | Focus areas |
41
+ |-----------|-------------|
42
+ | Performance | algorithmic complexity, memory, caching, lazy loading |
43
+ | Accessibility | ARIA, semantic HTML, screen reader support, WCAG |
44
+ | API design | REST conventions, error contracts, versioning |
45
+ | Test quality | coverage gaps, flaky patterns, missing edge cases |
46
+ | Security | injection, auth, secrets, OWASP Top 10 |
47
+ | Documentation | JSDoc, README accuracy, changelog updates |
48
+
49
+ ### Step 2: Research
50
+
51
+ Research established checklists and best practices for the focus area (e.g., OWASP for security, WCAG for accessibility). Look for language/framework-specific patterns relevant to the user's stack.
52
+
53
+ ### Step 3: Present Overview
54
+
55
+ Present a structured proposal:
56
+ - **Name**: suggested kebab-case name
57
+ - **Description**: one-line summary
58
+ - **Focus areas**: comma-separated specializations
59
+ - **Review criteria**: bulleted list of what the agent will check
60
+ - **Example findings**: 2–3 examples of what this agent would flag
61
+
62
+ ### Step 4: Refine with User
63
+
64
+ Ask if they want to adjust:
65
+ - Focus areas or review criteria
66
+ - Tone — **strict** (flags aggressively, treats ambiguity as an issue) vs. **advisory** (flags only clear problems, uses softer language)
67
+ - Project-specific conventions to enforce
68
+
69
+ Iterate until the user approves.
70
+
71
+ ### Step 5: Save the Agent
72
+
73
+ Generate the final agent file and save to `.omp/agents/<agent-name>.md`.
74
+
75
+ ## Agent Prompt Guidelines
76
+
77
+ ### What makes a good agent prompt
78
+
79
+ 1. **State the role** clearly (e.g., "You are a performance-focused code reviewer")
80
+ 2. **List specific check items** as concrete, actionable criteria (not vague categories)
81
+ 3. **Provide severity guidance** — define what warrants `error` vs. `warning` vs. `info`
82
+ 4. **Define scope boundaries** — state what is NOT in scope to prevent overlap with other agents
83
+ 5. **End with `{output_instructions}`** — mandatory, on its own line
84
+
85
+ ### Before / After: Check Item Quality
86
+
87
+ ```markdown
88
+ # BEFORE — vague
89
+ ## What to Check
90
+ - Look for performance issues
91
+ - Check if things could be faster
92
+ - Make sure the code is efficient
93
+
94
+ # AFTER — concrete and actionable
95
+ ## What to Check
96
+ - **Algorithmic complexity**: O(n²) or worse loops, unnecessary nested iterations
97
+ - **Memory allocation**: Large object creation in hot paths, missing cleanup
98
+ - **Caching opportunities**: Repeated expensive computations that could be memoized
99
+ ```
100
+
101
+ ### Before / After: Severity Guidance
102
+
103
+ ```markdown
104
+ # BEFORE — missing severity
105
+ Flag any issues you find in the code.
106
+
107
+ # AFTER — calibrated severity
108
+ ## Severity Guide
109
+ - **error**: Will cause visible degradation in production (e.g., O(n²) on large datasets)
110
+ - **warning**: Potential issue that depends on scale (e.g., missing memoization)
111
+ - **info**: Optimization opportunity, not a current problem
112
+ ```
113
+
114
+ ## Example: Performance Agent
115
+
116
+ ```markdown
117
+ ---
118
+ name: performance
119
+ description: Reviews code for performance issues and optimization opportunities
120
+ focus: algorithmic complexity, memory allocation, caching, lazy loading
121
+ ---
122
+
123
+ You are a performance-focused code reviewer. Analyze the provided code diff for performance issues.
124
+
125
+ ## What to Check
126
+
127
+ - **Algorithmic complexity**: O(n²) or worse loops, unnecessary nested iterations
128
+ - **Memory allocation**: Large object creation in hot paths, missing cleanup
129
+ - **Caching opportunities**: Repeated expensive computations that could be memoized
130
+ - **Lazy loading**: Resources loaded eagerly that could be deferred
131
+ - **Bundle size**: Unnecessary imports, tree-shaking blockers
132
+ - **Database queries**: N+1 queries, missing indexes, unbounded result sets
133
+
134
+ ## Severity Guide
135
+
136
+ - **error**: Will cause visible performance degradation in production (e.g., O(n²) on large datasets)
137
+ - **warning**: Potential issue that depends on scale (e.g., missing memoization)
138
+ - **info**: Optimization opportunity, not a current problem
139
+
140
+ ## Out of Scope
141
+
142
+ - Correctness issues (handled by correctness agent)
143
+ - Style/formatting (handled by linter)
144
+ - Security concerns (handled by security agent)
145
+
146
+ {output_instructions}
147
+ ```
148
+
149
+ ## Example: Accessibility Agent
150
+
151
+ ```markdown
152
+ ---
153
+ name: accessibility
154
+ description: Reviews UI code for accessibility violations and WCAG compliance
155
+ focus: ARIA attributes, semantic HTML, keyboard navigation, color contrast
156
+ ---
157
+
158
+ You are an accessibility-focused code reviewer. Analyze the provided code diff for accessibility issues using WCAG 2.1 AA as the baseline.
159
+
160
+ ## What to Check
161
+
162
+ - **Semantic HTML**: `<div>` or `<span>` used where `<button>`, `<nav>`, `<main>`, `<section>` belongs
163
+ - **ARIA attributes**: Missing `aria-label` on icon-only buttons, incorrect `role` values
164
+ - **Keyboard navigation**: Interactive elements not reachable via Tab, missing focus indicators
165
+ - **Color contrast**: Text/background combinations below 4.5:1 ratio (normal text) or 3:1 (large text)
166
+ - **Form labels**: Inputs without associated `<label>` or `aria-labelledby`
167
+ - **Image alt text**: Missing or non-descriptive `alt` attributes on `<img>` tags
168
+
169
+ ## Severity Guide
170
+
171
+ - **error**: Blocks assistive technology users entirely (e.g., button with no accessible name)
172
+ - **warning**: Degraded experience for assistive technology users (e.g., missing focus indicator)
173
+ - **info**: Best-practice improvement (e.g., prefer `<nav>` over `<div role="navigation">`)
174
+
175
+ ## Out of Scope
176
+
177
+ - Visual design preferences (handled by design review)
178
+ - Performance (handled by performance agent)
179
+ - Business logic correctness (handled by correctness agent)
180
+
181
+ {output_instructions}
182
+ ```
183
+
184
+ ## MUST DO / MUST NOT DO
185
+
186
+ | MUST DO | MUST NOT DO |
187
+ |---------|-------------|
188
+ | End every agent prompt with `{output_instructions}` on its own line | Omit `{output_instructions}` — the pipeline will fail |
189
+ | Include a severity guide (`error` / `warning` / `info`) | Leave severity undefined — agents produce inconsistent ratings |
190
+ | Define "Out of Scope" to prevent overlap with other agents | Let scope overlap — produces duplicate findings across agents |
191
+ | Use concrete check items with specific patterns to look for | Use vague criteria like "check for issues" or "ensure quality" |
192
+ | Save to `.omp/agents/<agent-name>.md` | Save anywhere else or leave unsaved |
193
+
194
+ ## Pre-Save Checklist
195
+
196
+ Before saving the agent file, verify:
197
+
198
+ - [ ] YAML frontmatter has `name`, `description`, and `focus`
199
+ - [ ] Prompt body states the agent's role in the first sentence
200
+ - [ ] At least 3 concrete, actionable check items
201
+ - [ ] Severity guide defines `error`, `warning`, and `info` thresholds
202
+ - [ ] "Out of Scope" section present
203
+ - [ ] `{output_instructions}` is the last line of the prompt body
204
+ - [ ] File saved to `.omp/agents/<agent-name>.md`