@inceptionstack/pi-hard-no 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 InceptionStack
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,287 @@
1
+ # pi-hard-no
2
+
3
+ A [pi](https://github.com/badlogic/pi-mono) extension that automatically reviews code changes after each agent turn using a separate pi reviewer instance.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pi install npm:@inceptionstack/pi-hard-no
9
+ ```
10
+
11
+ Or manually:
12
+
13
+ ```bash
14
+ cp index.ts ~/.pi/agent/extensions/pi-hard-no.ts
15
+ ```
16
+
17
+ ## How it works
18
+
19
+ ```
20
+ Agent makes file changes (write, edit, bash)
21
+
22
+ ▼ agent_end fires
23
+
24
+ ▼ Extension detects file-modifying tool calls
25
+
26
+ ▼ Spawns a fresh pi instance (in-memory, isolated)
27
+
28
+ ▼ Sends per-file diffs + commit messages to reviewer
29
+ │ Reviewer reads each file itself via read(path) tool
30
+
31
+ ┌────┴────┐
32
+ │ │
33
+ LGTM Issues found
34
+ │ │
35
+ │ ▼
36
+ │ Feeds back to main agent
37
+ │ Agent fixes → new review loop
38
+ │ (up to maxReviewLoops)
39
+
40
+ ▼ >1 file reviewed from git?
41
+
42
+ ├── No → done
43
+
44
+ └── Yes → Architect review
45
+ (cross-file consistency, architecture coherence)
46
+ ```
47
+
48
+ The reviewer checks for:
49
+
50
+ - Bugs, logic errors, off-by-one errors, race conditions
51
+ - Security issues (injection, secret leaks, auth bypasses)
52
+ - Missing error handling
53
+ - DRY violations (Don't Repeat Yourself)
54
+ - Single Responsibility Principle
55
+ - Readability and maintainability
56
+
57
+ ## Configuration
58
+
59
+ Config files are loaded from two locations. **Local takes precedence over global:**
60
+
61
+ 1. `<cwd>/.hardno/` — project-specific config (in the current working directory)
62
+ 2. `~/.pi/.hardno/` — global defaults
63
+
64
+ All config files are optional. If missing, sensible defaults are used.
65
+
66
+ Use `/scaffold-review-files` to generate config templates.
67
+
68
+ ### `.hardno/settings.json`
69
+
70
+ ```json
71
+ {
72
+ "maxReviewLoops": 100,
73
+ "model": "amazon-bedrock/us.anthropic.claude-opus-4-6-v1",
74
+ "thinkingLevel": "off",
75
+ "architectEnabled": true,
76
+ "reviewTimeoutMs": 120000,
77
+ "toggleShortcut": "alt+r",
78
+ "cancelShortcut": "",
79
+ "judgeEnabled": false,
80
+ "judgeModel": "amazon-bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0",
81
+ "judgeTimeoutMs": 10000
82
+ }
83
+ ```
84
+
85
+ | Setting | Type | Default | Description |
86
+ | ------------------ | ----------- | -------------------------------------------------------------- | ------------------------------------------------------------------------------------------ |
87
+ | `maxReviewLoops` | integer > 0 | `100` | Max review→fix→review cycles before stopping |
88
+ | `model` | string | `"amazon-bedrock/us.anthropic.claude-opus-4-6-v1"` | Reviewer model (`"provider/model-id"`) |
89
+ | `thinkingLevel` | string | `"off"` | `off\|minimal\|low\|medium\|high\|xhigh` |
90
+ | `architectEnabled` | boolean | `true` | Enable architect review (triggers when >1 file reviewed from git) |
91
+ | `reviewTimeoutMs` | integer > 0 | `120000` | Max wall-clock per review in ms |
92
+ | `toggleShortcut` | string | `"alt+r"` | Key id for toggling review on/off |
93
+ | `judgeEnabled` | boolean | `false` | Opt-in LLM gate that suppresses redundant reviews on read-only turns (see [Judge](#judge)) |
94
+ | `judgeModel` | string | `"amazon-bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0"` | Model used by the judge (`"provider/model-id"`) |
95
+ | `judgeTimeoutMs` | integer > 0 | `10000` | Max wall-clock per judge classification call in ms |
96
+ | `cancelShortcut` | string | `""` (none) | Key id for cancelling review (opt-in, see below) |
97
+
98
+ > **Note:** `roundupEnabled` is accepted as a legacy alias for `architectEnabled`.
99
+
100
+ ### `.hardno/review-rules.md`
101
+
102
+ Custom review rules appended to the reviewer prompt. Only include review criteria — the surrounding prompt (tools, budget, workflow, response format) is handled automatically.
103
+
104
+ ```markdown
105
+ ## Architecture
106
+
107
+ - All API endpoints must validate input with zod schemas
108
+ - Database queries must use parameterized statements
109
+
110
+ ## Security
111
+
112
+ - No console.log in production code (use logger)
113
+ - No secrets in code — use environment variables
114
+ ```
115
+
116
+ Use `/add-review-rule <text>` to quickly prepend rules, or `/hardno-rules` to open the file in pi's editor.
117
+
118
+ ### `.hardno/auto-review.md`
119
+
120
+ Override the "what to review / what not to report" section of the review prompt. The surrounding prompt (tools, budget, workflow, response format) is always included automatically.
121
+
122
+ ### `.hardno/architect.md`
123
+
124
+ Custom rules for the architect review (cross-file consistency check):
125
+
126
+ ```markdown
127
+ ## Architecture
128
+
129
+ - Verify module dependency graph has no cycles
130
+ - Check error handling is consistent across all modules
131
+ - Flag any TODO/FIXME comments added during fix loops
132
+ ```
133
+
134
+ > **Note:** `.hardno/roundup.md` is accepted as a legacy fallback.
135
+
136
+ ### `.hardno/ignore`
137
+
138
+ Gitignore-style patterns to exclude files from review:
139
+
140
+ ```
141
+ # Skip generated files
142
+ *.generated.ts
143
+ dist/
144
+ node_modules/
145
+
146
+ # Skip specific paths
147
+ src/vendor/**
148
+ ```
149
+
150
+ ## UX
151
+
152
+ ### Status bar (bottom of pi)
153
+
154
+ - `lgtm on (Alt+R toggle)` — idle, no pending files
155
+ - `lgtm on 🔒 push blocked · will review 3 files (Alt+R toggle)` — edits accumulating, push blocked
156
+ - `lgtm reviewing… 🔒 push blocked (/cancel-review)` — reviewer running
157
+ - `lgtm on issues found 🔒 push blocked (Alt+R toggle)` — review found issues
158
+ - `lgtm skipped — no files to review` — nothing to review after fix turn
159
+ - `lgtm off (Alt+R toggle)` — disabled, push guard off
160
+
161
+ ### Review progress widget
162
+
163
+ During reviews, an animated widget appears below the editor showing:
164
+
165
+ - ASCII art senior dev with reading glasses
166
+ - File list with active file highlighted and per-file tool usage counts
167
+ - Elapsed time, model name, loop count
168
+
169
+ ### Commands
170
+
171
+ | Command | Description |
172
+ | ------------------------- | --------------------------------------------------------------------- |
173
+ | `/review` | Toggle review on/off |
174
+ | `/review N` | Review the last N commits |
175
+ | `/review-all` | Review all changes (pending diff → last commit → all files in cwd) |
176
+ | `/cancel-review` | Cancel an in-progress review (works during architect review) |
177
+ | `/review-judge-toggle` | Toggle the duplicate-review suppressor (judge) for this session |
178
+ | `/review-clean-logs` | Wipe `~/.pi/.hardno/review.log` + `reviews/*.json` (config untouched) |
179
+ | `/scaffold-review-files` | Create `.hardno/` config templates in a git repo |
180
+ | `/hardno-rules` | Edit `.hardno/review-rules.md` in pi's built-in editor |
181
+ | `/add-review-rule <text>` | Prepend a custom rule to `.hardno/review-rules.md` |
182
+
183
+ ### Keyboard shortcuts
184
+
185
+ | Key | Default | Configurable | Action |
186
+ | ------------------ | -------- | ---------------- | --------------------------------------------------- |
187
+ | Toggle shortcut | `alt+r` | `toggleShortcut` | Toggle review on/off |
188
+ | Cancel shortcut | _(none)_ | `cancelShortcut` | Cancel in-progress review |
189
+ | `ctrl+alt+r` | built-in | no | Cancel review (fallback, terminals that support it) |
190
+ | `ctrl+alt+shift+r` | built-in | no | Full reset: cancel, reset loops, clear all state |
191
+
192
+ > **Note:** `/cancel-review` is the recommended cancel method. It works in all terminals. Keyboard shortcuts for cancel are opt-in via `cancelShortcut` in settings because many terminals (especially iTerm2 on macOS) don't reliably send modifier key combos.
193
+
194
+ ## Review loop behavior
195
+
196
+ 1. Agent makes changes → review triggers
197
+ 2. If issues found → agent fixes them → review triggers again
198
+ 3. If LGTM → loop counter resets
199
+ 4. If loop count reaches `maxReviewLoops` → stops with a warning
200
+ 5. Toggling off/on with `/review` resets the counter
201
+
202
+ ### Architect review
203
+
204
+ After the review loop reaches LGTM, an **architect review** triggers automatically when more than one file was reviewed from git across the session. No heuristics or judge gating — it always runs for multi-file changes.
205
+
206
+ The architect review:
207
+
208
+ - Checks architecture coherence across all changes
209
+ - Verifies cross-file consistency (naming, patterns, types)
210
+ - Looks for accumulated tech debt from fix loops
211
+ - Validates documentation is still accurate
212
+ - Uses tools (`read`, `bash`, `grep`, `find`, `ls`) to explore the full codebase
213
+
214
+ Disable with `"architectEnabled": false` in settings.
215
+
216
+ ## Judge
217
+
218
+ The **judge** is an opt-in duplicate-review suppressor. When enabled, it runs a cheap classifier LLM (default: Claude Haiku 4.5) on each bash tool call the agent made this turn. If every bash call classifies as `inspection_vcs_noop` (reads state only — e.g. `git status`, `git log`, `echo`, or compound commands built solely from such inspection commands) **and** no `write`/`edit` tool call ran, the full review is skipped with reason `judge_read_only`.
219
+
220
+ **Why it exists:** the deterministic classifier in `changes.ts` uses a static allowlist. Commands using shell builtins outside the allowlist (e.g. `echo` in a compound) get flagged as "potentially modifying" and trigger an unnecessary review of already-reviewed content. The judge catches those false positives.
221
+
222
+ **Fail-safe by design:**
223
+
224
+ - Off by default.
225
+ - Fail-open: any judge error (timeout, transport, parse) → review runs as normal.
226
+ - `unsure` classification → review runs (same as "modifying").
227
+ - Any `write`/`edit` tool call skips the judge entirely and goes straight to review.
228
+ - A kill switch: set `"judgeEnabled": false` to disable instantly.
229
+
230
+ **Enable in `.hardno/settings.json`:**
231
+
232
+ ```json
233
+ {
234
+ "judgeEnabled": true
235
+ }
236
+ ```
237
+
238
+ See `eval/RESULTS.md` for the evaluation that picked Haiku 4.5.
239
+
240
+ ## What triggers a review
241
+
242
+ A review only fires when file-modifying tools were used during the agent turn:
243
+
244
+ - `write` — new files
245
+ - `edit` — file edits
246
+ - `bash` — commands matching file operations (`cp`, `mv`, `rm`, `sed -i`, `cat >`, `tee`, `mkdir`, `echo >`)
247
+
248
+ Pure read/search turns are skipped. Non-file-modifying bash commands (`git commit`, `curl`, `aws`, etc.) are also skipped.
249
+
250
+ ### Untracked (new) files
251
+
252
+ Files created via `write` that haven't been `git add`ed are detected via `git ls-files --others --exclude-standard` and included in the review context, labeled as `(new file)`.
253
+
254
+ ## Cancellation
255
+
256
+ You can cancel a review at any time:
257
+
258
+ - **`/cancel-review`** — works in all terminals, recommended method
259
+ - **Configured shortcut** — set `cancelShortcut` in settings if you want a hotkey
260
+ - **`ctrl+alt+r`** — fallback, works in terminals that support the key combo
261
+
262
+ Cancellation stops the current review immediately, including architect reviews. The agent continues normally.
263
+
264
+ ## Push guard
265
+
266
+ The extension automatically blocks `git push` when:
267
+
268
+ - **A review is in progress** — wait for the review to complete
269
+ - **The last review found issues** — fix the issues and get LGTM first
270
+ - **Files have been modified but not yet reviewed** — wait for the review to start and complete
271
+
272
+ The status bar shows `🔒 push blocked` whenever push would be blocked.
273
+
274
+ The block applies to any `bash` tool call matching `git push` (including `git -C <dir> push`, `git push origin main`, etc.). The agent sees a clear "Push blocked" message explaining why.
275
+
276
+ The block clears automatically when:
277
+
278
+ - The next review returns **LGTM**
279
+ - The review **skips** with "no files to review" (issues resolved by deletion/revert)
280
+ - You do a **full reset** (`Ctrl+Alt+Shift+R`)
281
+ - You **disable** review (`Alt+R` toggle) — push guard is off when review is off
282
+
283
+ No git hooks are needed — this is enforced at the extension level via pi's `tool_call` event interception.
284
+
285
+ ## License
286
+
287
+ MIT
package/architect.ts ADDED
@@ -0,0 +1,128 @@
1
+ /**
2
+ * architect.ts — Final "zoom out" architecture review after mini-review loops complete
3
+ *
4
+ * Triggered automatically when more than 1 file was actively reviewed by the
5
+ * review step. No heuristics or judge gating — if multiple files were
6
+ * touched and the reviewed content came from git, an architecture-level review always runs.
7
+ *
8
+ * Looks at the big picture: architecture coherence, cross-file consistency,
9
+ * accumulated tech debt, and documentation accuracy.
10
+ */
11
+
12
+ import type { ReviewResult, ReviewRunner } from "./reviewer";
13
+ import { readConfigFile } from "./settings";
14
+ import { log } from "./logger";
15
+
16
/**
 * Base prompt sent to the architect reviewer. buildArchitectPrompt() appends any
 * project-specific rules after it. This text is runtime content, not a comment,
 * so editing it changes what the reviewer is asked to do.
 */
+ const DEFAULT_ARCHITECT_PROMPT = `You are a senior architect doing a final "zoom out" review. A series of code changes were just made and passed individual mini-reviews. Now step back and look at the big picture.
17
+
18
+ You have tools available (read, bash, grep, find, ls) to explore the full codebase.
19
+
20
+ ## Architecture coherence
21
+ - Do all the pieces fit together? Any orphaned code that nothing calls?
22
+ - Is the module dependency graph clean? Any unexpected coupling?
23
+ - Does the layering make sense (e.g. no circular dependencies)?
24
+
25
+ ## Cross-file consistency
26
+ - Are naming conventions consistent across all changed files?
27
+ - Are similar patterns handled the same way everywhere?
28
+ - Are types/interfaces consistent and not duplicated?
29
+
30
+ ## Integration completeness
31
+ - Is new code properly wired up? Exports used? Imports correct?
32
+ - Are there any missing integration points?
33
+ - Do tests cover the integration paths, not just unit-level?
34
+
35
+ ## Accumulated tech debt
36
+ - Did the back-and-forth fix loops create any franken-code?
37
+ - Any TODO/FIXME/HACK comments that were added?
38
+ - Dead code or unused imports that accumulated?
39
+ - Any functions that grew too large or do too many things?
40
+
41
+ ## Documentation
42
+ - Is the README still accurate after all changes?
43
+ - Are architecture docs (if any) still correct?
44
+ - Do public APIs have adequate comments/types?
45
+ - Are new files/modules properly documented?
46
+
47
+ ## Response format
48
+ If everything looks good at the big-picture level, say "LGTM — architecture looks solid."
49
+ If there are issues, list them as bullet points with severity (high/medium/low).
50
+ Focus on systemic issues that individual mini-reviews would miss.
51
+ Do NOT repeat issues that were already found and fixed in mini-reviews.`;
52
+
53
/**
 * Resolve the project's custom architect-review rules.
 *
 * Candidate files are read through `readConfigFile` in priority order:
 * `architect.md` first, then the legacy `roundup.md`. The first one that
 * yields non-blank content wins; later candidates are not read.
 *
 * @param cwd - Directory handed to `readConfigFile` for the lookup.
 * @returns The trimmed rule text, or `null` when no candidate has content.
 */
export async function loadArchitectRules(cwd: string): Promise<string | null> {
  // Order matters: the current file name takes precedence over the legacy one.
  for (const fileName of ["architect.md", "roundup.md"]) {
    const rules = (await readConfigFile(cwd, fileName))?.trim();
    if (rules) {
      return rules;
    }
  }
  return null;
}
64
+
65
/**
 * Build the full architect-review prompt.
 *
 * Starts from `DEFAULT_ARCHITECT_PROMPT` and, when project-specific rules are
 * supplied, appends them under their own heading.
 *
 * @param customRules - Extra rules to append. `null`, an empty string, or a
 *   whitespace-only string all mean "no custom rules".
 * @returns The prompt text to send to the architect reviewer.
 */
export function buildArchitectPrompt(customRules: string | null): string {
  // Treat blank rules as absent so the prompt never ends in a heading with
  // nothing underneath it.
  if (!customRules?.trim()) {
    return DEFAULT_ARCHITECT_PROMPT;
  }
  return `${DEFAULT_ARCHITECT_PROMPT}\n\n## Additional project-specific architect review rules\n\n${customRules}`;
}
72
+
73
+ // ── Trigger logic ──────────────────────────────────
74
+
75
/**
 * Decide whether the architect review should run.
 *
 * It runs only when the review content came from git AND more than one
 * distinct file path was reviewed. Duplicate entries in `reviewedFiles` are
 * counted once, so the same path listed twice does not trigger the review.
 * The decision is reported through `log` on every path.
 *
 * @param reviewedFiles - Paths of the files that were reviewed.
 * @param isGitBased - Whether the review content came from one or more git repositories.
 * @returns `true` when the architect review should run.
 */
export function shouldRunArchitectReview(reviewedFiles: string[], isGitBased: boolean): boolean {
  if (!isGitBased) {
    log(`architect: skip — reviewed files are not from a git repo`);
    return false;
  }

  // Count distinct paths: the trigger is "more than one file", not "more than
  // one entry", and the raw array length overcounts when a path repeats.
  const fileCount = new Set(reviewedFiles).size;
  const multipleFiles = fileCount > 1;

  log(
    multipleFiles
      ? `architect: will run — ${fileCount} files reviewed from git repo(s)`
      : `architect: skip — only ${fileCount} file(s) reviewed`,
  );
  return multipleFiles;
}
93
+
94
+ // ── Full architect review ──────────────────────────
95
+
96
/**
 * Inputs for `runArchitectReview`.
 *
 * `customRules` and `sessionChangeSummary` are used to build the prompt; every
 * other field is passed through to the review runner unchanged.
 */
export interface ArchitectReviewOptions {
  /** Abort signal passed through to the runner. */
  signal: AbortSignal;
  /** Working directory passed through to the runner. */
  cwd: string;
  /** Model identifier passed through to the runner, if set. */
  model?: string;
  /** Project-specific architect rules appended to the default prompt; `null` for none. */
  customRules: string | null;
  /** Summary of the session's changes, embedded in the prompt. */
  sessionChangeSummary: string;
  /** Unique id for the architect review cycle. Separate from the senior review id. */
  reviewId?: string;
  /** Max wall-clock for the architect's LLM call, in ms. Passed through to the reviewer. */
  timeoutMs?: number;
  /** Optional callback passed through to the runner; receives a description string. */
  onActivity?: (description: string) => void;
  /** Optional callback passed through to the runner; receives a tool name and a target path (or `null`). */
  onToolCall?: (toolName: string, targetPath: string | null) => void;
}
109
+
110
/**
 * Run the final architect review.
 *
 * Builds the prompt from the architect instructions (default prompt plus any
 * custom rules), the session change summary, and a closing request to explore
 * the codebase, then passes it to `runner` together with the pass-through
 * options.
 *
 * @param runner - Function that executes a review for a prompt and options.
 * @param opts - Prompt inputs and pass-through runner options.
 * @returns The result produced by `runner`.
 */
export async function runArchitectReview(
  runner: ReviewRunner,
  opts: ArchitectReviewOptions,
): Promise<ReviewResult> {
  const { signal, cwd, model, reviewId, timeoutMs, onActivity, onToolCall } = opts;

  // Sections are separated by a blank line; "---" divides instructions from the summary.
  const prompt = [
    buildArchitectPrompt(opts.customRules),
    "---",
    "Here is a summary of all changes made in this session:",
    opts.sessionChangeSummary,
    "Please explore the codebase with your tools to verify everything fits together.",
  ].join("\n\n");

  return await runner(prompt, {
    signal,
    cwd,
    model,
    reviewId,
    timeoutMs,
    onActivity,
    onToolCall,
  });
}