@inceptionstack/pi-hard-no 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
1
+ ## Correctness & Bugs
2
+
3
+ - **Off-by-one errors**: wrong loop bounds (< vs <=, 0-indexed vs 1-indexed), fence-post errors in slicing/substring, incorrect range endpoints
4
+ - **Null / undefined / nil access**: dereferencing values that can be absent, missing nil checks before member access, optional chaining gaps
5
+ - **Type mismatches**: wrong argument types, implicit coercions that change behavior (string ↔ number, truthy/falsy surprises), incorrect casts
6
+ - **Logic errors**: inverted conditions, wrong boolean operator (AND vs OR), negation errors, swapped arguments, wrong variable used in expression
7
+ - **Boundary conditions**: empty collections, zero-length strings, negative numbers, integer overflow/underflow, maximum-size inputs
8
+ - **Error handling**: missing error checks on fallible operations, swallowed exceptions, catch blocks that hide root causes, error paths that leave state inconsistent
9
+ - **Resource leaks**: opened files/connections/handles never closed, missing cleanup in error paths, missing try-finally or equivalent
10
+ - **Concurrency**: race conditions on shared mutable state, missing synchronization, non-atomic check-then-act patterns, deadlock potential
11
+ - **Async correctness**: missing await on async calls, unhandled promise rejections, callbacks that can fire multiple times, event listener leaks
12
+ - **Data flow**: variables written but never read, stale values used after mutation elsewhere, aliasing bugs where two references unexpectedly share state
13
+ - **API contract violations**: passing values outside documented valid range, ignoring return values that signal errors, misusing library APIs
14
+ - **Partial failure**: operations that can half-complete (write 3 of 5 records), leaving data in an inconsistent state — flag missing transactions or rollback logic
15
+ - Each flagged bug must be **discrete and actionable** — identify the specific location, the trigger condition, and the concrete consequence
16
+ - Do not speculate about what might break in other parts of the codebase without evidence — prove the issue from the code under review
17
+ - Match the level of rigor to the codebase: do not demand production-grade defensive coding in one-off scripts or prototypes
18
+
19
+ ## Clean Architecture
20
+
21
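+ - Enforce the dependency rule: source-code dependencies point inward (UI → Application → Domain); Infrastructure sits in the outermost layer and inverts its dependency by implementing ports defined by the inner layers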
22
+ - Business logic must not depend on frameworks, databases, or external services directly
23
+ - Use cases / application services must orchestrate domain objects, not contain domain logic themselves
24
+ - Domain entities and value objects must be pure — no I/O, no framework imports
25
+ - Adapters (controllers, repositories, gateways) must implement ports defined by inner layers
26
+ - Flag any layer-skipping: UI calling infrastructure directly, domain importing from UI, etc.
27
+ - Configuration and wiring belong at the composition root, not scattered across layers
28
+
29
+ ## SOLID Principles
30
+
31
+ - **Single Responsibility**: each module/class/function should have one reason to change — flag god-classes and functions doing unrelated things
32
+ - **Open/Closed**: prefer extension over modification — flag changes that require editing existing working code when a plugin/strategy/decorator pattern would suffice
33
+ - **Liskov Substitution**: subtypes must be substitutable for their base types — flag overrides that strengthen preconditions (accept fewer inputs than the base type) or weaken postconditions (guarantee less than the base type)
34
+ - **Interface Segregation**: clients should not depend on methods they don't use — flag fat interfaces that force implementors to stub unused methods
35
+ - **Dependency Inversion**: high-level modules must not depend on low-level modules; both should depend on abstractions — flag direct instantiation of infrastructure in business logic
36
+
37
+ ## DRY — Don't Repeat Yourself
38
+
39
+ - Flag duplicated logic across files (copy-paste code with minor variations)
40
+ - Flag duplicated constants, magic numbers, and magic strings — extract to named constants
41
+ - Flag repeated conditional patterns that should be polymorphism or lookup tables
42
+ - Exception: test code may duplicate setup for readability — do not flag test helpers that are intentionally explicit
43
+
44
+ ## Clean Code
45
+
46
+ - Functions should do one thing, do it well, and do it only
47
+ - Functions should be short and operate at a single level of abstraction
48
+ - Flag deep nesting (more than 2-3 levels) — suggest early returns or extraction
49
+ - Flag functions with more than 3 parameters — suggest parameter objects
50
+ - Flag boolean parameters that switch behavior — suggest separate functions
51
+ - Names must reveal intent — flag cryptic abbreviations, single-letter variables (except conventional loop counters), and misleading names
52
+ - Flag dead code: unreachable branches, unused variables, commented-out code
53
+ - Flag side effects hidden in functions whose names suggest pure computation
54
+ - Error handling must be explicit — flag swallowed exceptions, empty catch blocks, and generic error messages that hide root causes
55
+
56
+ ## Pragmatic Programmer
57
+
58
+ - Flag violations of the principle of least surprise — code should behave as readers expect
59
+ - Flag broken windows: sloppy code left alongside clean code signals that quality doesn't matter
60
+ - Flag shotgun surgery: a single change requiring edits across many unrelated files
61
+ - Flag feature envy: a function that uses more data from another module than its own
62
+ - Prefer composition over inheritance — flag deep inheritance hierarchies (more than 2 levels)
63
+ - Flag primitive obsession: using raw strings/numbers/booleans where a value object or enum would add safety
64
+ - Orthogonality: modules should be independent — changing one should not require changing others
65
+
66
+ ## Documentation & Release Notes
67
+
68
+ - A changelog file should exist at the project root (`CHANGELOG.md`, `CHANGES.md`, `HISTORY.md`, or equivalent) documenting user-visible changes
69
+ - When a change modifies user-visible behavior (new feature, bug fix, breaking change, deprecation, notable behavior change), flag if the changelog has not been updated to describe it
70
+ - Changelog entries should be concrete and specific: what changed, why it matters to the user, not just the commit summary
71
+ - Internal-only changes (refactors with no behavior change, test-only updates, documentation-only updates, build/tooling changes) do not require a changelog entry — do not flag these
72
+ - If no changelog file exists at all and the project has user-visible changes accumulating, flag that one should be created (suggest Keep a Changelog format or similar)
73
+ - Version bumps in `package.json` or equivalent without a matching changelog entry are a smell — flag the mismatch
74
+
75
+ ## Domain-Driven Design
76
+
77
+ - Ubiquitous language: code names should match domain terminology — flag technical jargon where domain terms exist
78
+ - Bounded contexts must have clear boundaries — flag domain concepts leaking across context boundaries
79
+ - Aggregates must enforce their own invariants — flag external code that manipulates aggregate internals directly
80
+ - Value objects must be immutable — flag mutable value objects
81
+ - Domain events should be used for cross-aggregate side effects, not direct coupling
82
+ - Repositories must only exist for aggregate roots, not for every entity
83
+ - Flag anemic domain models: entities that are just data bags with getters/setters while logic lives in services
84
+
85
+ ## Security — OWASP Top 10 (Web / General)
86
+
87
+ - **Broken Access Control**: missing authorization checks, IDOR (direct object references without ownership validation), privilege escalation paths, misconfigured or overly permissive CORS policy
88
+ - **Cryptographic Failures**: hardcoded secrets, API keys, passwords, or tokens in code; weak hashing (MD5, SHA1 for passwords); missing encryption for sensitive data at rest or in transit
89
+ - **Injection**: SQL injection via string concatenation, command injection via unsanitized shell arguments, XSS via unescaped user input in HTML/templates, LDAP/XML/path injection
90
+ - **Insecure Design**: missing rate limiting on sensitive endpoints, no account lockout, missing input validation at trust boundaries, business logic flaws
91
+ - **Security Misconfiguration**: verbose error messages exposing internals, default credentials, unnecessary features enabled, missing security headers
92
+ - **Vulnerable Components**: known-vulnerable dependency versions, unmaintained libraries, dependencies with known CVEs
93
+ - **Authentication Failures**: weak password policies, missing MFA where appropriate, session tokens in URLs, missing session invalidation on logout/password change
94
+ - **Data Integrity Failures**: missing integrity checks on critical data, unsigned/unverified updates, deserialization of untrusted data without validation
95
+ - **Logging & Monitoring Failures**: missing audit logs for security-relevant actions, logging sensitive data (passwords, tokens, PII), insufficient error logging for incident response
96
+ - **SSRF**: server-side requests using user-supplied URLs without allowlist validation, internal service URLs exposed
97
+
98
+ ## Security — OWASP Top 10 for LLM / AI Applications
99
+
100
+ - **Prompt Injection**: user input concatenated directly into LLM prompts without sanitization, missing input/output boundaries, indirect injection via retrieved documents or tool outputs
101
+ - **Sensitive Information Disclosure**: PII, credentials, or proprietary data included in prompts, model responses, or training data; missing output filtering; conversation history leaking across users
102
+ - **Supply Chain Vulnerabilities**: untrusted model sources, unverified model weights, poisoned training data pipelines, compromised fine-tuning datasets
103
+ - **Data and Model Poisoning**: training or fine-tuning on unvalidated user-generated content, no data provenance tracking, missing anomaly detection on training inputs
104
+ - **Improper Output Handling**: LLM output used directly in SQL queries, shell commands, code execution, or HTML rendering without sanitization — treat all model output as untrusted
105
+ - **Excessive Agency**: LLM given write/execute/delete capabilities without human-in-the-loop confirmation, missing scope restrictions on tool access, no action audit trail
106
+ - **System Prompt Leakage**: system prompts retrievable via adversarial queries, sensitive instructions or architecture details in prompts, no prompt confidentiality controls
107
+ - **Vector and Embedding Weaknesses**: RAG retrieval without access control (users retrieving documents they shouldn't see), embedding injection, poisoned vector store entries
108
+ - **Misinformation**: no fact-checking or grounding for critical outputs, model hallucinations presented as authoritative, missing disclaimers on generated content
109
+ - **Unbounded Consumption**: missing token/cost limits per request or user, recursive agent loops without caps, no rate limiting on AI endpoints
110
+
111
+ ## AI Trifecta — Critical AI Security Intersection
112
+
113
+ - **Prompt Injection + Data Poisoning + Tool Use**: the most dangerous combination — poisoned retrieval data triggers prompt injection that invokes privileged tools; flag any path where untrusted data flows into prompts that have access to sensitive tools
114
+ - Flag chains where: retrieval → prompt → tool execution has no trust boundary validation at each step
115
+ - Flag missing sandboxing for AI-invoked code execution or file system access
116
+ - Flag AI systems that can modify their own instructions, training data, or retrieval sources
117
+
118
+ ## Unit Testing — Osherove's Art of Unit Testing
119
+
120
+ ### Naming
121
+
122
+ - Test names must describe: unit of work, scenario/input, and expected result
123
+ - Pattern: `[UnitOfWork]_[Scenario]_[ExpectedBehavior]` or equivalent readable form
124
+ - Flag cryptic test names like `test1`, `testIt`, `shouldWork`, or names that don't describe the scenario
125
+ - Test names should read as specifications — a non-developer should understand what is being tested
126
+
127
+ ### Trustworthiness
128
+
129
+ - Each test must have exactly one logical assertion (one reason to fail) — flag tests with multiple unrelated assertions
130
+ - No logic in tests: no if/else, loops, switch, or try/catch in test code — these make tests unreliable
131
+ - Flag tests that test implementation details (private methods, internal state) instead of observable behavior
132
+ - Flag tests without assertions (passing tests that verify nothing)
133
+ - Flag flaky patterns: time-dependent tests, order-dependent tests, tests sharing mutable state
134
+ - Tests must fail for the right reason — flag assertions that would pass even if the code were broken
135
+
136
+ ### Readability
137
+
138
+ - Tests should follow Arrange-Act-Assert (or Given-When-Then) structure clearly
139
+ - Flag excessive setup that obscures what is being tested — prefer factory methods or builders
140
+ - Flag shared mutable test fixtures — prefer fresh setup per test
141
+ - Magic values must be explained or extracted to named constants
142
+ - The entire test should be readable without scrolling — if it's long, it's testing too much
143
+
144
+ ### Maintainability
145
+
146
+ - Flag over-mocking: tests that mock everything except the unit under test lose integration confidence
147
+ - Flag brittle tests: tests that break when implementation changes but behavior doesn't
148
+ - Flag test duplication: identical test logic copy-pasted across files — extract shared test utilities
149
+ - Test helpers and custom matchers are encouraged when they improve clarity
150
+ - Flag missing edge case tests for: null/empty inputs, boundary values, error paths, concurrent access
package/git-roots.ts ADDED
@@ -0,0 +1,94 @@
1
+ /**
2
+ * git-roots.ts — Detect git repo roots from modified file paths
3
+ * Expands ~ to homedir for correct path resolution.
4
+ * Caches resolved roots to avoid repeated git calls.
5
+ */
6
+
7
+ import { dirname, resolve, isAbsolute } from "node:path";
8
+ import { homedir } from "node:os";
9
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
10
+
11
/**
 * Ask git for the top-level directory of the repository containing `dir`.
 *
 * Runs `git -C <dir> rev-parse --show-toplevel` through `pi.exec`, passing a
 * `timeout` option of 5000.
 *
 * @param pi - Extension API used to run the git command.
 * @param dir - Directory to probe.
 * @returns The trimmed stdout when the command exits with code 0 and prints
 *   something; `null` when it exits non-zero, prints only whitespace, or
 *   `pi.exec` throws.
 */
export async function findGitRoot(pi: ExtensionAPI, dir: string): Promise<string | null> {
  const gitArgs = ["-C", dir, "rev-parse", "--show-toplevel"];
  let topLevel = "";

  try {
    const outcome = await pi.exec("git", gitArgs, { timeout: 5000 });
    if (outcome.code === 0) {
      topLevel = outcome.stdout.trim();
    }
  } catch {
    // Best effort: any failure to run git is treated as "not in a git repo".
    return null;
  }

  return topLevel === "" ? null : topLevel;
}
28
+
29
/**
 * Group modified file paths by the git repository that contains them.
 *
 * Each path is made absolute (a leading `~/` is expanded to the home
 * directory, relative paths are resolved against `cwd`) and the repository
 * root of its parent directory is looked up with `findGitRoot`. Lookups are
 * cached per directory so several files in one directory cost a single git
 * call. The placeholder entry `"(bash file op)"` is skipped.
 *
 * When none of the files resolved to a repository — the set was empty, held
 * only placeholders, or every file landed under `"(no-git)"` — the repository
 * containing `cwd` (if any) is added with an empty file list.
 *
 * @param pi - Extension API used to run git.
 * @param cwd - Working directory used to resolve relative paths and as the fallback repo.
 * @param modifiedFiles - Paths as recorded by the caller; stored in the result unchanged.
 * @returns Map of git root → files in that repo. Files outside any repo are
 *   grouped under the key `"(no-git)"`.
 */
export async function resolveGitRoots(
  pi: ExtensionAPI,
  cwd: string,
  modifiedFiles: Set<string>,
): Promise<Map<string, string[]>> {
  const roots = new Map<string, string[]>();
  const resolvedCache = new Map<string, string | null>(); // dir → gitRoot cache
  let foundRepo = false;

  for (const file of modifiedFiles) {
    if (file === "(bash file op)") continue;

    // Expand ~ to homedir, then anchor relative paths at cwd.
    const expanded = file.startsWith("~/") ? resolve(homedir(), file.slice(2)) : file;
    const absPath = isAbsolute(expanded) ? expanded : resolve(cwd, expanded);
    const dir = dirname(absPath);

    // `undefined` means "not looked up yet"; `null` is a cached "no repo here".
    let gitRoot: string | null | undefined = resolvedCache.get(dir);
    if (gitRoot === undefined) {
      gitRoot = await findGitRoot(pi, dir);
      resolvedCache.set(dir, gitRoot);
    }
    if (gitRoot !== null) foundRepo = true;

    const key = gitRoot ?? "(no-git)";
    const list = roots.get(key) ?? [];
    list.push(file);
    roots.set(key, list);
  }

  // Fall back to cwd when no file resolved to a repo. Checking `roots.size`
  // alone would skip the fallback whenever the "(no-git)" bucket exists.
  if (!foundRepo) {
    const cwdRoot = await findGitRoot(pi, cwd);
    if (cwdRoot && !roots.has(cwdRoot)) {
      roots.set(cwdRoot, []);
    }
  }

  return roots;
}
75
+
76
/**
 * Collect every git root known from three sources: tracked modified files,
 * paths seen in tool calls, and roots already detected elsewhere.
 *
 * The modified files and tool-call paths are merged into one set and resolved
 * with `resolveGitRoots`; every key it returns except the `"(no-git)"` bucket
 * is added to a copy of `detectedGitRoots`.
 *
 * @param pi - Extension API used to run git.
 * @param cwd - Working directory passed through to `resolveGitRoots`.
 * @param modifiedFiles - Tracked modified file paths.
 * @param toolCallPaths - Additional paths taken from tool calls.
 * @param detectedGitRoots - Roots already known; not mutated.
 * @returns A new set containing the detected roots plus the resolved ones.
 */
export async function resolveAllGitRoots(
  pi: ExtensionAPI,
  cwd: string,
  modifiedFiles: Set<string>,
  toolCallPaths: string[],
  detectedGitRoots: Set<string>,
): Promise<Set<string>> {
  // Copy up front so the caller's set is never modified.
  const merged = new Set<string>(detectedGitRoots);

  const candidates = new Set<string>(modifiedFiles);
  for (const toolPath of toolCallPaths) {
    candidates.add(toolPath);
  }

  const filesByRoot = await resolveGitRoots(pi, cwd, candidates);
  filesByRoot.forEach((_files, root) => {
    if (root === "(no-git)") return;
    merged.add(root);
  });

  return merged;
}
package/helpers.ts ADDED
@@ -0,0 +1,72 @@
1
+ /**
2
+ * helpers.ts — Extracted pure functions for testability
3
+ */
4
+
5
+ import { randomBytes } from "node:crypto";
6
+
7
/**
 * Build a short identifier for one review cycle.
 *
 * The result is the literal prefix `r-` followed by 4 random bytes rendered
 * as 8 lowercase hex characters (32 bits of randomness). That is plenty for
 * correlating log lines within a session; the ID is not a security token.
 *
 * @returns An ID such as `r-1a2b3c4d`.
 */
export function createReviewId(): string {
  const suffix = randomBytes(4).toString("hex");
  return "r-" + suffix;
}
15
+
16
/**
 * Clamp a requested commit count to the range `[0, totalCommits]`.
 *
 * @param requested - Number of commits the caller asked for.
 * @param totalCommits - Number of commits actually available.
 * @returns `effectiveCount`, the count to use, and `wasClamped`, which is
 *   true whenever `effectiveCount` differs from `requested`. When
 *   `totalCommits` is zero or negative the result is always
 *   `{ effectiveCount: 0, wasClamped: true }`.
 */
export function clampCommitCount(
  requested: number,
  totalCommits: number,
): { effectiveCount: number; wasClamped: boolean } {
  if (totalCommits <= 0) {
    return { effectiveCount: 0, wasClamped: true };
  }

  // A negative or NaN request must not leak through as the effective count,
  // because it would later produce an invalid revision such as `HEAD~-3`.
  const lowerBounded = Number.isNaN(requested) ? 0 : Math.max(0, requested);
  const effectiveCount = Math.min(lowerBounded, totalCommits);

  return {
    effectiveCount,
    wasClamped: effectiveCount !== requested,
  };
}
33
+
34
/**
 * Decide which diff base to use.
 *
 * @param effectiveCount - Number of commits that will be reviewed.
 * @param totalCommits - Number of commits available.
 * @returns `true` when the effective count covers all commits, so the diff
 *   should be taken against the empty tree; `false` when only part of the
 *   history is covered, so the diff should be taken against `HEAD~N`.
 */
export function shouldDiffAllCommits(effectiveCount: number, totalCommits: number): boolean {
  const coversWholeHistory = totalCommits <= effectiveCount;
  return coversWholeHistory;
}
41
+
42
/**
 * Truncate a diff to at most `maxLen` UTF-16 code units and append a note
 * saying how many characters were dropped.
 *
 * @param diff - Full diff text.
 * @param maxLen - Maximum number of characters to keep. Negative or NaN
 *   values are treated as 0 and fractional values are rounded down.
 * @returns `diff` unchanged when it already fits; otherwise the kept prefix
 *   followed by `\n\n... (diff truncated, N chars omitted)`.
 */
export function truncateDiff(diff: string, maxLen: number): string {
  if (diff.length <= maxLen) return diff;

  // `slice(0, negative)` would cut from the END of the string, so normalise
  // the limit to a non-negative integer first.
  let keep = Number.isNaN(maxLen) ? 0 : Math.max(0, Math.floor(maxLen));

  // Do not cut between the two halves of a surrogate pair: a dangling high
  // surrogate is not valid text once the string is encoded.
  if (keep > 0) {
    const lastKept = diff.charCodeAt(keep - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) keep -= 1;
  }

  const omitted = diff.length - keep;
  return diff.slice(0, keep) + `\n\n... (diff truncated, ${omitted} chars omitted)`;
}
50
+
51
/**
 * Wall-clock budget, in milliseconds, granted per reviewed file.
 * The reviewer needs time to read and reason about every file, so the
 * overall timeout grows in proportion to the number of files.
 */
export const REVIEW_PER_FILE_BUDGET_MS = 120_000;

/**
 * Work out how long a review run may take.
 *
 * The result is whichever is larger: the caller's minimum (the original note
 * names `settings.reviewTimeoutMs` as the source of that value) or
 * `fileCount * REVIEW_PER_FILE_BUDGET_MS`. Small reviews therefore keep the
 * configured floor while large reviews get extra headroom. A negative
 * `fileCount` is counted as zero files.
 *
 * Kept in one place so the per-file factor and the clamping rule have a
 * single definition; per the original note, the formula used to be repeated
 * in orchestrator.ts and commands.ts.
 *
 * @param minTimeoutMs - Lower bound for the timeout, in milliseconds.
 * @param fileCount - Number of files under review.
 * @returns The effective timeout in milliseconds.
 */
export function computeReviewTimeoutMs(minTimeoutMs: number, fileCount: number): number {
  const countedFiles = Math.max(0, fileCount);
  const perFileBudget = countedFiles * REVIEW_PER_FILE_BUDGET_MS;
  return Math.max(minTimeoutMs, perFileBudget);
}
package/ignore.ts ADDED
@@ -0,0 +1,105 @@
1
+ /**
2
+ * ignore.ts — .hardno/ignore pattern matching
3
+ *
4
+ * Uses gitignore-style patterns:
5
+ * - Blank lines and lines starting with # are ignored
6
+ * - * matches anything except /
7
+ * - ** matches everything including /
8
+ * - ? matches a single character
9
+ * - Patterns without / match the filename only
10
+ * - Patterns with / match the full path
11
+ * - Leading ! negates a pattern
12
+ * - Trailing / means directory (treated as dir/**)
13
+ */
14
+
15
+ import { basename } from "node:path";
16
+ import { log } from "./logger";
17
+ import { readConfigFile } from "./settings";
18
+
19
/**
 * Load and parse the `ignore` config file for a working directory.
 *
 * The file is located by `readConfigFile(cwd, "ignore")`; per the original
 * note it tries `cwd/.hardno/` first and then `~/.pi/.hardno/` (that lookup
 * order lives in `readConfigFile`, not here).
 *
 * @param cwd - Working directory used to locate the config file.
 * @returns The parsed pattern list, or `null` when `readConfigFile` returns
 *   `null` or reading fails. A failure is logged as a warning, never thrown.
 */
export async function loadIgnorePatterns(cwd: string): Promise<string[] | null> {
  try {
    const content = await readConfigFile(cwd, "ignore");
    return content === null ? null : parseIgnoreFile(content);
  } catch (err: unknown) {
    // Anything can be thrown, so narrow before reading `.message`; otherwise
    // a thrown string would be logged as "undefined".
    const reason =
      err instanceof Error
        ? err.message
        : typeof err === "object" && err !== null && "message" in err
          ? String((err as { message: unknown }).message)
          : String(err);
    log(`Warning: could not read .hardno/ignore: ${reason}`);
    return null;
  }
}
33
+
34
/**
 * Turn the text of an ignore file into its list of patterns.
 *
 * The content is split on `\n`, each line is trimmed of surrounding
 * whitespace (which also removes a trailing `\r`), and blank lines and lines
 * beginning with `#` are dropped.
 *
 * @param content - Raw file content.
 * @returns The remaining lines, in file order.
 */
export function parseIgnoreFile(content: string): string[] {
  const patterns: string[] = [];
  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();
    if (line.length === 0 || line.startsWith("#")) continue;
    patterns.push(line);
  }
  return patterns;
}
43
+
44
/**
 * Convert a gitignore-style pattern to a RegExp.
 * The pattern should NOT have a `!` prefix (negation is handled by the caller).
 *
 * Translation rules:
 * - `*` matches any run of characters except `/`; `?` matches one character
 *   except `/`.
 * - `**` matches anything including `/`. A `**` followed by `/` that forms a
 *   whole path segment also matches zero directories, so `**` + `/foo`
 *   matches `foo` and `a/b/foo`.
 * - A trailing `/` marks a directory: the pattern matches everything below
 *   that directory.
 * - A pattern containing a `/` other than the trailing one is anchored to the
 *   start of the path (a leading `/` is dropped). Any other pattern may match
 *   at any depth, so `node_modules/` also matches `pkg/node_modules/x.js`.
 * - All other regex metacharacters are matched literally.
 *
 * @param pattern - Pattern text without a leading `!`.
 * @returns A RegExp to test against a `/`-separated relative path.
 */
function patternToRegex(pattern: string): RegExp {
  const REGEX_SPECIALS = ".+^${}()|[]\\";

  const isDirectoryPattern = pattern.endsWith("/");
  const glob = isDirectoryPattern ? pattern.slice(0, -1) : pattern;

  // Anchoring is decided WITHOUT the trailing slash, so a plain `dir/` can
  // match at any depth. A bare "/" keeps its previous match-everything result.
  const matchFullPath = glob.includes("/") || pattern === "/";

  // Single left-to-right pass; no textual placeholder that a literal pattern
  // could collide with.
  let source = "";
  let i = 0;
  while (i < glob.length) {
    const ch = glob.charAt(i);
    if (ch === "*" && glob.charAt(i + 1) === "*") {
      const startsSegment = i === 0 || glob.charAt(i - 1) === "/";
      if (startsSegment && glob.charAt(i + 2) === "/") {
        // "**/" as a whole segment: zero or more leading directories.
        source += "(?:.*/)?";
        i += 3;
      } else {
        source += ".*";
        i += 2;
      }
    } else if (ch === "*") {
      source += "[^/]*";
      i += 1;
    } else if (ch === "?") {
      source += "[^/]";
      i += 1;
    } else {
      source += REGEX_SPECIALS.includes(ch) ? `\\${ch}` : ch;
      i += 1;
    }
  }

  // Directory pattern: match everything underneath the directory.
  if (isDirectoryPattern) source += "/.*";

  if (matchFullPath) {
    if (source.startsWith("/")) source = source.slice(1);
    return new RegExp(`^${source}$`);
  }
  return new RegExp(`(^|/)${source}$`);
}
73
+
74
/**
 * Decide whether a file path is ignored by an ordered list of patterns.
 *
 * Patterns are applied in order and the last one that matches wins. A
 * pattern starting with `!` un-ignores what it matches. Each pattern (minus
 * any `!`) is compiled with `patternToRegex` and tested against the path with
 * a leading `./` removed; a pattern containing no `/` is additionally tested
 * against the bare file name.
 *
 * @param filePath - Path of the file to check.
 * @param patterns - Ignore patterns in file order.
 * @returns `true` when the last matching pattern is not negated; `false` when
 *   it is negated or nothing matches.
 */
export function shouldIgnore(filePath: string, patterns: string[]): boolean {
  const fileName = basename(filePath);
  const relPath = filePath.startsWith("./") ? filePath.slice(2) : filePath;

  return patterns.reduce<boolean>((verdict, entry) => {
    const negated = entry.startsWith("!");
    const glob = negated ? entry.slice(1) : entry;
    const matcher = patternToRegex(glob);

    let hit = matcher.test(relPath);
    if (!hit && !glob.includes("/")) {
      hit = matcher.test(fileName);
    }

    // A later match overrides whatever earlier patterns decided.
    return hit ? !negated : verdict;
  }, false);
}
99
+
100
/**
 * Return the files that are NOT ignored by the given patterns.
 *
 * @param files - Candidate file paths.
 * @param patterns - Ignore patterns, evaluated per file by `shouldIgnore`.
 * @returns A new array with the surviving paths in their original order.
 */
export function filterIgnored(files: string[], patterns: string[]): string[] {
  return files.reduce<string[]>((kept, candidate) => {
    if (!shouldIgnore(candidate, patterns)) {
      kept.push(candidate);
    }
    return kept;
  }, []);
}