@kodrunhq/opencode-autopilot 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1 -0
- package/assets/agents/placeholder-agent.md +13 -0
- package/assets/commands/configure.md +17 -0
- package/assets/commands/new-agent.md +16 -0
- package/assets/commands/new-command.md +15 -0
- package/assets/commands/new-skill.md +15 -0
- package/assets/commands/review-pr.md +49 -0
- package/assets/skills/.gitkeep +0 -0
- package/assets/skills/coding-standards/SKILL.md +327 -0
- package/package.json +52 -0
- package/src/agents/autopilot.ts +42 -0
- package/src/agents/documenter.ts +44 -0
- package/src/agents/index.ts +49 -0
- package/src/agents/metaprompter.ts +50 -0
- package/src/agents/pipeline/index.ts +25 -0
- package/src/agents/pipeline/oc-architect.ts +49 -0
- package/src/agents/pipeline/oc-challenger.ts +44 -0
- package/src/agents/pipeline/oc-critic.ts +42 -0
- package/src/agents/pipeline/oc-explorer.ts +46 -0
- package/src/agents/pipeline/oc-implementer.ts +56 -0
- package/src/agents/pipeline/oc-planner.ts +45 -0
- package/src/agents/pipeline/oc-researcher.ts +46 -0
- package/src/agents/pipeline/oc-retrospector.ts +42 -0
- package/src/agents/pipeline/oc-reviewer.ts +44 -0
- package/src/agents/pipeline/oc-shipper.ts +42 -0
- package/src/agents/pr-reviewer.ts +74 -0
- package/src/agents/researcher.ts +43 -0
- package/src/config.ts +168 -0
- package/src/index.ts +152 -0
- package/src/installer.ts +130 -0
- package/src/orchestrator/arena.ts +41 -0
- package/src/orchestrator/artifacts.ts +28 -0
- package/src/orchestrator/confidence.ts +59 -0
- package/src/orchestrator/fallback/chat-message-handler.ts +49 -0
- package/src/orchestrator/fallback/error-classifier.ts +148 -0
- package/src/orchestrator/fallback/event-handler.ts +235 -0
- package/src/orchestrator/fallback/fallback-config.ts +16 -0
- package/src/orchestrator/fallback/fallback-manager.ts +323 -0
- package/src/orchestrator/fallback/fallback-state.ts +120 -0
- package/src/orchestrator/fallback/index.ts +11 -0
- package/src/orchestrator/fallback/message-replay.ts +40 -0
- package/src/orchestrator/fallback/resolve-chain.ts +34 -0
- package/src/orchestrator/fallback/tool-execute-handler.ts +44 -0
- package/src/orchestrator/fallback/types.ts +46 -0
- package/src/orchestrator/handlers/architect.ts +114 -0
- package/src/orchestrator/handlers/build.ts +363 -0
- package/src/orchestrator/handlers/challenge.ts +41 -0
- package/src/orchestrator/handlers/explore.ts +9 -0
- package/src/orchestrator/handlers/index.ts +21 -0
- package/src/orchestrator/handlers/plan.ts +35 -0
- package/src/orchestrator/handlers/recon.ts +40 -0
- package/src/orchestrator/handlers/retrospective.ts +123 -0
- package/src/orchestrator/handlers/ship.ts +38 -0
- package/src/orchestrator/handlers/types.ts +31 -0
- package/src/orchestrator/lesson-injection.ts +80 -0
- package/src/orchestrator/lesson-memory.ts +110 -0
- package/src/orchestrator/lesson-schemas.ts +24 -0
- package/src/orchestrator/lesson-types.ts +6 -0
- package/src/orchestrator/phase.ts +76 -0
- package/src/orchestrator/plan.ts +43 -0
- package/src/orchestrator/schemas.ts +86 -0
- package/src/orchestrator/skill-injection.ts +52 -0
- package/src/orchestrator/state.ts +80 -0
- package/src/orchestrator/types.ts +20 -0
- package/src/review/agent-catalog.ts +439 -0
- package/src/review/agents/auth-flow-verifier.ts +47 -0
- package/src/review/agents/code-quality-auditor.ts +51 -0
- package/src/review/agents/concurrency-checker.ts +47 -0
- package/src/review/agents/contract-verifier.ts +45 -0
- package/src/review/agents/database-auditor.ts +47 -0
- package/src/review/agents/dead-code-scanner.ts +47 -0
- package/src/review/agents/go-idioms-auditor.ts +46 -0
- package/src/review/agents/index.ts +82 -0
- package/src/review/agents/logic-auditor.ts +47 -0
- package/src/review/agents/product-thinker.ts +49 -0
- package/src/review/agents/python-django-auditor.ts +46 -0
- package/src/review/agents/react-patterns-auditor.ts +46 -0
- package/src/review/agents/red-team.ts +49 -0
- package/src/review/agents/rust-safety-auditor.ts +46 -0
- package/src/review/agents/scope-intent-verifier.ts +45 -0
- package/src/review/agents/security-auditor.ts +47 -0
- package/src/review/agents/silent-failure-hunter.ts +45 -0
- package/src/review/agents/spec-checker.ts +45 -0
- package/src/review/agents/state-mgmt-auditor.ts +46 -0
- package/src/review/agents/test-interrogator.ts +43 -0
- package/src/review/agents/type-soundness.ts +46 -0
- package/src/review/agents/wiring-inspector.ts +46 -0
- package/src/review/cross-verification.ts +71 -0
- package/src/review/finding-builder.ts +74 -0
- package/src/review/fix-cycle.ts +146 -0
- package/src/review/memory.ts +114 -0
- package/src/review/pipeline.ts +258 -0
- package/src/review/report.ts +141 -0
- package/src/review/sanitize.ts +8 -0
- package/src/review/schemas.ts +75 -0
- package/src/review/selection.ts +98 -0
- package/src/review/severity.ts +71 -0
- package/src/review/stack-gate.ts +127 -0
- package/src/review/types.ts +43 -0
- package/src/templates/agent-template.ts +47 -0
- package/src/templates/command-template.ts +29 -0
- package/src/templates/skill-template.ts +42 -0
- package/src/tools/confidence.ts +93 -0
- package/src/tools/create-agent.ts +81 -0
- package/src/tools/create-command.ts +74 -0
- package/src/tools/create-skill.ts +74 -0
- package/src/tools/forensics.ts +88 -0
- package/src/tools/orchestrate.ts +310 -0
- package/src/tools/phase.ts +92 -0
- package/src/tools/placeholder.ts +11 -0
- package/src/tools/plan.ts +56 -0
- package/src/tools/review.ts +295 -0
- package/src/tools/state.ts +112 -0
- package/src/utils/fs-helpers.ts +39 -0
- package/src/utils/gitignore.ts +27 -0
- package/src/utils/paths.ts +17 -0
- package/src/utils/validators.ts +57 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { ReviewAgent } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Prompt template for the `security-auditor` review agent.
 *
 * The text carries three placeholders -- `{{DIFF}}`, `{{PRIOR_FINDINGS}}` and
 * `{{MEMORY}}` -- that must be substituted before the prompt is used.
 * NOTE(review): the substitution code lives outside this module; confirm
 * against the prompt builders.
 */
const SECURITY_AUDITOR_PROMPT = `You are the Security Auditor. You scan for security vulnerabilities and secure coding violations. Every finding must include a concrete exploit scenario.

## Instructions

Check each category systematically against the changed code:

1. **Hardcoded Secrets** -- Scan for API keys, passwords, tokens, connection strings, or private keys in source code. Check .env files committed to version control. Flag any string that looks like a credential.
2. **Injection Vulnerabilities** -- Trace every user input from entry point to use. Check for SQL injection (string concatenation in queries), command injection (unsanitized shell input), XSS (unescaped HTML output), and template injection.
3. **Authentication & Authorization** -- Verify auth middleware/guards on every protected endpoint. Check that authorization is enforced server-side, not just in UI routing. Flag endpoints missing auth checks.
4. **CSRF Protection** -- Verify anti-CSRF tokens on state-changing endpoints. Check SameSite cookie attributes. Flag forms that POST without CSRF protection.
5. **Sensitive Data Exposure** -- Check that passwords, tokens, PII, and credentials are never logged, included in error messages, or returned in API responses.
6. **Cryptographic Correctness** -- Flag MD5/SHA1 for password hashing, weak random number generation (Math.random for security), missing TLS configuration.
7. **SSRF** -- Verify that user-supplied URLs are validated against an allowlist before server-side fetching.
8. **Rate Limiting** -- Check that public and auth endpoints have rate limiting to prevent brute force and abuse.

For each finding, describe the exploit: "An attacker could [action] because [vulnerability], resulting in [impact]."

Do not comment on code style or architecture -- only security vulnerabilities.

## Diff

{{DIFF}}

## Prior Findings (for cross-verification)

{{PRIOR_FINDINGS}}

## Project Memory (false positive suppression)

{{MEMORY}}

## Output

For each finding, output a JSON object:
{"severity": "CRITICAL|HIGH|MEDIUM|LOW", "domain": "security", "title": "short title", "file": "path/to/file.ts", "line": 42, "agent": "security-auditor", "source": "phase1", "evidence": "what was found", "problem": "why it is an issue", "fix": "how to fix it"}

If no findings: {"findings": []}
Wrap all findings in: {"findings": [...]}`;

/**
 * Frozen definition of the `security-auditor` review agent.
 *
 * `relevantStacks` is empty -- presumably meaning the agent is not tied to a
 * particular technology stack; verify against the agent selection logic.
 * `severityFocus` lists CRITICAL and HIGH.
 */
export const securityAuditor: Readonly<ReviewAgent> = Object.freeze({
  name: "security-auditor",
  description:
    "Audits OWASP vulnerabilities, hardcoded secrets, injection vectors, and cryptographic correctness.",
  relevantStacks: [] as readonly string[],
  severityFocus: ["CRITICAL", "HIGH"] as const,
  prompt: SECURITY_AUDITOR_PROMPT,
});
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type { ReviewAgent } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Prompt template for the `silent-failure-hunter` review agent.
 *
 * The text carries three placeholders -- `{{DIFF}}`, `{{PRIOR_FINDINGS}}` and
 * `{{MEMORY}}` -- that must be substituted before the prompt is used.
 * NOTE(review): the substitution code lives outside this module; confirm
 * against the prompt builders.
 */
const SILENT_FAILURE_HUNTER_PROMPT = `You are the Silent Failure Hunter. You find every place where errors are silently swallowed, inadequately handled, or masked. Every error must either be handled meaningfully or propagated.

## Instructions

Check each pattern systematically in the changed code:

1. **Empty Catch Blocks** -- A catch with no body or only a comment silently swallows errors. Every catch must take meaningful action (recover, rethrow, or return an error value).
2. **Catch-Log-Only** -- Catching an error, logging it, and continuing as if nothing happened. The error must be propagated or handled with recovery logic, not just logged.
3. **Generic Catch-All** -- Catching base Exception/Error without differentiating recoverable from fatal errors. Flag catch clauses that handle all error types identically.
4. **Optional Chaining Masking** -- Excessive ?. chains can hide null/undefined that indicates a real bug rather than expected absence. Flag chains of 3+ optional accesses on data that should be guaranteed present.
5. **Fallback Value Hiding** -- Default values in ?? fallback or || default patterns should be intentional. Flag cases where a fallback silently masks broken or missing data instead of surfacing the error.
6. **Actionable Error Messages** -- Error strings must include context (what failed, with what input). Flag generic "Something went wrong" or "Error occurred" messages.
7. **Async Error Handling** -- Check that Promise rejections are caught, .catch() handlers exist, and try/catch wraps await calls. Flag fire-and-forget async calls with no error handling.
8. **Missing Finally Cleanup** -- Resources opened in try blocks (file handles, connections, locks) must be released in finally blocks or via using/dispose patterns.

Do not comment on code style or architecture -- only error handling quality and silent failure risks.

## Diff

{{DIFF}}

## Prior Findings (for cross-verification)

{{PRIOR_FINDINGS}}

## Project Memory (false positive suppression)

{{MEMORY}}

## Output

For each finding, output a JSON object:
{"severity": "CRITICAL|HIGH|MEDIUM|LOW", "domain": "reliability", "title": "short title", "file": "path/to/file.ts", "line": 42, "agent": "silent-failure-hunter", "source": "phase1", "evidence": "what was found", "problem": "why it is an issue", "fix": "how to fix it"}

If no findings: {"findings": []}
Wrap all findings in: {"findings": [...]}`;

/**
 * Frozen definition of the `silent-failure-hunter` review agent.
 *
 * `relevantStacks` is empty -- presumably meaning the agent is not tied to a
 * particular technology stack; verify against the agent selection logic.
 * `severityFocus` lists CRITICAL and HIGH.
 */
export const silentFailureHunter: Readonly<ReviewAgent> = Object.freeze({
  name: "silent-failure-hunter",
  description:
    "Hunts for silent failures including empty catch blocks, swallowed errors, catch-log-only patterns, and optional chaining that masks real errors.",
  relevantStacks: [] as readonly string[],
  severityFocus: ["CRITICAL", "HIGH"] as const,
  prompt: SILENT_FAILURE_HUNTER_PROMPT,
});
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import type { ReviewAgent } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Prompt template for the `spec-checker` review agent.
 *
 * The text carries three placeholders -- `{{DIFF}}`, `{{PRIOR_FINDINGS}}` and
 * `{{MEMORY}}` -- that must be substituted before the prompt is used.
 * NOTE(review): the substitution code lives outside this module; confirm
 * against the prompt builders.
 */
const SPEC_CHECKER_PROMPT = `You are the Spec Checker. You verify that every code change maps to a stated requirement and that no requirement is left partially implemented. Every finding must reference the specific requirement or lack thereof.

## Instructions

Read the diff and any linked issue, spec, or PR description. Build a requirement-to-implementation map.

Check each category systematically:

1. **Requirement Coverage** -- For every requirement stated in the linked issue or spec, verify there is a corresponding implementation in the diff. Flag requirements that have no implementation.
2. **Partial Implementations** -- For every requirement that has some implementation, verify it is complete. Flag features that are started but missing critical pieces (e.g., create endpoint exists but update/delete do not).
3. **Scope Creep Detection** -- For every code change in the diff, verify it maps to a stated requirement. Flag changes that add functionality not described in any spec, issue, or PR description.
4. **Acceptance Criteria** -- If acceptance criteria are listed, verify each criterion is testable and has a corresponding test or verification path in the diff.

For each finding, cite the specific requirement and its implementation status.

Do not comment on code quality, security, or performance -- only spec compliance.

## Diff

{{DIFF}}

## Prior Findings (for cross-verification)

{{PRIOR_FINDINGS}}

## Project Memory (false positive suppression)

{{MEMORY}}

## Output

For each finding, output a JSON object:
{"severity": "CRITICAL|HIGH|MEDIUM|LOW", "domain": "spec-compliance", "title": "short title", "file": "path/to/file.ts", "line": 42, "agent": "spec-checker", "source": "phase1", "evidence": "what was found", "problem": "why it is an issue", "fix": "how to fix it"}

If no findings: {"findings": []}
Wrap all findings in: {"findings": [...]}`;

/**
 * Frozen definition of the `spec-checker` review agent.
 *
 * `relevantStacks` is empty -- presumably meaning the agent is not tied to a
 * particular technology stack; verify against the agent selection logic.
 * `severityFocus` lists HIGH and MEDIUM.
 */
export const specChecker: Readonly<ReviewAgent> = Object.freeze({
  name: "spec-checker",
  description:
    "Verifies that code changes align with linked specs and requirements, flags partial implementations and scope creep.",
  relevantStacks: [] as readonly string[],
  severityFocus: ["HIGH", "MEDIUM"] as const,
  prompt: SPEC_CHECKER_PROMPT,
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { ReviewAgent } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Prompt template for the `state-mgmt-auditor` review agent.
 *
 * The text carries three placeholders -- `{{DIFF}}`, `{{PRIOR_FINDINGS}}` and
 * `{{MEMORY}}` -- that must be substituted before the prompt is used.
 * NOTE(review): the substitution code lives outside this module; confirm
 * against the prompt builders.
 */
const STATE_MGMT_AUDITOR_PROMPT = `You are the State Management Auditor. You verify that UI state is managed correctly, updates are consistent, and no reactivity bugs lurk in the changed code. Every finding must trace the state flow from update to render.

## Instructions

Trace every state update in the changed code from its trigger through to its effect on the rendered UI. Do not assume frameworks handle correctness automatically.

Check each category systematically:

1. **Stale Closures** -- For every callback or effect that references state variables, verify the closure captures the current value (not a stale snapshot). In React, check that useCallback and useEffect dependency arrays include all referenced state. Flag closures that read state declared outside the closure without proper dependency tracking.
2. **Infinite Re-render Loops** -- For every useEffect (or equivalent reactive block), verify that state updates inside the effect do not trigger the same effect again. Flag effects that set state referenced in their own dependency array without a guard condition.
3. **Derived State Anti-pattern** -- For every piece of state that can be computed from other state, verify it is computed (useMemo, computed property) rather than stored and manually synchronized. Flag state that duplicates information already available from other state.
4. **Missing Optimistic Update Rollback** -- For every optimistic UI update (state updated before server confirmation), verify a rollback path exists for server errors. Flag optimistic updates with no error handling that would revert to the previous state.
5. **Shared Mutable State** -- Flag any mutable object or array shared between components without proper state management (context, store, or prop drilling). Verify that state updates create new references rather than mutating existing objects.

Show your traces: "I traced state 'items' in Component X: setItems called in useEffect (line N) -> useEffect depends on [items] (line M) -> infinite loop because setItems triggers re-render which triggers useEffect again."

Do not comment on styling, naming, or API design -- only state management correctness.

## Diff

{{DIFF}}

## Prior Findings (for cross-verification)

{{PRIOR_FINDINGS}}

## Project Memory (false positive suppression)

{{MEMORY}}

## Output

For each finding, output a JSON object:
{"severity": "CRITICAL|HIGH|MEDIUM|LOW", "domain": "state-management", "title": "short title", "file": "path/to/file.ts", "line": 42, "agent": "state-mgmt-auditor", "source": "phase1", "evidence": "what was found", "problem": "why it is an issue", "fix": "how to fix it"}

If no findings: {"findings": []}
Wrap all findings in: {"findings": [...]}`;

/**
 * Frozen definition of the `state-mgmt-auditor` review agent.
 *
 * `relevantStacks` names react, vue, svelte and angular -- presumably the
 * stacks for which this agent is selected; verify against the agent selection
 * logic. `severityFocus` lists HIGH and MEDIUM.
 */
export const stateMgmtAuditor: Readonly<ReviewAgent> = Object.freeze({
  name: "state-mgmt-auditor",
  description:
    "Audits UI state management for stale closures, infinite re-render loops, derived state anti-patterns, and missing optimistic update rollbacks.",
  relevantStacks: ["react", "vue", "svelte", "angular"] as readonly string[],
  severityFocus: ["HIGH", "MEDIUM"] as const,
  prompt: STATE_MGMT_AUDITOR_PROMPT,
});
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { ReviewAgent } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Prompt template for the `test-interrogator` review agent.
 *
 * The text carries three placeholders -- `{{DIFF}}`, `{{PRIOR_FINDINGS}}` and
 * `{{MEMORY}}` -- that must be substituted before the prompt is used.
 * NOTE(review): the substitution code lives outside this module; confirm
 * against the prompt builders.
 */
const TEST_INTERROGATOR_PROMPT = `You are the Test Interrogator. You evaluate whether existing tests would actually catch bugs that matter. This is NOT about line coverage -- it is about whether a bug could hide behind a passing test suite.

## Instructions

For every test you evaluate, answer: "If this test passes, what bug could still hide?"

1. **Empty Assertions** -- A test with no assertions is worse than no test (false confidence). Flag as CRITICAL.
2. **Tautological Tests** -- Tests where assertions only verify mocks return what they were configured to return. Example: mock.return = 42; assert get() == 42. This tests the mock, not the code. If the test would pass even with production code deleted, it is tautological.
3. **Over-Mocking** -- Flag tests that mock internal interfaces to avoid setup (lazy mocking). If >60% of setup is mock config, flag for review. Ask: "Could this use a real implementation instead?"
4. **Behavioral Coverage** -- For each changed function, identify key behaviors (not lines). Verify each behavior has a test that asserts the outcome. A test that calls a function but does not verify side effects or return value is not covering behavior.
5. **Missing Edge Case Tests** -- Changed code that handles boundaries, nulls, or error paths should have corresponding test cases. Missing tests for new public API is CRITICAL.
6. **Test Architecture** -- Are there unit + integration tests for code with integration points? Flag if only mocked unit tests exist for database/API operations.

Do not comment on code quality or style -- only test adequacy.

## Diff

{{DIFF}}

## Prior Findings (for cross-verification)

{{PRIOR_FINDINGS}}

## Project Memory (false positive suppression)

{{MEMORY}}

## Output

For each finding, output a JSON object:
{"severity": "CRITICAL|HIGH|MEDIUM|LOW", "domain": "testing", "title": "short title", "file": "path/to/file.ts", "line": 42, "agent": "test-interrogator", "source": "phase1", "evidence": "what was found", "problem": "why it is an issue", "fix": "how to fix it"}

If no findings: {"findings": []}
Wrap all findings in: {"findings": [...]}`;

/**
 * Frozen definition of the `test-interrogator` review agent.
 *
 * `relevantStacks` is empty -- presumably meaning the agent is not tied to a
 * particular technology stack; verify against the agent selection logic.
 * `severityFocus` lists CRITICAL and HIGH.
 */
export const testInterrogator: Readonly<ReviewAgent> = Object.freeze({
  name: "test-interrogator",
  description:
    "Analyzes test adequacy -- whether tests would catch real bugs, not just whether they exist. Evaluates assertions, edge cases, and mock quality.",
  relevantStacks: [] as readonly string[],
  severityFocus: ["CRITICAL", "HIGH"] as const,
  prompt: TEST_INTERROGATOR_PROMPT,
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { ReviewAgent } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Prompt template for the `type-soundness` review agent.
 *
 * The text carries three placeholders -- `{{DIFF}}`, `{{PRIOR_FINDINGS}}` and
 * `{{MEMORY}}` -- that must be substituted before the prompt is used.
 * NOTE(review): the substitution code lives outside this module; confirm
 * against the prompt builders.
 *
 * Backticks inside the prompt are escaped because the text is a template literal.
 */
const TYPE_SOUNDNESS_PROMPT = `You are the Type Soundness Auditor. You verify that the type system is used correctly and that type-level guarantees are not undermined by escape hatches. Every finding must explain how the type unsoundness can cause a runtime error.

## Instructions

Examine every type annotation, assertion, and generic usage in the changed code. Do not skip inferred types -- verify they match intent.

Check each category systematically:

1. **Any Usage** -- Flag every explicit \`any\` type. For each, assess whether it is justified (e.g., third-party library boundary) or avoidable. Suggest the narrowest possible type replacement.
2. **Type Narrowing Correctness** -- For every type guard, instanceof check, or discriminated union switch, verify the narrowing is exhaustive and correct. Flag narrowing that leaves unhandled cases or narrows incorrectly.
3. **Generic Constraints** -- For every generic type parameter, verify the constraint is meaningful. Flag unconstrained generics (\`<T>\` with no extends) used in contexts where a constraint would prevent misuse.
4. **Unsafe Type Assertions** -- Flag every \`as\` assertion, especially double assertions (\`as unknown as X\`). Verify the assertion is safe by tracing the actual runtime type. Flag assertions that could mask a type mismatch.
5. **Invariant Enforcement** -- Verify that domain invariants (non-negative values, non-empty strings, valid email format) are enforced through the type system (branded types, newtypes, validation schemas) rather than relying on runtime checks alone.

Show your reasoning: "Type assertion at line N casts UserInput as ValidatedUser, but no validation occurs between input and cast. At runtime, UserInput may lack required fields, causing property access errors."

Do not comment on naming conventions, code style, or business logic -- only type correctness.

## Diff

{{DIFF}}

## Prior Findings (for cross-verification)

{{PRIOR_FINDINGS}}

## Project Memory (false positive suppression)

{{MEMORY}}

## Output

For each finding, output a JSON object:
{"severity": "CRITICAL|HIGH|MEDIUM|LOW", "domain": "types", "title": "short title", "file": "path/to/file.ts", "line": 42, "agent": "type-soundness", "source": "phase1", "evidence": "what was found", "problem": "why it is an issue", "fix": "how to fix it"}

If no findings: {"findings": []}
Wrap all findings in: {"findings": [...]}`;

/**
 * Frozen definition of the `type-soundness` review agent.
 *
 * `relevantStacks` names typescript, kotlin, rust and go -- presumably the
 * stacks for which this agent is selected; verify against the agent selection
 * logic. `severityFocus` lists HIGH and MEDIUM.
 */
export const typeSoundness: Readonly<ReviewAgent> = Object.freeze({
  name: "type-soundness",
  description:
    "Audits type correctness including unsafe any usage, type narrowing errors, meaningless generics, and unsafe type assertions.",
  relevantStacks: ["typescript", "kotlin", "rust", "go"] as readonly string[],
  severityFocus: ["HIGH", "MEDIUM"] as const,
  prompt: TYPE_SOUNDNESS_PROMPT,
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { ReviewAgent } from "../types";
|
|
2
|
+
|
|
3
|
+
/**
 * Prompt template for the `wiring-inspector` review agent.
 *
 * The text carries three placeholders -- `{{DIFF}}`, `{{PRIOR_FINDINGS}}` and
 * `{{MEMORY}}` -- that must be substituted before the prompt is used.
 * NOTE(review): the substitution code lives outside this module; confirm
 * against the prompt builders.
 */
const WIRING_INSPECTOR_PROMPT = `You are the Wiring Inspector. You verify that every feature path is fully connected from the user interface through the API layer to the database and back. Every finding must trace the broken link.

## Instructions

Trace every changed feature path end-to-end. Do not assume connectivity -- verify it.

Check each category systematically:

1. **UI-to-API Connectivity** -- Trace every UI event handler to its API call. Verify the endpoint URL, HTTP method, and request body shape match the backend route definition. Flag any UI action that fires into the void.
2. **API-to-Client Alignment** -- For every new or modified API endpoint, verify a corresponding client-side call exists. Check that request and response shapes match on both sides (field names, types, optional vs required).
3. **Cross-Layer Shape Alignment** -- Trace data shapes from database schema through ORM/model to API response to client-side type. Flag any field that exists in one layer but is missing in another.
4. **Error Propagation** -- For every error that can originate in the backend (validation, auth, DB constraint), verify it propagates through the API with an appropriate status code and is handled in the UI with a user-visible message.
5. **Orphaned Handlers** -- Identify event handlers, route handlers, or callback functions that are defined but never invoked from any call site in the changed code.

Show your traces: "I traced feature X: UI button click -> fetch('/api/foo', POST) -> route handler (line N) -> DB write. Issue: response shape has 'userId' but client expects 'user_id'."

Do not comment on style, naming, or performance -- only connectivity correctness.

## Diff

{{DIFF}}

## Prior Findings (for cross-verification)

{{PRIOR_FINDINGS}}

## Project Memory (false positive suppression)

{{MEMORY}}

## Output

For each finding, output a JSON object:
{"severity": "CRITICAL|HIGH|MEDIUM|LOW", "domain": "wiring", "title": "short title", "file": "path/to/file.ts", "line": 42, "agent": "wiring-inspector", "source": "phase1", "evidence": "what was found", "problem": "why it is an issue", "fix": "how to fix it"}

If no findings: {"findings": []}
Wrap all findings in: {"findings": [...]}`;

/**
 * Frozen definition of the `wiring-inspector` review agent.
 *
 * `relevantStacks` is empty -- presumably meaning the agent is not tied to a
 * particular technology stack; verify against the agent selection logic.
 * `severityFocus` lists CRITICAL and HIGH.
 */
export const wiringInspector: Readonly<ReviewAgent> = Object.freeze({
  name: "wiring-inspector",
  description:
    "Traces end-to-end connectivity from UI events through API endpoints to database writes and back, checking for disconnected flows and orphaned handlers.",
  relevantStacks: [] as readonly string[],
  severityFocus: ["CRITICAL", "HIGH"] as const,
  prompt: WIRING_INSPECTOR_PROMPT,
});
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-verification prompt builder.
|
|
3
|
+
*
|
|
4
|
+
* After Stage 1 review, each agent receives condensed findings from ALL OTHER
|
|
5
|
+
* agents so they can upgrade severities, add missed findings, or confirm results.
|
|
6
|
+
* Uses a 1-line condensed format to prevent token budget explosion (Pitfall 2).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { sanitizeTemplateContent } from "./sanitize";
|
|
10
|
+
import type { ReviewAgent, ReviewFinding } from "./types";
|
|
11
|
+
|
|
12
|
+
/**
 * Render a finding as one compact summary string for cross-verification context.
 *
 * Output shape: `[agent] [severity] [location] title`, where `location` is
 * `file:line` when `finding.line` is truthy and just `file` otherwise (so a
 * missing line and a line of `0` both yield the bare file path).
 *
 * Only the title is length-limited: titles longer than 120 characters are cut
 * to their first 117 characters followed by `...`. The agent name and file
 * path are emitted as-is.
 *
 * @param finding - The finding to summarize.
 * @returns The condensed summary string.
 */
export function condenseFinding(finding: ReviewFinding): string {
  const maxTitleLength = 120;
  const ellipsis = "...";

  let location = finding.file;
  if (finding.line) {
    location = `${finding.file}:${finding.line}`;
  }

  let title = finding.title;
  if (title.length > maxTitleLength) {
    title = `${title.slice(0, maxTitleLength - ellipsis.length)}${ellipsis}`;
  }

  return [`[${finding.agent}]`, `[${finding.severity}]`, `[${location}]`, title].join(" ");
}
|
|
24
|
+
|
|
25
|
+
/**
 * Build one cross-verification prompt per agent.
 *
 * For every agent, the agent's prompt template is filled in as follows:
 *  - `{{DIFF}}` is replaced by the sanitized diff;
 *  - `{{PRIOR_FINDINGS}}` is replaced by the sanitized, condensed findings of
 *    all OTHER agents (one per line, or a "no findings" notice when there are
 *    none), followed by a blank line and the cross-verification instruction;
 *  - `{{MEMORY}}` is replaced by the empty string.
 *
 * Only the first occurrence of each placeholder is substituted. Findings
 * stored under the agent's own name in `findingsByAgent` are never included
 * in that agent's prompt.
 *
 * @param agents - Agents to build prompts for; output order follows this order.
 * @param findingsByAgent - Findings keyed by the name of the agent that produced them.
 * @param diff - Raw diff text; sanitized before substitution.
 * @returns A frozen array of frozen `{ name, prompt }` records.
 */
export function buildCrossVerificationPrompts(
  agents: readonly ReviewAgent[],
  findingsByAgent: ReadonlyMap<string, readonly ReviewFinding[]>,
  diff: string,
): readonly { readonly name: string; readonly prompt: string }[] {
  const crossVerifyInstruction = `Review these findings from other agents. You may: (1) UPGRADE severity with justification, (2) ADD a new finding you missed, (3) Report no changes.`;

  // The diff is identical for every agent, so sanitize it once up front.
  const safeDiff = sanitizeTemplateContent(diff);

  const results: { readonly name: string; readonly prompt: string }[] = [];

  for (const agent of agents) {
    // Collect condensed findings from all OTHER agents.
    const otherFindings: string[] = [];
    for (const [agentName, findings] of findingsByAgent) {
      if (agentName === agent.name) continue;
      for (const f of findings) {
        otherFindings.push(condenseFinding(f));
      }
    }

    const priorFindingsBlock =
      otherFindings.length > 0 ? otherFindings.join("\n") : "No findings from other agents yet.";

    // Sanitize untrusted content before template substitution.
    const safeFindings = sanitizeTemplateContent(priorFindingsBlock);
    const priorFindingsSection = `${safeFindings}\n\n${crossVerifyInstruction}`;

    // Replacer functions are used for the untrusted values on purpose: a plain
    // replacement string would have `$&`, `$$`, `` $` `` and `$'` expanded by
    // String.prototype.replace, corrupting diffs and titles that contain `$`.
    const prompt = agent.prompt
      .replace("{{DIFF}}", () => safeDiff)
      .replace("{{PRIOR_FINDINGS}}", () => priorFindingsSection)
      .replace("{{MEMORY}}", "");

    results.push(Object.freeze({ name: agent.name, prompt }));
  }

  return Object.freeze(results);
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { reviewFindingSchema } from "./schemas";
|
|
2
|
+
import { compareSeverity } from "./severity";
|
|
3
|
+
import type { AgentCategory, AgentResult, ReviewFinding } from "./types";
|
|
4
|
+
|
|
5
|
+
/**
 * Unvalidated field values accepted by `createFinding`.
 *
 * `severity` is a plain string at this stage; `createFinding` passes the whole
 * object through `reviewFindingSchema`, which is where bad values are rejected.
 */
interface FindingInput {
	// Classification
	readonly severity: string;
	readonly domain: string;
	readonly title: string;

	// Location (`line` may be omitted)
	readonly file: string;
	readonly line?: number;

	// Provenance: reporting agent and the review stage that produced the finding
	readonly agent: string;
	readonly source: "phase1" | "cross-verification" | "product-review" | "red-team";

	// Free-text details
	readonly evidence: string;
	readonly problem: string;
	readonly fix: string;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Validate raw finding fields and hand them back as a frozen ReviewFinding.
 *
 * @param input - Candidate finding fields.
 * @returns The value produced by `reviewFindingSchema.parse`, frozen with `Object.freeze`.
 * @throws Whatever `reviewFindingSchema.parse` throws when `input` does not satisfy
 *   the schema (e.g., an invalid severity).
 */
export function createFinding(input: FindingInput): Readonly<ReviewFinding> {
	return Object.freeze(reviewFindingSchema.parse(input));
}
|
|
26
|
+
|
|
27
|
+
/**
 * Caller-supplied fields for `createAgentResult`.
 * `completedAt` is not part of the input; the factory stamps it itself.
 */
interface AgentResultInput {
	// Which agent ran and the category it belongs to
	readonly agent: string;
	readonly category: AgentCategory;

	// What the agent reported
	readonly findings: readonly ReviewFinding[];

	// Run duration in milliseconds
	readonly durationMs: number;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Create an immutable AgentResult with auto-timestamped completedAt.
 *
 * The input's `findings` are copied into a new array so the caller's array is
 * never aliased. Both the returned object and that copied array are frozen;
 * `Object.freeze` is shallow, so freezing only the outer object would leave
 * `result.findings` open to push/splice. The individual finding objects are
 * not frozen here.
 *
 * @param input - Agent name, category, findings and run duration.
 * @returns A frozen AgentResult whose `completedAt` is the current time as an ISO-8601 string.
 */
export function createAgentResult(input: AgentResultInput): Readonly<AgentResult> {
	// Defensive copy, frozen in place (statement form keeps the array's declared type unchanged).
	const findings = [...input.findings];
	Object.freeze(findings);

	const result: AgentResult = {
		agent: input.agent,
		category: input.category,
		findings,
		durationMs: input.durationMs,
		completedAt: new Date().toISOString(),
	};
	return Object.freeze(result);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Collapse duplicate findings and order the survivors by severity.
 *
 * Two findings are duplicates when they share the same `file` and `title`.
 * Of a duplicate pair, the one `compareSeverity` ranks higher is kept; on a tie
 * the earlier one stays. The result is sorted with `compareSeverity`
 * (CRITICAL first, then HIGH, MEDIUM, LOW) and frozen.
 *
 * @param findings - Findings gathered from any number of sources.
 * @returns A frozen, de-duplicated, severity-ordered array.
 */
export function mergeFindings(findings: readonly ReviewFinding[]): readonly ReviewFinding[] {
	const bestByKey = new Map<string, ReviewFinding>();

	for (const candidate of findings) {
		const dedupeKey = `${candidate.file}::${candidate.title}`;
		const current = bestByKey.get(dedupeKey);

		// compareSeverity returns a negative number when its first argument is the higher severity
		const candidateWins =
			current === undefined || compareSeverity(candidate.severity, current.severity) < 0;

		if (candidateWins) {
			bestByKey.set(dedupeKey, candidate);
		}
	}

	const ordered = Array.from(bestByKey.values());
	ordered.sort((left, right) => compareSeverity(left.severity, right.severity));

	return Object.freeze(ordered);
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
 * Fix cycle logic for the review engine.
 *
 * Determines which CRITICAL findings have actionable suggestions,
 * and builds re-run prompts for only the affected agents.
 * Vague suggestions (containing "consider", "might want to", "perhaps",
 * or "could potentially") and very short suggestions (under 20 characters)
 * are filtered out.
 */
|
|
9
|
+
|
|
10
|
+
import { sanitizeTemplateContent } from "./sanitize";
|
|
11
|
+
import type { ReviewAgent, ReviewFinding } from "./types";
|
|
12
|
+
|
|
13
|
+
/**
 * Outcome of triaging findings for the fix cycle.
 */
export interface FixInstructions {
	/** Findings that passed every filter. */
	readonly fixable: readonly ReviewFinding[];
	/** Distinct `agent` names taken from `fixable`. */
	readonly agentsToRerun: readonly string[];
	/** Findings that were filtered out, each paired with the reason. */
	readonly skipped: ReadonlyArray<{ readonly finding: ReviewFinding; readonly reason: string }>;
}
|
|
18
|
+
|
|
19
|
+
// Lower-case phrases that mark a suggestion as non-actionable when they appear anywhere in it.
const VAGUE_INDICATORS = [
	"consider",
	"might want to",
	"perhaps",
	"could potentially",
] as const;

// Suggestions shorter than this many characters are treated as too short to act on.
const MIN_SUGGESTION_LENGTH = 20;
|
|
21
|
+
|
|
22
|
+
/**
 * Report whether a suggestion contains any of the VAGUE_INDICATORS phrases.
 * Matching is a case-insensitive substring test.
 */
function isVagueSuggestion(fix: string): boolean {
	const normalized = fix.toLowerCase();
	for (const phrase of VAGUE_INDICATORS) {
		if (normalized.includes(phrase)) {
			return true;
		}
	}
	return false;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Determine which findings are fixable and which agents need re-running.
 *
 * A finding is fixable only when all of the following hold, checked in this order:
 *   1. its severity is exactly "CRITICAL";
 *   2. its `fix` text, ignoring leading/trailing whitespace, is at least
 *      MIN_SUGGESTION_LENGTH characters long;
 *   3. its `fix` text contains no vague indicator (see isVagueSuggestion).
 * Every other finding is recorded in `skipped` with the first reason that applied.
 *
 * @param findings - Findings to triage.
 * @returns A frozen FixInstructions whose `fixable`, `agentsToRerun` and `skipped`
 *   arrays are themselves frozen. `agentsToRerun` lists each agent once, in order of
 *   first appearance among the fixable findings.
 */
export function determineFixableFindings(findings: readonly ReviewFinding[]): FixInstructions {
	const fixable: ReviewFinding[] = [];
	const skipped: { readonly finding: ReviewFinding; readonly reason: string }[] = [];
	const agentSet = new Set<string>();

	for (const finding of findings) {
		let reason: string | undefined;

		if (finding.severity !== "CRITICAL") {
			reason = `Skipped: ${finding.severity} severity (only CRITICAL are fixable)`;
		} else if (finding.fix.trim().length < MIN_SUGGESTION_LENGTH) {
			// Trim first so whitespace padding cannot push an empty suggestion over the
			// threshold. The limit in the message comes from the constant so the two
			// cannot drift apart.
			reason = `Skipped: suggestion too short (< ${MIN_SUGGESTION_LENGTH} chars)`;
		} else if (isVagueSuggestion(finding.fix)) {
			reason = "Skipped: vague suggestion detected";
		}

		if (reason !== undefined) {
			skipped.push(Object.freeze({ finding, reason }));
			continue;
		}

		fixable.push(finding);
		agentSet.add(finding.agent);
	}

	return Object.freeze({
		fixable: Object.freeze(fixable),
		agentsToRerun: Object.freeze([...agentSet]),
		skipped: Object.freeze(skipped),
	});
}
|
|
81
|
+
|
|
82
|
+
/**
 * Build re-run prompts for agents whose findings were fixable.
 *
 * For each agent in `agents` that reported at least one finding in `fixable`:
 * - the agent's prompt template has the first {{DIFF}} replaced by the sanitized
 *   updated diff, and the first {{PRIOR_FINDINGS}} and {{MEMORY}} replaced by "";
 * - a "Fix Cycle - Verification Pass" section is appended, listing that agent's
 *   fixable findings and asking it to verify the fixes and look for regressions.
 *
 * Agents with no fixable findings are omitted. Findings whose `agent` matches no
 * entry in `agents` produce no prompt.
 *
 * @param fixable - Findings selected for the fix cycle.
 * @param agents - Review agents whose templates may be re-run.
 * @param diff - The updated diff; treated as untrusted and sanitized before substitution.
 * @returns A frozen array of frozen { name, prompt } entries, in `agents` order.
 */
export function buildFixInstructions(
	fixable: readonly ReviewFinding[],
	agents: readonly ReviewAgent[],
	diff: string,
): readonly { readonly name: string; readonly prompt: string }[] {
	// Group fixable findings by agent
	const findingsByAgent = new Map<string, ReviewFinding[]>();
	for (const f of fixable) {
		const group = findingsByAgent.get(f.agent);
		if (group) {
			group.push(f);
		} else {
			findingsByAgent.set(f.agent, [f]);
		}
	}

	// The diff is identical for every agent, so sanitize the untrusted content once.
	const safeDiff = sanitizeTemplateContent(diff);

	const results: { readonly name: string; readonly prompt: string }[] = [];

	for (const agent of agents) {
		const agentFindings = findingsByAgent.get(agent.name);
		if (!agentFindings || agentFindings.length === 0) continue;

		// Build findings list for the prompt
		const findingsList = agentFindings
			.map((f) => `- [${f.severity}] ${f.title} in ${f.file}${f.line ? `:${f.line}` : ""}`)
			.join("\n");

		// A replacer FUNCTION is used for the diff on purpose: a replacement passed as a
		// string has "$$", "$&", "$`" and "$'" interpreted by String.prototype.replace,
		// which would silently corrupt diffs containing those sequences. A function's
		// return value is inserted verbatim.
		const basePrompt = agent.prompt
			.replace("{{DIFF}}", () => safeDiff)
			.replace("{{PRIOR_FINDINGS}}", "")
			.replace("{{MEMORY}}", "");

		const fixCyclePrompt = `${basePrompt}

## Fix Cycle - Verification Pass

The following findings were reported in the previous review and should have been fixed.
Please verify each fix is correct and check for any regressions introduced by the fixes.

### Findings to verify:
${findingsList}

### Instructions:
1. For each finding above, verify the fix is correct and complete
2. Check for any regression bugs introduced by the fixes
3. Report any NEW issues found in the fixed code
4. If a finding was NOT fixed, report it again with the same severity`;

		results.push(Object.freeze({ name: agent.name, prompt: fixCyclePrompt }));
	}

	return Object.freeze(results);
}
|