@dreki-gg/pi-code-reviewer 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -1
- package/extensions/code-reviewer/commands/review-init.ts +5 -1
- package/extensions/code-reviewer/commands/review-tool.ts +70 -9
- package/extensions/code-reviewer/commands/review.ts +36 -1
- package/extensions/code-reviewer/config.ts +73 -1
- package/extensions/code-reviewer/effects/model.ts +112 -0
- package/extensions/code-reviewer/errors.ts +10 -1
- package/extensions/code-reviewer/model-plan.ts +84 -0
- package/extensions/code-reviewer/passes.ts +571 -0
- package/extensions/code-reviewer/reviewer.ts +87 -1
- package/extensions/code-reviewer/types.ts +112 -0
- package/package.json +1 -1
- package/skills/code-review/lenses/code-quality.md +16 -2
|
@@ -25,6 +25,116 @@ export type LensResult = {
|
|
|
25
25
|
_lensSection?: string;
|
|
26
26
|
};
|
|
27
27
|
|
|
28
|
+
// ── Self-driving review pipeline (Bugbot-style) ──────────────────────────────
|
|
29
|
+
//
|
|
30
|
+
// The tool can run the review itself by driving the session's model through
|
|
31
|
+
// several parallel adversarial passes, bucketing + majority-voting the
|
|
32
|
+
// findings, then validating each survivor — instead of returning a prompt for
|
|
33
|
+
// a single downstream pass. The types below describe that pipeline's data.
|
|
34
|
+
|
|
35
|
+
/** A finding as emitted by one bug-finding pass (before bucketing). */
|
|
36
|
+
export type RawFinding = {
|
|
37
|
+
file: string;
|
|
38
|
+
line?: number;
|
|
39
|
+
severity: LensSeverity;
|
|
40
|
+
message: string;
|
|
41
|
+
/** Optional bug taxonomy tag the pass assigned (e.g. "boundary-input"). */
|
|
42
|
+
category?: string;
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
/** A merged bucket of near-duplicate raw findings across passes. */
|
|
46
|
+
export type CandidateFinding = RawFinding & {
|
|
47
|
+
/** Number of DISTINCT passes that independently surfaced this bucket. */
|
|
48
|
+
votes: number;
|
|
49
|
+
/** Indices of the passes that contributed (0-based). */
|
|
50
|
+
passIndices: number[];
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
/** A candidate after the validator stage has confirmed or refuted it. */
|
|
54
|
+
export type ValidatedFinding = CandidateFinding & {
|
|
55
|
+
verdict: 'real' | 'false-positive';
|
|
56
|
+
/** Validator confidence in `verdict`, 0..1. */
|
|
57
|
+
confidence: number;
|
|
58
|
+
justification?: string;
|
|
59
|
+
/** Distinct model keys whose passes contributed to this finding (for the
|
|
60
|
+
* model bake-off: "which model caught this"). */
|
|
61
|
+
models: string[];
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
/** Reasoning/thinking effort for a step (mirrors pi-ai's `ThinkingLevel`). */
|
|
65
|
+
export type ReasoningLevel = 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
|
|
66
|
+
|
|
67
|
+
/** A per-step model choice in config: either a bare spec string
|
|
68
|
+
* ("provider/id", id, or name) or that spec plus a reasoning level. */
|
|
69
|
+
export type ModelSpec = { model: string; reasoning?: ReasoningLevel };
|
|
70
|
+
export type ModelStepConfig = string | ModelSpec;
|
|
71
|
+
|
|
72
|
+
/** A resolved per-step assignment the pipeline runs against. `key` is either
|
|
73
|
+
* {@link DEFAULT_MODEL_KEY} (the session model) or a spec that resolved to a
|
|
74
|
+
* real model; `label` is the human display (key + reasoning). */
|
|
75
|
+
export type ModelAssignment = {
|
|
76
|
+
key: string;
|
|
77
|
+
label: string;
|
|
78
|
+
reasoning?: ReasoningLevel;
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
export type ModelPlan = {
|
|
82
|
+
/** Assignment for each pass; length equals the configured pass count (`ReviewPipelineConfig.passes`), filled round-robin from config. */
|
|
83
|
+
passes: ModelAssignment[];
|
|
84
|
+
/** Assignment for the validator stage. */
|
|
85
|
+
validator: ModelAssignment;
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
/** Counts describing what the pipeline did, for transparency in the report. */
|
|
89
|
+
export type PipelineTelemetry = {
|
|
90
|
+
passes: number;
|
|
91
|
+
passFindingCounts: number[];
|
|
92
|
+
buckets: number;
|
|
93
|
+
candidates: number;
|
|
94
|
+
validated: number;
|
|
95
|
+
droppedFalsePositives: number;
|
|
96
|
+
droppedLowSignal: number;
|
|
97
|
+
failedPasses: number;
|
|
98
|
+
/** Model key used for each pass (parallel to pass index). */
|
|
99
|
+
passModels: string[];
|
|
100
|
+
/** Model key used for the validator stage. */
|
|
101
|
+
validatorModel: string;
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
export type PipelineResult = {
|
|
105
|
+
findings: ValidatedFinding[];
|
|
106
|
+
telemetry: PipelineTelemetry;
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
/** Tunables for the self-driving pipeline (all overridable in config). */
|
|
110
|
+
export type ReviewPipelineConfig = {
|
|
111
|
+
/** Parallel adversarial bug-finding passes. 0 disables the pipeline
|
|
112
|
+
* (falls back to returning a single-pass review prompt). */
|
|
113
|
+
passes: number;
|
|
114
|
+
/** Run the validator stage that falsifies each surviving candidate. */
|
|
115
|
+
validate: boolean;
|
|
116
|
+
/** Min distinct passes a NOTE-severity bucket needs to survive pre-validation
|
|
117
|
+
* (blockers/warnings are never dropped for low votes). */
|
|
118
|
+
minVotes: number;
|
|
119
|
+
/** Max passes run concurrently. */
|
|
120
|
+
concurrency: number;
|
|
121
|
+
/** Base sampling temperature; each pass adds a small deterministic jitter so
|
|
122
|
+
* passes diverge instead of collapsing onto identical reasoning. */
|
|
123
|
+
temperature: number;
|
|
124
|
+
/** Hard cap on findings returned (safety valve against runaway output). */
|
|
125
|
+
maxFindings: number;
|
|
126
|
+
/** Model for ALL passes — a spec string or `{ model, reasoning }`. Omitted →
|
|
127
|
+
* session model. Overridden per-pass by {@link passModels}. */
|
|
128
|
+
passModel?: ModelStepConfig;
|
|
129
|
+
/** Models rotated round-robin across passes — run the same diff through
|
|
130
|
+
* several models/reasoning levels in one review (a bake-off). Overrides
|
|
131
|
+
* `passModel`. */
|
|
132
|
+
passModels?: ModelStepConfig[];
|
|
133
|
+
/** Model for the validator stage — a spec string or `{ model, reasoning }`.
|
|
134
|
+
* Omitted → session model. */
|
|
135
|
+
validateModel?: ModelStepConfig;
|
|
136
|
+
};
|
|
137
|
+
|
|
28
138
|
// NOTE: findings + summary on LensResult describe what the agent produces in
|
|
29
139
|
// its follow-up message; the tool/command layer emits a review *task*, it does
|
|
30
140
|
// not parse findings back into a rendered report.
|
|
@@ -38,4 +148,6 @@ export type ReviewConfig = {
|
|
|
38
148
|
/** Max lens tools run in parallel. Tools are deduped across lenses first,
|
|
39
149
|
* so this bounds the distinct command set, not lens count. */
|
|
40
150
|
toolConcurrency: number;
|
|
151
|
+
/** Self-driving pipeline tunables (see {@link ReviewPipelineConfig}). */
|
|
152
|
+
review: ReviewPipelineConfig;
|
|
41
153
|
};
|
package/package.json
CHANGED
package/skills/code-review/lenses/code-quality.md
CHANGED
|
@@ -10,11 +10,25 @@ Evaluates changes for correctness, dead code introduction, and adherence to proj
|
|
|
10
10
|
- Are there any obvious bugs or logic errors?
|
|
11
11
|
- Does the code avoid known anti-patterns for the project's framework?
|
|
12
12
|
|
|
13
|
+
### Adversarial inputs (enumerate, don't assume)
|
|
14
|
+
For each changed function, construct the edge inputs that break it rather than
|
|
15
|
+
trusting the happy path or the surrounding comment:
|
|
16
|
+
- `null` / `undefined` / `NaN` / `Infinity` / `-0` / `""` / `[]` / `{}` / huge /
|
|
17
|
+
negative / duplicate / out-of-order / unicode.
|
|
18
|
+
- Type guards and truthiness checks that the wrong value defeats: `typeof NaN === "number"`,
|
|
19
|
+
`typeof null === "object"`, `0`/`""`/`NaN` as falsy, `JSON.parse` of
|
|
20
|
+
attacker input. Prefer `Number.isFinite` / explicit checks.
|
|
21
|
+
- **Claim-vs-code audit:** for every comment or test that asserts an invariant
|
|
22
|
+
("non-numeric falls through", "never empty") — find the input that violates it
|
|
23
|
+
and confirm the code actually enforces the claim.
|
|
24
|
+
- Off-by-one, boundary indices, wrong id/key space, missing `await`, swallowed
|
|
25
|
+
errors, unhandled rejection, cancellation/abort paths.
|
|
26
|
+
|
|
13
27
|
## Tools
|
|
14
28
|
- `bun run typecheck`
|
|
15
29
|
- `bun run lint`
|
|
16
30
|
|
|
17
31
|
## Severity
|
|
18
|
-
- blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths
|
|
19
|
-
- warning: New lint violations, unused code, inconsistent naming
|
|
32
|
+
- blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths, an edge input (NaN/empty/boundary) that crashes or corrupts on a path users hit
|
|
33
|
+
- warning: New lint violations, unused code, inconsistent naming, an unguarded edge input on a lower-risk path, a comment/test claim the code does not actually honor
|
|
20
34
|
- note: Style suggestions, minor improvements
|