@dreki-gg/pi-code-reviewer 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,116 @@ export type LensResult = {
25
25
  _lensSection?: string;
26
26
  };
27
27
 
28
+ // ── Self-driving review pipeline (Bugbot-style) ──────────────────────────────
29
+ //
30
+ // The tool can run the review itself by driving the session's model through
31
+ // several parallel adversarial passes, bucketing + majority-voting the
32
+ // findings, then validating each survivor — instead of returning a prompt for
33
+ // a single downstream pass. The types below describe that pipeline's data.
34
+
35
+ /** A finding as emitted by one bug-finding pass (before bucketing). */
36
+ export type RawFinding = {
37
+ file: string;
38
+ line?: number;
39
+ severity: LensSeverity;
40
+ message: string;
41
+ /** Optional bug taxonomy tag the pass assigned (e.g. "boundary-input"). */
42
+ category?: string;
43
+ };
44
+
45
+ /** A merged bucket of near-duplicate raw findings across passes. */
46
+ export type CandidateFinding = RawFinding & {
47
+ /** Number of DISTINCT passes that independently surfaced this bucket. */
48
+ votes: number;
49
+ /** Indices of the passes that contributed (0-based). */
50
+ passIndices: number[];
51
+ };
52
+
53
+ /** A candidate after the validator stage has confirmed or refuted it. */
54
+ export type ValidatedFinding = CandidateFinding & {
55
+ verdict: 'real' | 'false-positive';
56
+ /** Validator confidence in `verdict`, 0..1. */
57
+ confidence: number;
58
+ justification?: string;
59
+ /** Distinct model keys whose passes contributed to this finding (for the
60
+ * model bake-off: "which model caught this"). */
61
+ models: string[];
62
+ };
63
+
64
+ /** Reasoning/thinking effort for a step (mirrors pi-ai's `ThinkingLevel`). */
65
+ export type ReasoningLevel = 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
66
+
67
+ /** A per-step model choice in config: either a bare spec string
68
+ * ("provider/id", id, or name) or that spec plus a reasoning level. */
69
+ export type ModelSpec = { model: string; reasoning?: ReasoningLevel };
70
+ export type ModelStepConfig = string | ModelSpec;
71
+
72
+ /** A resolved per-step assignment the pipeline runs against. `key` is either
73
+ * {@link DEFAULT_MODEL_KEY} (the session model) or a spec that resolved to a
74
+ * real model; `label` is the human display (key + reasoning). */
75
+ export type ModelAssignment = {
76
+ key: string;
77
+ label: string;
78
+ reasoning?: ReasoningLevel;
79
+ };
80
+
81
+ export type ModelPlan = {
82
+ /** Assignment for each pass, length === `passes` (round-robin from config). */
83
+ passes: ModelAssignment[];
84
+ /** Assignment for the validator stage. */
85
+ validator: ModelAssignment;
86
+ };
87
+
88
+ /** Counts describing what the pipeline did, for transparency in the report. */
89
+ export type PipelineTelemetry = {
90
+ passes: number;
91
+ passFindingCounts: number[];
92
+ buckets: number;
93
+ candidates: number;
94
+ validated: number;
95
+ droppedFalsePositives: number;
96
+ droppedLowSignal: number;
97
+ failedPasses: number;
98
+ /** Model key used for each pass (parallel to pass index). */
99
+ passModels: string[];
100
+ /** Model key used for the validator stage. */
101
+ validatorModel: string;
102
+ };
103
+
104
+ export type PipelineResult = {
105
+ findings: ValidatedFinding[];
106
+ telemetry: PipelineTelemetry;
107
+ };
108
+
109
+ /** Tunables for the self-driving pipeline (all overridable in config). */
110
+ export type ReviewPipelineConfig = {
111
+ /** Parallel adversarial bug-finding passes. 0 disables the pipeline
112
+ * (falls back to returning a single-pass review prompt). */
113
+ passes: number;
114
+ /** Run the validator stage that falsifies each surviving candidate. */
115
+ validate: boolean;
116
+ /** Min distinct passes a NOTE-severity bucket needs to survive pre-validation
117
+ * (blockers/warnings are never dropped for low votes). */
118
+ minVotes: number;
119
+ /** Max passes run concurrently. */
120
+ concurrency: number;
121
+ /** Base sampling temperature; each pass adds a small deterministic jitter so
122
+ * passes diverge instead of collapsing onto identical reasoning. */
123
+ temperature: number;
124
+ /** Hard cap on findings returned (safety valve against runaway output). */
125
+ maxFindings: number;
126
+ /** Model for ALL passes — a spec string or `{ model, reasoning }`. Omitted →
127
+ * session model. Overridden per-pass by {@link passModels}. */
128
+ passModel?: ModelStepConfig;
129
+ /** Models rotated round-robin across passes — run the same diff through
130
+ * several models/reasoning levels in one review (a bake-off). Overrides
131
+ * `passModel`. */
132
+ passModels?: ModelStepConfig[];
133
+ /** Model for the validator stage — a spec string or `{ model, reasoning }`.
134
+ * Omitted → session model. */
135
+ validateModel?: ModelStepConfig;
136
+ };
137
+
28
138
  // NOTE: findings + summary on LensResult describe what the agent produces in
29
139
  // its follow-up message; the tool/command layer emits a review *task*, it does
30
140
  // not parse findings back into a rendered report.
@@ -38,4 +148,6 @@ export type ReviewConfig = {
38
148
  /** Max lens tools run in parallel. Tools are deduped across lenses first,
39
149
  * so this bounds the distinct command set, not lens count. */
40
150
  toolConcurrency: number;
151
+ /** Self-driving pipeline tunables (see {@link ReviewPipelineConfig}). */
152
+ review: ReviewPipelineConfig;
41
153
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dreki-gg/pi-code-reviewer",
3
- "version": "0.4.0",
3
+ "version": "0.5.0",
4
4
  "description": "Multi-lens code review extension for pi — configurable review criteria per project",
5
5
  "keywords": [
6
6
  "pi-package"
@@ -10,11 +10,25 @@ Evaluates changes for correctness, dead code introduction, and adherence to proj
10
10
  - Are there any obvious bugs or logic errors?
11
11
  - Does the code avoid known anti-patterns for the project's framework?
12
12
 
13
+ ### Adversarial inputs (enumerate, don't assume)
14
+ For each changed function, construct the edge inputs that break it rather than
15
+ trusting the happy path or the surrounding comment:
16
+ - `null` / `undefined` / `NaN` / `Infinity` / `-0` / `""` / `[]` / `{}` / huge /
17
+ negative / duplicate / out-of-order / unicode.
18
+ - Type and truthiness guards that the wrong value defeats: `typeof NaN === "number"`,
19
+ `typeof null === "object"`, `0`/`""`/`NaN` as falsy, `JSON.parse` of
20
+ attacker input. Prefer `Number.isFinite` / explicit checks.
21
+ - **Claim-vs-code audit:** for every comment or test that asserts an invariant
22
+ ("non-numeric falls through", "never empty") — find the input that violates it
23
+ and confirm the code actually enforces the claim.
24
+ - Off-by-one, boundary indices, wrong id/key space, missing `await`, swallowed
25
+ errors, unhandled rejection, cancellation/abort paths.
26
+
13
27
  ## Tools
14
28
  - `bun run typecheck`
15
29
  - `bun run lint`
16
30
 
17
31
  ## Severity
18
- - blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths
19
- - warning: New lint violations, unused code, inconsistent naming
32
+ - blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths, an edge input (NaN/empty/boundary) that crashes or corrupts data on a path users hit
33
+ - warning: New lint violations, unused code, inconsistent naming, an unguarded edge input on a lower-risk path, a comment/test claim the code does not actually honor
20
34
  - note: Style suggestions, minor improvements