specvector 0.3.1 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/config/index.ts +5 -5
- package/src/index.ts +2 -2
- package/src/mcp/mcp-client.ts +3 -2
- package/src/pipeline/batcher.ts +543 -0
- package/src/pipeline/classifier.ts +361 -0
- package/src/pipeline/index.ts +34 -0
- package/src/pipeline/merger.ts +329 -0
- package/src/review/engine.ts +31 -8
- package/src/review/json-parser.ts +283 -0
- package/src/utils/redact.ts +125 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* File Risk Classifier for the Scalable Review Pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Classifies each file in a PR diff as SKIP, FAST_PASS, or DEEP_DIVE
|
|
5
|
+
* using heuristics only (no LLM calls). This determines how each file
|
|
6
|
+
* is reviewed: skipped entirely, single-pass LLM review, or full
|
|
7
|
+
* agent-loop deep review.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { DiffFile, ParsedDiff } from "../types/diff";
|
|
11
|
+
|
|
12
|
+
/**
 * Review depth assigned to a file in a PR diff: `SKIP` (not reviewed),
 * `FAST_PASS` (single-pass LLM review) or `DEEP_DIVE` (full agent-loop review).
 */
export type RiskLevel =
  | "SKIP"
  | "FAST_PASS"
  | "DEEP_DIVE";

/** One diff file together with the classification it received. */
export interface ClassifiedFile {
  /** Path used for classification: `newPath` when set, otherwise `oldPath`. */
  path: string;

  /** Risk level chosen for this file. */
  risk: RiskLevel;

  /** Human-readable explanations of why `risk` was chosen. */
  reasons: string[];

  /** The untouched diff entry this classification was derived from. */
  diffFile: DiffFile;
}

/** Outcome of classifying a whole diff: per-file results plus totals. */
export interface ClassificationResult {
  /** One entry per file of the diff. */
  files: ClassifiedFile[];

  /** Number of files that ended up in each risk level. */
  counts: {
    skip: number;
    fastPass: number;
    deepDive: number;
  };
}
|
|
32
|
+
|
|
33
|
+
// --- SKIP patterns ---

/** Exact basenames of dependency lockfiles. */
const LOCKFILE_NAMES: ReadonlySet<string> = new Set([
  "bun.lock",
  "bun.lockb",
  "package-lock.json",
  "yarn.lock",
  "pnpm-lock.yaml",
  "composer.lock",
  "Gemfile.lock",
  "Cargo.lock",
  "poetry.lock",
]);

/** Extensions (leading dot included) of documentation, data and asset files. */
const SKIP_EXTENSIONS: ReadonlySet<string> = new Set([
  ".md",
  ".mdx",
  ".txt",
  ".toml",
  ".ini",
  ".csv",
  ".svg",
  ".ico",
]);

/** Basename prefixes of well-known housekeeping files; checked in this order. */
const SKIP_FILENAME_PREFIXES: readonly string[] = [
  "LICENSE",
  "CHANGELOG",
  "CHANGES",
  "HISTORY",
  ".gitignore",
  ".gitattributes",
  ".editorconfig",
  ".prettierrc",
  ".eslintignore",
  ".dockerignore",
];

/** Directory segments (trailing slash included) that hold build or tool output. */
const SKIP_PATH_SEGMENTS: readonly string[] = [
  "dist/",
  "build/",
  "out/",
  "coverage/",
  ".next/",
  "node_modules/",
];

/** Basename patterns identifying generated artifacts; checked in this order. */
const GENERATED_PATTERNS: readonly RegExp[] = [
  /\.generated\.\w+$/,
  /\.min\.(js|css)$/,
  /\.d\.ts$/,
  /\.map$/,
  /\.snap$/,
];

/** Extensions treated as plain config, unless the basename is listed in CONFIG_JSON_EXCEPTIONS. */
const SKIP_CONFIG_EXTENSIONS: ReadonlySet<string> = new Set([".json"]);

/** JSON basenames that are NOT skipped by the config-JSON rule. */
const CONFIG_JSON_EXCEPTIONS: ReadonlySet<string> = new Set(["package.json", "tsconfig.json"]);

/** `.env` template basenames that are treated as safe to skip. */
const SAFE_ENV_NAMES: ReadonlySet<string> = new Set([".env.example", ".env.sample", ".env.template"]);
|
|
85
|
+
|
|
86
|
+
// --- DEEP_DIVE patterns ---

/**
 * Lower-case names that mark a path as security-sensitive when they appear
 * as a whole directory name or as the part of a file name before a dot.
 */
const SECURITY_PATH_SEGMENTS: readonly string[] = [
  "auth",
  "crypto",
  "payment",
  "secret",
  "token",
  "session",
  "permission",
  "middleware",
  "security",
  "credential",
  "sql",
  "migration",
  "database",
  "db",
];

/** Case-insensitive basename patterns that mark a file as security-sensitive. */
const SECURITY_FILENAME_PATTERNS: readonly RegExp[] = [
  /password/i,
  /credential/i,
  /oauth/i,
  /jwt/i,
  /encrypt/i,
  /decrypt/i,
  /apikey/i,
  /api-key/i,
  /query/i,
];

/** Path prefixes considered core architecture. */
const CORE_ARCHITECTURE_PATHS: readonly string[] = [
  "src/index.ts",
  "src/agent/",
  "src/llm/",
  "src/pipeline/",
];

/** A file counts as high churn when additions + deletions is strictly greater than this. */
const HIGH_CHURN_THRESHOLD = 50;

/** A file counts as having many hunks when its hunk count is strictly greater than this. */
const HIGH_HUNK_THRESHOLD = 3;
|
|
127
|
+
|
|
128
|
+
/**
 * Assign a risk level to every file of a parsed diff.
 *
 * Each file is classified under its `newPath`, falling back to `oldPath` and
 * finally to the literal "unknown". A one-line summary is written to stdout.
 *
 * @param diff - Parsed diff whose `files` are classified in order.
 * @returns The per-file classifications and how many files fell into each level.
 */
export function classifyFiles(diff: ParsedDiff): ClassificationResult {
  const counts = { skip: 0, fastPass: 0, deepDive: 0 };
  const files: ClassifiedFile[] = [];

  for (const entry of diff.files) {
    const resolvedPath = entry.newPath ?? entry.oldPath ?? "unknown";
    const result = classifyFile(resolvedPath, entry);
    files.push(result);

    // Tally the verdict in the matching counter.
    if (result.risk === "SKIP") {
      counts.skip += 1;
    } else if (result.risk === "FAST_PASS") {
      counts.fastPass += 1;
    } else if (result.risk === "DEEP_DIVE") {
      counts.deepDive += 1;
    }
  }

  const { skip, fastPass, deepDive } = counts;
  console.log(
    `📊 Classified ${files.length} files: ${skip} SKIP, ${fastPass} FAST_PASS, ${deepDive} DEEP_DIVE`
  );

  return { files, counts };
}
|
|
159
|
+
|
|
160
|
+
/**
 * Classify a single file as DEEP_DIVE, SKIP or FAST_PASS.
 *
 * Precedence, highest first:
 *  1. DEEP_DIVE when `checkDeepDive` reports a reason that does not depend on
 *     the size of the diff (path / filename based). Such files are never
 *     skipped, even if they also match a SKIP rule.
 *  2. SKIP when `checkSkip` matches. A large diff alone does not override
 *     this, so lockfiles, generated output, docs and deleted files are not
 *     escalated to a deep review merely because many lines changed.
 *  3. DEEP_DIVE when only the size heuristics (churn / hunk count) fired.
 *  4. FAST_PASS otherwise.
 *
 * @param path - Path the file is classified under.
 * @param diffFile - Diff entry for the file.
 * @returns The classification, including every reason reported for it.
 */
function classifyFile(path: string, diffFile: DiffFile): ClassifiedFile {
  // For renamed files the previous path is screened as well.
  const previousPath =
    diffFile.status === "renamed" ? (diffFile.oldPath ?? undefined) : undefined;

  const deepDiveReasons = checkDeepDive(path, diffFile, previousPath);

  if (deepDiveReasons) {
    // Re-run the check on a copy with no churn and no hunks: anything still
    // reported is independent of diff size and must outrank SKIP.
    const sizeNeutral: DiffFile = { ...diffFile, additions: 0, deletions: 0, hunks: [] };
    if (checkDeepDive(path, sizeNeutral, previousPath)) {
      return { path, risk: "DEEP_DIVE", reasons: deepDiveReasons, diffFile };
    }
  }

  const skipReasons = checkSkip(path, diffFile);
  if (skipReasons) {
    return { path, risk: "SKIP", reasons: skipReasons, diffFile };
  }

  // Not skippable, but large enough to warrant the deep review.
  if (deepDiveReasons) {
    return { path, risk: "DEEP_DIVE", reasons: deepDiveReasons, diffFile };
  }

  return {
    path,
    risk: "FAST_PASS",
    reasons: ["Standard change, single-pass review"],
    diffFile,
  };
}
|
|
190
|
+
|
|
191
|
+
/**
 * Collect every reason why a file may be left out of review.
 *
 * All rules are evaluated; the result lists each one that matched, in rule
 * order.
 *
 * @param path - Path of the file.
 * @param diffFile - Diff entry for the file (binary flag and status are read).
 * @returns The matched reasons, or `null` when no SKIP rule applies.
 */
function checkSkip(path: string, diffFile: DiffFile): string[] | null {
  const filename = path.slice(path.lastIndexOf("/") + 1);
  const ext = getExtension(filename);
  const reasons: string[] = [];

  // Binary content
  if (diffFile.binary) reasons.push("Binary file");

  // Dependency lockfiles
  if (LOCKFILE_NAMES.has(filename)) reasons.push(`Lockfile: ${filename}`);

  // Generated artifacts (first matching pattern is reported)
  const generatedPattern = GENERATED_PATTERNS.find((re) => re.test(filename));
  if (generatedPattern !== undefined) {
    reasons.push(`Generated file: matches ${generatedPattern}`);
  }

  // Build / tool output directories (first matching segment is reported)
  const outputSegment = SKIP_PATH_SEGMENTS.find((seg) => pathContainsSegment(path, seg));
  if (outputSegment !== undefined) {
    reasons.push(`Output directory: ${outputSegment}`);
  }

  // Documentation and markup
  if (SKIP_EXTENSIONS.has(ext)) {
    reasons.push(`Documentation/config extension: ${ext}`);
  }

  // YAML configuration
  if ([".yaml", ".yml"].includes(ext)) {
    reasons.push(`Config file: ${ext}`);
  }

  // JSON configuration, apart from the excepted manifests
  const isExceptedJson = CONFIG_JSON_EXCEPTIONS.has(filename);
  if (!isExceptedJson && SKIP_CONFIG_EXTENSIONS.has(ext)) {
    reasons.push(`Config JSON file: ${filename}`);
  }

  // Well-known housekeeping files (first matching prefix is reported)
  const knownPrefix = SKIP_FILENAME_PREFIXES.find((prefix) => filename.startsWith(prefix));
  if (knownPrefix !== undefined) {
    reasons.push(`Known skip file: ${knownPrefix}`);
  }

  // .env templates that carry no real secrets
  if (SAFE_ENV_NAMES.has(filename)) {
    reasons.push(`Environment template: ${filename}`);
  }

  // Removed files
  if (diffFile.status === "deleted") reasons.push("Deleted file");

  return reasons.length === 0 ? null : reasons;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Collect every reason why a file deserves a DEEP_DIVE review.
 *
 * @param path - Path of the file.
 * @param diffFile - Diff entry for the file (additions, deletions and hunks are read).
 * @param altPath - Optional second path (the previous path of a renamed file)
 *   that is screened for security-sensitive segments and file names too.
 * @returns The matched reasons, or `null` when no DEEP_DIVE rule applies.
 */
function checkDeepDive(path: string, diffFile: DiffFile, altPath?: string): string[] | null {
  const baseName = (p: string): string => p.slice(p.lastIndexOf("/") + 1);
  const reasons: string[] = [];

  // Security-sensitive path: primary path first, then the alternate one.
  // Only the first hit is reported.
  const candidates = [path];
  if (altPath) candidates.push(altPath);
  for (const candidate of candidates) {
    const hit = checkSecurityPath(candidate);
    if (hit) {
      reasons.push(hit);
      break;
    }
  }

  // Security-sensitive file name (first matching pattern is reported)
  const filename = baseName(path);
  const namePattern = SECURITY_FILENAME_PATTERNS.find((re) => re.test(filename));
  if (namePattern !== undefined) {
    reasons.push(`Security-sensitive filename: matches ${namePattern}`);
  }

  // Previous file name of a rename, when it differs from the current one
  if (altPath) {
    const altFilename = baseName(altPath);
    const wasSensitive =
      altFilename !== filename && SECURITY_FILENAME_PATTERNS.some((re) => re.test(altFilename));
    if (wasSensitive) {
      reasons.push(`Security-sensitive renamed from: ${altFilename}`);
    }
  }

  // .env files that are not one of the safe templates
  if (filename.startsWith(".env") && !SAFE_ENV_NAMES.has(filename)) {
    reasons.push(`Environment file with potential secrets: ${filename}`);
  }

  // Large or scattered change
  const totalChurn = diffFile.additions + diffFile.deletions;
  if (totalChurn > HIGH_CHURN_THRESHOLD) {
    reasons.push(`High churn: ${totalChurn} lines changed (threshold: ${HIGH_CHURN_THRESHOLD})`);
  }
  const hunkCount = diffFile.hunks.length;
  if (hunkCount > HIGH_HUNK_THRESHOLD) {
    reasons.push(`Many hunks: ${hunkCount} (threshold: ${HIGH_HUNK_THRESHOLD})`);
  }

  // Core architecture (first matching prefix is reported)
  const corePrefix = CORE_ARCHITECTURE_PATHS.find((prefix) => path.startsWith(prefix));
  if (corePrefix !== undefined) {
    reasons.push(`Core architecture file: ${corePrefix}`);
  }

  return reasons.length === 0 ? null : reasons;
}
|
|
324
|
+
|
|
325
|
+
/**
 * Look for a security-sensitive segment in a path (case-insensitive).
 *
 * A segment matches when it is followed by "/" or "." and sits either at the
 * very start of the path or directly after a "/".
 *
 * @param path - Path to inspect.
 * @returns A reason naming the first matching segment, or `null` if none match.
 */
function checkSecurityPath(path: string): string | null {
  const normalized = path.toLowerCase();

  const hit = SECURITY_PATH_SEGMENTS.find((segment) =>
    [`${segment}/`, `${segment}.`].some(
      (token) => normalized.startsWith(token) || normalized.includes(`/${token}`)
    )
  );

  return hit === undefined ? null : `Security-sensitive path: contains "${hit}"`;
}
|
|
343
|
+
|
|
344
|
+
/**
 * Tell whether `segment` occurs in `path` at a directory boundary, i.e. at
 * the start of the path or right after a "/".
 *
 * With a segment such as "out/", "src/layout/x.ts" does not match because
 * the occurrence is not preceded by "/".
 *
 * @param path - Path to inspect.
 * @param segment - Segment to look for (callers pass it with a trailing slash).
 * @returns `true` when the segment is found at a boundary.
 */
function pathContainsSegment(path: string, segment: string): boolean {
  if (path.startsWith(segment)) {
    return true;
  }
  return path.indexOf(`/${segment}`) !== -1;
}
|
|
351
|
+
|
|
352
|
+
/**
 * Return the extension of a file name, leading dot included.
 *
 * The extension starts at the last dot. A name without a dot, or whose only
 * dot is its first character (e.g. ".gitignore"), yields the empty string.
 *
 * @param filename - Base name of the file (no directory part).
 * @returns The extension such as ".ts", or "" when there is none.
 */
function getExtension(filename: string): string {
  const dotIndex = filename.lastIndexOf(".");
  return dotIndex > 0 ? filename.slice(dotIndex) : "";
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Scalable Review Pipeline - public entry point re-exporting the classifier,
 * batcher and merger modules.
 */

// Classifier
export { classifyFiles } from "./classifier";
export type { RiskLevel, ClassifiedFile, ClassificationResult } from "./classifier";

// Batcher
export {
  runBatchedReviews,
  reviewFastPassBatch,
  reviewDeepDiveFile,
  buildFastPassTask,
  reconstructFileDiff,
  splitIntoBatches,
  runWithConcurrencyLimit,
  FAST_PASS_SYSTEM_PROMPT,
} from "./batcher";
export type { BatchConfig, BatchError, BatchResult } from "./batcher";

// Merger
export {
  mergeFindings,
  deduplicateFindings,
  generalizePatterns,
  sortFindings,
  areSimilarFindings,
  jaccardSimilarity,
} from "./merger";
export type { MergerConfig } from "./merger";
|