@stupify/cli 0.0.16 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/.review/CORPUS.md +73 -0
  2. package/.review/REVIEW-PROMPT.md +52 -0
  3. package/.review/RUBRIC.md +46 -0
  4. package/LICENSE +1 -1
  5. package/README.md +41 -39
  6. package/package.json +24 -25
  7. package/src/cli.ts +358 -0
  8. package/src/review-sweep.ts +492 -0
  9. package/dist/analysis.d.ts +0 -16
  10. package/dist/analysis.js +0 -168
  11. package/dist/cache.d.ts +0 -2
  12. package/dist/cache.js +0 -57
  13. package/dist/checks.d.ts +0 -4
  14. package/dist/checks.js +0 -228
  15. package/dist/command.d.ts +0 -2
  16. package/dist/command.js +0 -147
  17. package/dist/constants.d.ts +0 -4
  18. package/dist/constants.js +0 -53
  19. package/dist/counter-scout.d.ts +0 -21
  20. package/dist/counter-scout.js +0 -167
  21. package/dist/diff.d.ts +0 -1
  22. package/dist/diff.js +0 -10
  23. package/dist/doctor.d.ts +0 -16
  24. package/dist/doctor.js +0 -143
  25. package/dist/git.d.ts +0 -17
  26. package/dist/git.js +0 -368
  27. package/dist/hooks.d.ts +0 -5
  28. package/dist/hooks.js +0 -135
  29. package/dist/index.d.ts +0 -1
  30. package/dist/index.js +0 -1
  31. package/dist/model.d.ts +0 -11
  32. package/dist/model.js +0 -296
  33. package/dist/prompts.d.ts +0 -8
  34. package/dist/prompts.js +0 -89
  35. package/dist/render.d.ts +0 -6
  36. package/dist/render.js +0 -295
  37. package/dist/repomix-provider.d.ts +0 -12
  38. package/dist/repomix-provider.js +0 -196
  39. package/dist/search-bench.d.ts +0 -1
  40. package/dist/search-bench.js +0 -677
  41. package/dist/search-profile.d.ts +0 -6
  42. package/dist/search-profile.js +0 -73
  43. package/dist/sem-provider.d.ts +0 -2
  44. package/dist/sem-provider.js +0 -255
  45. package/dist/stupify.d.ts +0 -38
  46. package/dist/stupify.js +0 -505
  47. package/dist/trace.d.ts +0 -31
  48. package/dist/trace.js +0 -86
  49. package/dist/types.d.ts +0 -341
  50. package/dist/types.js +0 -6
  51. package/dist/ui.d.ts +0 -34
  52. package/dist/ui.js +0 -143
  53. package/src/analysis.ts +0 -223
  54. package/src/cache.ts +0 -63
  55. package/src/checks.ts +0 -231
  56. package/src/command.ts +0 -173
  57. package/src/constants.ts +0 -56
  58. package/src/counter-scout.ts +0 -195
  59. package/src/diff.ts +0 -9
  60. package/src/doctor.ts +0 -166
  61. package/src/git.ts +0 -380
  62. package/src/hooks.ts +0 -151
  63. package/src/index.ts +0 -1
  64. package/src/model.ts +0 -367
  65. package/src/prompts.ts +0 -100
  66. package/src/render.ts +0 -328
  67. package/src/repomix-provider.ts +0 -219
  68. package/src/search-bench.ts +0 -783
  69. package/src/search-profile.ts +0 -89
  70. package/src/sem-provider.ts +0 -300
  71. package/src/stupify.ts +0 -604
  72. package/src/trace.ts +0 -126
  73. package/src/types.ts +0 -362
  74. package/src/ui.ts +0 -187
@@ -1,677 +0,0 @@
1
- import { execFile } from "node:child_process";
2
- import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from "node:fs/promises";
3
- import { tmpdir } from "node:os";
4
- import path from "node:path";
5
- import { promisify } from "node:util";
6
- const execFileAsync = promisify(execFile);
7
- export async function runSearchBench(configPath) {
8
- const startedAt = new Date();
9
- const configFile = path.resolve(configPath);
10
- const configDir = path.dirname(configFile);
11
- const config = JSON.parse(await readFile(configFile, "utf8"));
12
- const outputDir = path.resolve("experiments/results", `${safeSegment(config.name)}-${startedAt.toISOString().replace(/[:.]/g, "-")}`);
13
- const profilesDir = path.join(outputDir, "profiles");
14
- const runsDir = path.join(outputDir, "runs");
15
- const replayDir = path.join(outputDir, "real-replay");
16
- await mkdir(profilesDir, { recursive: true });
17
- await mkdir(runsDir, { recursive: true });
18
- await mkdir(replayDir, { recursive: true });
19
- const profilePaths = await resolveProfilePaths(config.profiles, configDir);
20
- const profiles = await Promise.all(profilePaths.map(readProfile));
21
- await Promise.all(profiles.map(({ profile, filePath }) => writeFile(path.join(profilesDir, `${safeSegment(profile.id)}.json`), JSON.stringify({ source: filePath, ...profile }, null, 2))));
22
- const fixturePaths = await resolveGlob(config.fixtures, configDir);
23
- const fixtures = await Promise.all(fixturePaths.map(readFixture));
24
- const allRuns = [];
25
- const replayRuns = [];
26
- for (const { profile, filePath: profilePath } of profiles) {
27
- for (const { fixture } of fixtures) {
28
- const run = await runFixture(profile.id, profilePath, fixture);
29
- allRuns.push(run);
30
- await writeRunFiles(runsDir, `${fixture.id}__${profile.id}`, run, fixture.description);
31
- }
32
- for (const smoke of config.realSmokeRuns ?? []) {
33
- const run = await runSmoke(profile.id, profilePath, smoke);
34
- allRuns.push(run);
35
- await writeRunFiles(runsDir, `${smoke.id}__${profile.id}`, run, "Real repo smoke run");
36
- }
37
- }
38
- for (const replay of config.realCommitReplay ?? []) {
39
- const runs = await runCommitReplay(replay, profiles, replayDir);
40
- replayRuns.push(...runs);
41
- }
42
- const leaderboard = summarize(profiles.map(({ profile }) => profile), allRuns);
43
- const perCheck = summarizeByCheck(allRuns);
44
- const summary = {
45
- name: config.name,
46
- outputDir,
47
- generatedAt: startedAt.toISOString(),
48
- runs: allRuns,
49
- realReplayRuns: replayRuns,
50
- leaderboard,
51
- perCheck,
52
- };
53
- await writeFile(path.join(outputDir, "summary.json"), JSON.stringify(summary, null, 2));
54
- const leaderboardText = renderLeaderboard(leaderboard, perCheck);
55
- await writeFile(path.join(outputDir, "leaderboard.md"), leaderboardText);
56
- await writeFile(path.join(outputDir, "real-replay-summary.json"), JSON.stringify(replayRuns, null, 2));
57
- await writeFile(path.join(outputDir, "real-replay.md"), renderReplayMarkdown(replayRuns));
58
- await writeFile(path.join(outputDir, "real-replay-review.md"), renderReplayReviewMarkdown(replayRuns));
59
- return `Search bench complete.
60
- Results: ${outputDir}
61
-
62
- ${leaderboardText}`;
63
- }
64
- async function runFixture(profileId, profilePath, fixture) {
65
- const tempDir = await mkdtemp(path.join(tmpdir(), "stupify-search-fixture-"));
66
- try {
67
- await execFileAsync("git", ["init", "-q"], { cwd: tempDir });
68
- const patchPath = path.join(tempDir, "fixture.patch");
69
- await writeFile(patchPath, fixture.stagedPatch);
70
- await execFileAsync("git", ["apply", "--recount", "--whitespace=nowarn", patchPath], { cwd: tempDir, maxBuffer: 32 * 1024 * 1024 });
71
- await rm(patchPath, { force: true });
72
- await execFileAsync("git", ["add", "-A"], { cwd: tempDir });
73
- const result = await runCli(tempDir, ["--staged", "--json", "--search-profile", profilePath]);
74
- const run = resultToBenchRun(profileId, result, { fixtureId: fixture.id, expected: fixture.expected });
75
- return {
76
- ...run,
77
- score: scoreFixtureRun(run, fixture.expected),
78
- };
79
- }
80
- catch (error) {
81
- return errorRun(profileId, { fixtureId: fixture.id, expected: fixture.expected }, error);
82
- }
83
- finally {
84
- await rm(tempDir, { recursive: true, force: true });
85
- }
86
- }
87
- async function runSmoke(profileId, profilePath, smoke) {
88
- const cwd = resolveSmokeCwd(smoke.cwd);
89
- if (!cwd) {
90
- return {
91
- profileId,
92
- smokeId: smoke.id,
93
- elapsedMs: 0,
94
- modelCalls: 0,
95
- patterns: [],
96
- targets: 0,
97
- targetsByPattern: {},
98
- inputTokens: 0,
99
- skipped: true,
100
- skipReason: "missing_cwd",
101
- matches: [],
102
- targetsPreview: [],
103
- matchesUsingCounterReasonAsProof: 0,
104
- score: -5,
105
- error: "Smoke cwd is not configured. Set BEVYL_REPO or provide cwd.",
106
- };
107
- }
108
- try {
109
- const result = await runCli(cwd, [...smoke.args, "--json", "--search-profile", profilePath]);
110
- const run = resultToBenchRun(profileId, result, { smokeId: smoke.id });
111
- return {
112
- ...run,
113
- score: scoreSmokeRun(run),
114
- };
115
- }
116
- catch (error) {
117
- return errorRun(profileId, { smokeId: smoke.id }, error);
118
- }
119
- }
120
- async function runCommitReplay(replay, profiles, replayDir) {
121
- const cwd = resolveReplayCwd(replay);
122
- if (!cwd) {
123
- return replay.profiles.map((profileId) => replayErrorRun(replay.id, profileId, { sha: "", shortSha: "(none)" }, new Error(`Replay cwd is not configured. Set ${replay.repoEnv ?? "repo env"} or provide cwd.`)));
124
- }
125
- const commits = await replayCommits(cwd, replay);
126
- const profilesById = new Map(profiles.map((profile) => [profile.profile.id, profile]));
127
- const runs = [];
128
- for (const commit of commits) {
129
- for (const profileId of replay.profiles) {
130
- const profile = profilesById.get(profileId);
131
- const run = profile
132
- ? await runReplayCommit(cwd, replay.id, commit, profile.profile.id, profile.filePath)
133
- : replayErrorRun(replay.id, profileId, commit, new Error(`Unknown replay profile: ${profileId}`));
134
- runs.push(run);
135
- await writeFile(path.join(replayDir, `${safeSegment(replay.id)}__${safeSegment(commit.shortSha)}__${safeSegment(profileId)}.json`), JSON.stringify(run, null, 2));
136
- }
137
- }
138
- return runs;
139
- }
140
- async function replayCommits(cwd, replay) {
141
- const args = ["log", "--format=%H", `-${replay.limit}`];
142
- if (replay.nonMerge)
143
- args.push("--no-merges");
144
- if (replay.since)
145
- args.push(`--since=${replay.since}`);
146
- const { stdout } = await execFileAsync("git", args, { cwd, maxBuffer: 32 * 1024 * 1024 });
147
- return stdout
148
- .split(/\r?\n/)
149
- .map((sha) => sha.trim())
150
- .filter(Boolean)
151
- .map((sha) => ({ sha, shortSha: sha.slice(0, 7) }));
152
- }
153
- async function runReplayCommit(repoCwd, replayId, commit, profileId, profilePath) {
154
- const tempDir = await mkdtemp(path.join(tmpdir(), "stupify-replay-"));
155
- let worktreeAdded = false;
156
- try {
157
- const parent = `${commit.sha}^`;
158
- const stats = await commitStats(repoCwd, parent, commit.sha);
159
- await execFileAsync("git", ["worktree", "add", "--detach", tempDir, parent], { cwd: repoCwd, maxBuffer: 64 * 1024 * 1024 });
160
- worktreeAdded = true;
161
- const { stdout: patch } = await execFileAsync("git", ["diff", "--binary", parent, commit.sha], { cwd: repoCwd, maxBuffer: 128 * 1024 * 1024 });
162
- const patchPath = path.join(tempDir, "commit.patch");
163
- await writeFile(patchPath, patch);
164
- await execFileAsync("git", ["apply", "--cached", "--whitespace=nowarn", patchPath], { cwd: tempDir, maxBuffer: 128 * 1024 * 1024 });
165
- await rm(patchPath, { force: true });
166
- const result = await runCli(tempDir, ["--staged", "--json", "--search-profile", profilePath]);
167
- return replayResult(replayId, profileId, commit, result, stats);
168
- }
169
- catch (error) {
170
- return replayErrorRun(replayId, profileId, commit, error);
171
- }
172
- finally {
173
- if (worktreeAdded) {
174
- await execFileAsync("git", ["worktree", "remove", "--force", tempDir], { cwd: repoCwd, maxBuffer: 64 * 1024 * 1024 }).catch(async () => {
175
- await rm(tempDir, { recursive: true, force: true });
176
- await execFileAsync("git", ["worktree", "prune"], { cwd: repoCwd }).catch(() => undefined);
177
- });
178
- }
179
- else {
180
- await rm(tempDir, { recursive: true, force: true });
181
- }
182
- }
183
- }
184
- async function commitStats(cwd, parent, target) {
185
- const { stdout } = await execFileAsync("git", ["diff", "--numstat", parent, target], { cwd, maxBuffer: 32 * 1024 * 1024 });
186
- let changedFiles = 0;
187
- let addedLines = 0;
188
- let deletedLines = 0;
189
- for (const line of stdout.split(/\r?\n/).filter(Boolean)) {
190
- const [added, deleted] = line.split(/\s+/);
191
- changedFiles += 1;
192
- addedLines += numericStat(added);
193
- deletedLines += numericStat(deleted);
194
- }
195
- return { changedFiles, addedLines, deletedLines };
196
- }
197
- function replayResult(replayId, profileId, commit, result, stats) {
198
- return {
199
- replayId,
200
- profileId,
201
- commitId: commit.shortSha,
202
- outcome: replayOutcome(result),
203
- changedFiles: stats.changedFiles,
204
- addedLines: stats.addedLines,
205
- deletedLines: stats.deletedLines,
206
- elapsedMs: result.stats.elapsedMs,
207
- skipped: result.stats.skipped ?? false,
208
- skipReason: result.stats.skipReason,
209
- targets: result.stats.searchTargets ?? result.stats.candidates ?? 0,
210
- inputTokens: result.stats.inputTokens ?? 0,
211
- repomixPackedTokens: result.stats.repomixTokens,
212
- modelCalls: result.stats.modelCalls,
213
- matches: result.matches,
214
- matchesByPattern: countMatches(result.matches),
215
- };
216
- }
217
- function replayOutcome(result) {
218
- if (result.stats.skipReason === "input_too_large")
219
- return "skipped_input_too_large";
220
- if (result.stats.skipReason === "no_candidates")
221
- return "no_candidates";
222
- if (result.matches.length > 0)
223
- return "ran_with_matches";
224
- return "ran_no_matches";
225
- }
226
- function replayErrorRun(replayId, profileId, commit, error) {
227
- return {
228
- replayId,
229
- profileId,
230
- commitId: commit.shortSha,
231
- outcome: "error",
232
- changedFiles: 0,
233
- addedLines: 0,
234
- deletedLines: 0,
235
- elapsedMs: 0,
236
- skipped: true,
237
- skipReason: "error",
238
- targets: 0,
239
- inputTokens: 0,
240
- modelCalls: 0,
241
- matches: [],
242
- matchesByPattern: {},
243
- error: error instanceof Error ? error.message : String(error),
244
- };
245
- }
246
- async function runCli(cwd, args) {
247
- const startedAt = Date.now();
248
- const cliPath = process.argv[1];
249
- if (!cliPath)
250
- throw new Error("Could not resolve current CLI entrypoint.");
251
- const { stdout } = await execFileAsync(process.execPath, [cliPath, ...args], {
252
- cwd,
253
- env: process.env,
254
- maxBuffer: 128 * 1024 * 1024,
255
- });
256
- const parsed = JSON.parse(stdout);
257
- return {
258
- ...parsed,
259
- stats: {
260
- ...parsed.stats,
261
- elapsedMs: parsed.stats.elapsedMs || Date.now() - startedAt,
262
- },
263
- };
264
- }
265
- function resultToBenchRun(profileId, result, identity) {
266
- return {
267
- profileId,
268
- fixtureId: identity.fixtureId,
269
- smokeId: identity.smokeId,
270
- elapsedMs: result.stats.elapsedMs,
271
- modelCalls: result.stats.modelCalls,
272
- patterns: result.patterns,
273
- targets: result.stats.searchTargets ?? result.stats.candidates ?? 0,
274
- targetsByPattern: result.stats.targetsByPattern ?? {},
275
- inputTokens: result.stats.inputTokens ?? 0,
276
- repomixPackedTokens: result.stats.repomixTokens,
277
- skipped: result.stats.skipped ?? false,
278
- skipReason: result.stats.skipReason,
279
- matches: result.matches,
280
- expected: identity.expected,
281
- targetsPreview: result.stats.targetsPreview ?? [],
282
- matchesUsingCounterReasonAsProof: countCounterReasonProofs(result.matches),
283
- };
284
- }
285
- function errorRun(profileId, identity, error) {
286
- return {
287
- profileId,
288
- fixtureId: identity.fixtureId,
289
- smokeId: identity.smokeId,
290
- elapsedMs: 0,
291
- modelCalls: 0,
292
- patterns: [],
293
- targets: 0,
294
- targetsByPattern: {},
295
- inputTokens: 0,
296
- skipped: true,
297
- skipReason: "error",
298
- matches: [],
299
- expected: identity.expected,
300
- targetsPreview: [],
301
- matchesUsingCounterReasonAsProof: 0,
302
- score: identity.fixtureId ? -3 : -5,
303
- error: error instanceof Error ? error.message : String(error),
304
- };
305
- }
306
- function scoreFixtureRun(run, expected) {
307
- const activePatterns = new Set(run.patterns.map((pattern) => pattern));
308
- const activeExpected = expected.filter((item) => activePatterns.has(item.patternId));
309
- let score = run.skipped && activeExpected.some((item) => item.shouldMatch) ? -3 : 0;
310
- const matchCounts = countMatches(run.matches);
311
- const expectedPatterns = new Set(activeExpected.map((item) => item.patternId));
312
- for (const item of activeExpected) {
313
- const matched = (matchCounts[item.patternId] ?? 0) > 0;
314
- if (item.shouldMatch && matched)
315
- score += 5;
316
- if (item.shouldMatch && !matched)
317
- score -= 4;
318
- if (!item.shouldMatch && !matched)
319
- score += 2;
320
- if (!item.shouldMatch && matched)
321
- score -= 10;
322
- }
323
- for (const match of run.matches) {
324
- const id = match.patternId;
325
- if (!expectedPatterns.has(id))
326
- score -= 6;
327
- }
328
- score -= (run.elapsedMs / 1000) * 0.05;
329
- score -= (run.inputTokens / 1000) * 0.001;
330
- return round(score);
331
- }
332
- function scoreSmokeRun(run) {
333
- let score = 0;
334
- if (run.skipped)
335
- score -= 5;
336
- if (run.matches.length > 3)
337
- score -= 3;
338
- if (run.elapsedMs > 60_000)
339
- score -= 5;
340
- if (run.inputTokens > 12_000 && run.skipped)
341
- score -= 5;
342
- score -= (run.elapsedMs / 1000) * 0.05;
343
- score -= (run.inputTokens / 1000) * 0.001;
344
- return round(score);
345
- }
346
- function summarize(profiles, runs) {
347
- const rows = profiles.map((profile) => {
348
- const fixtureRuns = runs.filter((run) => run.profileId === profile.id && run.fixtureId);
349
- const smokeRuns = runs.filter((run) => run.profileId === profile.id && run.smokeId);
350
- const counts = fixtureRuns.reduce((acc, run) => addFixtureCounts(acc, run), emptyCounts());
351
- const positiveFixtureCount = fixtureRuns
352
- .flatMap((run) => (run.expected ?? []).filter((item) => run.patterns.some((pattern) => pattern === item.patternId)))
353
- .filter((expected) => expected.shouldMatch).length;
354
- const avgMs = fixtureRuns.length === 0
355
- ? 0
356
- : fixtureRuns.reduce((sum, run) => sum + run.elapsedMs, 0) / fixtureRuns.length;
357
- const decision = decisionForProfile(counts, positiveFixtureCount, smokeRuns);
358
- return {
359
- profileId: profile.id,
360
- fixtureScore: round(fixtureRuns.reduce((sum, run) => sum + (run.score ?? 0), 0)),
361
- falsePositives: counts.fp,
362
- falseNegatives: counts.fn,
363
- truePositives: counts.tp,
364
- trueNegatives: counts.tn,
365
- wrongPatterns: counts.wp,
366
- assignedCheckFalsePositives: counts.assignedFp,
367
- avgMs: Math.round(avgMs),
368
- smokeMatches: smokeRuns.reduce((sum, run) => sum + run.matches.length, 0),
369
- smokeSkipped: smokeRuns.filter((run) => run.skipped).length,
370
- matchesUsingCounterReasonAsProof: fixtureRuns.reduce((sum, run) => sum + run.matchesUsingCounterReasonAsProof, 0),
371
- decision,
372
- };
373
- });
374
- return rows.sort((a, b) => b.fixtureScore - a.fixtureScore);
375
- }
376
- function summarizeByCheck(runs) {
377
- const counts = new Map();
378
- for (const run of runs.filter((item) => item.fixtureId)) {
379
- const expected = run.expected ?? [];
380
- const activePatterns = new Set(run.patterns.map((pattern) => pattern));
381
- const activeExpected = expected.filter((item) => activePatterns.has(item.patternId));
382
- for (const item of activeExpected) {
383
- const current = counts.get(item.patternId) ?? emptyCounts();
384
- const matched = run.matches.some((match) => match.patternId === item.patternId);
385
- if (item.shouldMatch && matched)
386
- current.tp += 1;
387
- if (item.shouldMatch && !matched)
388
- current.fn += 1;
389
- if (!item.shouldMatch && matched) {
390
- current.fp += 1;
391
- current.assignedFp += 1;
392
- }
393
- if (!item.shouldMatch && !matched)
394
- current.tn += 1;
395
- counts.set(item.patternId, current);
396
- }
397
- const expectedPatterns = new Set(activeExpected.map((item) => item.patternId));
398
- for (const match of run.matches) {
399
- const id = match.patternId;
400
- if (expectedPatterns.has(id))
401
- continue;
402
- const current = counts.get(id) ?? emptyCounts();
403
- current.fp += 1;
404
- current.wp += 1;
405
- counts.set(id, current);
406
- }
407
- }
408
- return [...counts.entries()]
409
- .map(([checkId, count]) => ({
410
- checkId,
411
- truePositives: count.tp,
412
- falsePositives: count.fp,
413
- falseNegatives: count.fn,
414
- wrongPatterns: count.wp,
415
- assignedCheckFalsePositives: count.assignedFp,
416
- decision: checkDecision(count),
417
- }))
418
- .sort((a, b) => a.checkId.localeCompare(b.checkId));
419
- }
420
- function addFixtureCounts(counts, run) {
421
- const expected = run.expected ?? [];
422
- const activePatterns = new Set(run.patterns.map((pattern) => pattern));
423
- const activeExpected = expected.filter((item) => activePatterns.has(item.patternId));
424
- const matchCounts = countMatches(run.matches);
425
- const expectedPatterns = new Set(activeExpected.map((item) => item.patternId));
426
- for (const item of activeExpected) {
427
- const matched = (matchCounts[item.patternId] ?? 0) > 0;
428
- if (item.shouldMatch && matched)
429
- counts.tp += 1;
430
- if (item.shouldMatch && !matched)
431
- counts.fn += 1;
432
- if (!item.shouldMatch && !matched)
433
- counts.tn += 1;
434
- if (!item.shouldMatch && matched) {
435
- counts.fp += 1;
436
- counts.assignedFp += 1;
437
- }
438
- }
439
- for (const match of run.matches) {
440
- const id = match.patternId;
441
- if (!expectedPatterns.has(id)) {
442
- if (!expectedPatterns.has(id))
443
- counts.fp += 1;
444
- counts.wp += 1;
445
- }
446
- }
447
- return counts;
448
- }
449
- function emptyCounts() {
450
- return { tp: 0, tn: 0, fp: 0, fn: 0, wp: 0, assignedFp: 0 };
451
- }
452
- function decisionForProfile(counts, positiveFixtureCount, smokeRuns) {
453
- if (counts.fp > 0)
454
- return "reject: false positives";
455
- if (counts.wp > 0)
456
- return "reject: wrong pattern";
457
- if (counts.tp < Math.ceil(positiveFixtureCount * 0.6))
458
- return "reject: low recall";
459
- if (smokeRuns.some((run) => run.matches.length > 3))
460
- return "reject: noisy smoke";
461
- if (smokeRuns.some((run) => run.elapsedMs > 60_000))
462
- return "reject: slow smoke";
463
- if (smokeRuns.some((run) => run.skipped))
464
- return "fixture candidate";
465
- return "candidate hook default";
466
- }
467
- function checkDecision(counts) {
468
- if (counts.fp > 0)
469
- return "not search-safe";
470
- if (counts.tp === 0 && counts.fn > 0)
471
- return "blind";
472
- if (counts.fn > counts.tp)
473
- return "low recall";
474
- return "candidate";
475
- }
476
- function countMatches(matches) {
477
- const counts = {};
478
- for (const match of matches)
479
- counts[match.patternId] = (counts[match.patternId] ?? 0) + 1;
480
- return counts;
481
- }
482
- function countCounterReasonProofs(matches) {
483
- return matches.filter((match) => /counter_reason/i.test(match.proof)).length;
484
- }
485
- function emptyReplayOutcomeCounts() {
486
- return {
487
- runs: 0,
488
- no_candidates: 0,
489
- ran_no_matches: 0,
490
- ran_with_matches: 0,
491
- skipped_input_too_large: 0,
492
- error: 0,
493
- matches: 0,
494
- modelCalls: 0,
495
- targets: 0,
496
- };
497
- }
498
- async function writeRunFiles(runsDir, id, run, description) {
499
- const safeId = safeSegment(id);
500
- await writeFile(path.join(runsDir, `${safeId}.json`), JSON.stringify(run, null, 2));
501
- await writeFile(path.join(runsDir, `${safeId}.md`), renderRunMarkdown(run, description));
502
- }
503
- function renderRunMarkdown(run, description) {
504
- return `# ${run.fixtureId ?? run.smokeId}
505
-
506
- Profile: ${run.profileId}
507
- Description: ${description}
508
- Runtime: ${run.elapsedMs}ms
509
- Targets: ${run.targets}
510
- Model calls: ${run.modelCalls}
511
- Input tokens: ${run.inputTokens}
512
- Counter-reason proofs: ${run.matchesUsingCounterReasonAsProof}
513
- Skipped: ${run.skipped ? `${run.skipReason ?? "yes"}` : "no"}
514
- Score: ${run.score ?? "n/a"}
515
-
516
- ## Matches
517
- ${run.matches.length === 0 ? "(none)" : run.matches.map((match, index) => `${index + 1}. ${match.patternId} (${match.targetId})
518
- reason: ${match.reason}
519
- proof: ${match.proof}`).join("\n")}
520
-
521
- ## Expected
522
- ${(run.expected ?? []).length === 0 ? "(none)" : (run.expected ?? []).map((expected) => `- ${expected.patternId}: ${expected.shouldMatch ? "match" : "no match"}`).join("\n")}
523
-
524
- ## Targets
525
- ${run.targetsPreview.length === 0 ? "(none)" : run.targetsPreview.map((target) => `- ${target.targetId}: ${target.patternId} ${target.entityKind ?? ""} ${target.sourceKind ?? ""}`.trim()).join("\n")}
526
-
527
- ${run.error ? `## Error\n${run.error}\n` : ""}`;
528
- }
529
- function renderLeaderboard(rows, perCheck) {
530
- const table = rows.map((row, index) => `| ${index + 1} | ${row.profileId} | ${row.fixtureScore} | ${row.falsePositives} | ${row.wrongPatterns} | ${row.assignedCheckFalsePositives} | ${row.falseNegatives} | ${row.truePositives} | ${row.matchesUsingCounterReasonAsProof} | ${row.avgMs} | ${row.smokeMatches} | ${row.smokeSkipped} | ${row.decision} |`).join("\n");
531
- const checkTable = perCheck.map((row) => `| ${row.checkId} | ${row.truePositives} | ${row.falsePositives} | ${row.wrongPatterns} | ${row.assignedCheckFalsePositives} | ${row.falseNegatives} | ${row.decision} |`).join("\n");
532
- return `# Search Bench Leaderboard
533
-
534
- | rank | profile | fixture score | FP | wrong FP | assigned FP | FN | TP | counter-proof | avg ms | smoke matches | smoke skipped | decision |
535
- |---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|
536
- ${table}
537
-
538
- ## Per-Check Summary
539
-
540
- | check | TP | FP | wrong FP | assigned FP | FN | decision |
541
- |---|---:|---:|---:|---:|---:|---|
542
- ${checkTable}
543
- `;
544
- }
545
- function renderReplayMarkdown(runs) {
546
- const table = runs.map((run) => {
547
- const patterns = Object.entries(run.matchesByPattern)
548
- .filter(([, count]) => count > 0)
549
- .map(([pattern, count]) => count === 1 ? pattern : `${pattern}(${count})`)
550
- .join(", ") || "-";
551
- return `| ${run.profileId} | ${run.commitId} | ${run.changedFiles} | +${run.addedLines}/-${run.deletedLines} | ${run.outcome} | ${run.elapsedMs} | ${run.targets} | ${run.inputTokens} | ${run.matches.length} | ${patterns} | |`;
552
- }).join("\n");
553
- const outcomeTable = renderReplayOutcomeSummary(runs);
554
- return `# Real Staged Replay
555
-
556
- ${outcomeTable}
557
-
558
- | profile | commit | files | +/- | outcome | ms | targets | input tokens | matches | patterns | manual |
559
- |---|---|---:|---:|---|---:|---:|---:|---:|---|---|
560
- ${table}
561
- `;
562
- }
563
- function renderReplayReviewMarkdown(runs) {
564
- const matched = runs.filter((run) => run.matches.length > 0);
565
- if (matched.length === 0)
566
- return "# Real Replay Review\n\nNo real replay matches.\n";
567
- return `# Real Replay Review
568
-
569
- ${matched.flatMap((run) => run.matches.map((match) => `## ${run.profileId} / ${run.commitId}
570
-
571
- Pattern: ${match.patternId}
572
- Target: ${match.targetId}
573
- Reason: ${match.reason}
574
- Proof: ${match.proof}
575
- Manual label: [good / maybe / bad]
576
- Notes:
577
- `)).join("\n")}`;
578
- }
579
- function renderReplayOutcomeSummary(runs) {
580
- const byProfile = new Map();
581
- for (const run of runs) {
582
- const current = byProfile.get(run.profileId) ?? emptyReplayOutcomeCounts();
583
- current.runs += 1;
584
- current[run.outcome] += 1;
585
- current.matches += run.matches.length;
586
- current.modelCalls += run.modelCalls;
587
- current.targets += run.targets;
588
- byProfile.set(run.profileId, current);
589
- }
590
- const table = [...byProfile.entries()].map(([profile, counts]) => `| ${profile} | ${counts.runs} | ${counts.no_candidates} | ${counts.ran_no_matches} | ${counts.ran_with_matches} | ${counts.skipped_input_too_large} | ${counts.error} | ${counts.matches} | ${counts.modelCalls} | ${counts.targets} |`).join("\n");
591
- return `## Outcome Summary
592
-
593
- | profile | runs | no candidates | ran no matches | ran with matches | input too large | errors | matches | model calls | targets |
594
- |---|---:|---:|---:|---:|---:|---:|---:|---:|---:|
595
- ${table}`;
596
- }
597
- async function resolveProfilePaths(profilePaths, configDir) {
598
- return Promise.all(profilePaths.map((profilePath) => resolvePath(profilePath, configDir)));
599
- }
600
- async function resolveGlob(pattern, configDir) {
601
- const resolved = await resolvePath(pattern, configDir, false);
602
- if (!resolved.includes("*"))
603
- return [resolved];
604
- const before = resolved.slice(0, resolved.indexOf("*"));
605
- const after = resolved.slice(resolved.indexOf("*") + 1);
606
- const dir = before.endsWith(path.sep) ? before.slice(0, -1) : path.dirname(before);
607
- const prefix = before.endsWith(path.sep) ? "" : path.basename(before);
608
- const entries = await readdir(dir);
609
- return entries
610
- .filter((entry) => entry.startsWith(prefix) && entry.endsWith(after))
611
- .map((entry) => path.join(dir, entry))
612
- .sort();
613
- }
614
- async function resolvePath(input, configDir, mustExist = true) {
615
- const expanded = input.startsWith("~/") ? path.join(process.env.HOME ?? "", input.slice(2)) : input;
616
- const fromCwd = path.resolve(expanded);
617
- const fromConfig = path.resolve(configDir, expanded);
618
- if (!mustExist || await exists(fromCwd))
619
- return fromCwd;
620
- if (await exists(fromConfig))
621
- return fromConfig;
622
- return fromCwd;
623
- }
624
- async function readProfile(filePath) {
625
- const profile = JSON.parse(await readFile(filePath, "utf8"));
626
- if (!profile.id)
627
- throw new Error(`Search profile missing id: ${filePath}`);
628
- return { filePath, profile };
629
- }
630
- async function readFixture(filePath) {
631
- const fixture = JSON.parse(await readFile(filePath, "utf8"));
632
- if (!fixture.id)
633
- throw new Error(`Search fixture missing id: ${filePath}`);
634
- return { filePath, fixture };
635
- }
636
- async function exists(filePath) {
637
- try {
638
- await readFile(filePath);
639
- return true;
640
- }
641
- catch {
642
- return false;
643
- }
644
- }
645
- function resolveSmokeCwd(cwd) {
646
- if (!cwd)
647
- return process.cwd();
648
- if (cwd === "$BEVYL_REPO")
649
- return process.env.BEVYL_REPO ?? null;
650
- if (cwd.startsWith("$BEVYL_REPO/")) {
651
- const root = process.env.BEVYL_REPO;
652
- return root ? path.join(root, cwd.slice("$BEVYL_REPO/".length)) : null;
653
- }
654
- return cwd.startsWith("~/") ? path.join(process.env.HOME ?? "", cwd.slice(2)) : cwd;
655
- }
656
- function resolveReplayCwd(replay) {
657
- if (replay.cwd)
658
- return expandPath(replay.cwd);
659
- if (replay.repoEnv) {
660
- const value = process.env[replay.repoEnv];
661
- return value ? expandPath(value) : null;
662
- }
663
- return process.cwd();
664
- }
665
- function expandPath(input) {
666
- return input.startsWith("~/") ? path.join(process.env.HOME ?? "", input.slice(2)) : input;
667
- }
668
- function safeSegment(value) {
669
- return value.replace(/[^A-Za-z0-9._-]+/g, "_").replace(/^_+|_+$/g, "") || "run";
670
- }
671
- function round(value) {
672
- return Math.round(value * 1000) / 1000;
673
- }
674
- function numericStat(value) {
675
- const parsed = Number(value);
676
- return Number.isFinite(parsed) ? parsed : 0;
677
- }
@@ -1,6 +0,0 @@
1
- import type { RepomixSearchConfig, SearchProfile, StupifyCheck } from "./types.ts";
2
- export declare function loadSearchProfile(profilePath: string | null): Promise<SearchProfile | null>;
3
- export declare function effectiveSearchChecks(explicitCheckIds: readonly string[] | null, profile: SearchProfile | null): readonly StupifyCheck[];
4
- export declare function effectiveMaxCandidates(defaultValue: number, profile: SearchProfile | null): number;
5
- export declare function effectiveMaxSearchInputTokens(defaultValue: number, profile: SearchProfile | null): number;
6
- export declare function effectiveRepomixConfig(defaultValue: RepomixSearchConfig, profile: SearchProfile | null): RepomixSearchConfig;