@stupify/cli 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +26 -31
  2. package/dist/analysis.d.ts +11 -9
  3. package/dist/analysis.js +30 -173
  4. package/dist/checks.d.ts +1 -0
  5. package/dist/checks.js +89 -2
  6. package/dist/command.js +55 -91
  7. package/dist/constants.d.ts +1 -1
  8. package/dist/constants.js +1 -1
  9. package/dist/counter-scout.js +70 -8
  10. package/dist/doctor.d.ts +4 -0
  11. package/dist/doctor.js +131 -0
  12. package/dist/git.d.ts +4 -1
  13. package/dist/git.js +34 -0
  14. package/dist/hooks.d.ts +3 -0
  15. package/dist/hooks.js +117 -0
  16. package/dist/model.d.ts +1 -15
  17. package/dist/model.js +37 -21
  18. package/dist/prompts.d.ts +8 -5
  19. package/dist/prompts.js +58 -168
  20. package/dist/render.d.ts +2 -2
  21. package/dist/render.js +70 -78
  22. package/dist/repomix-provider.d.ts +10 -2
  23. package/dist/repomix-provider.js +62 -11
  24. package/dist/search-bench.d.ts +1 -0
  25. package/dist/search-bench.js +675 -0
  26. package/dist/search-profile.d.ts +6 -0
  27. package/dist/search-profile.js +73 -0
  28. package/dist/sem-provider.d.ts +2 -2
  29. package/dist/sem-provider.js +33 -7
  30. package/dist/stupify.d.ts +2 -0
  31. package/dist/stupify.js +183 -333
  32. package/dist/types.d.ts +193 -109
  33. package/package.json +1 -1
  34. package/src/analysis.ts +48 -268
  35. package/src/checks.ts +91 -2
  36. package/src/command.ts +62 -107
  37. package/src/constants.ts +1 -1
  38. package/src/counter-scout.ts +63 -7
  39. package/src/doctor.ts +140 -0
  40. package/src/git.ts +35 -1
  41. package/src/hooks.ts +134 -0
  42. package/src/model.ts +39 -26
  43. package/src/prompts.ts +66 -202
  44. package/src/render.ts +68 -79
  45. package/src/repomix-provider.ts +66 -10
  46. package/src/search-bench.ts +783 -0
  47. package/src/search-profile.ts +89 -0
  48. package/src/sem-provider.ts +36 -9
  49. package/src/stupify.ts +213 -526
  50. package/src/types.ts +195 -119
  51. package/dist/batcher.d.ts +0 -3
  52. package/dist/batcher.js +0 -142
  53. package/dist/candidate-context.d.ts +0 -2
  54. package/dist/candidate-context.js +0 -40
  55. package/dist/experiment.d.ts +0 -1
  56. package/dist/experiment.js +0 -225
  57. package/src/batcher.ts +0 -198
  58. package/src/candidate-context.ts +0 -43
  59. package/src/experiment.ts +0 -317
@@ -0,0 +1,675 @@
1
+ import { execFile } from "node:child_process";
2
+ import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import path from "node:path";
5
+ import { promisify } from "node:util";
6
// Promise-returning form of child_process.execFile, created with util.promisify.
+ const execFileAsync = promisify(execFile);
7
/**
 * Runs the search benchmark described by the JSON config file at `configPath`.
 *
 * Reads `name`, `profiles`, `fixtures`, and the optional `realSmokeRuns` /
 * `realCommitReplay` lists from the config, runs every fixture and every smoke
 * run once per profile (sequentially), then runs the commit replays.
 *
 * Results are written below `experiments/results/<name>-<timestamp>` resolved
 * against the current working directory: profile copies in `profiles/`,
 * per-run files in `runs/`, per-commit replay files in `real-replay/`, plus
 * `summary.json`, `leaderboard.md`, `real-replay-summary.json`,
 * `real-replay.md` and `real-replay-review.md`.
 *
 * @param configPath Path to the bench config JSON file.
 * @returns A text report containing the output directory and the rendered leaderboard.
 * NOTE(review): the parsed config is used without validation; a missing
 * `profiles` or `fixtures` field will surface as a TypeError from the helpers.
 */
+ export async function runSearchBench(configPath) {
8
+ const startedAt = new Date();
9
+ const configFile = path.resolve(configPath);
10
+ const configDir = path.dirname(configFile);
11
+ const config = JSON.parse(await readFile(configFile, "utf8"));
12
+ const outputDir = path.resolve("experiments/results", `${safeSegment(config.name)}-${startedAt.toISOString().replace(/[:.]/g, "-")}`);
13
+ const profilesDir = path.join(outputDir, "profiles");
14
+ const runsDir = path.join(outputDir, "runs");
15
+ const replayDir = path.join(outputDir, "real-replay");
16
+ await mkdir(profilesDir, { recursive: true });
17
+ await mkdir(runsDir, { recursive: true });
18
+ await mkdir(replayDir, { recursive: true });
19
+ const profilePaths = await resolveProfilePaths(config.profiles, configDir);
20
+ const profiles = await Promise.all(profilePaths.map(readProfile));
// Snapshot every profile (with the path it was loaded from) next to the results.
21
+ await Promise.all(profiles.map(({ profile, filePath }) => writeFile(path.join(profilesDir, `${safeSegment(profile.id)}.json`), JSON.stringify({ source: filePath, ...profile }, null, 2))));
22
+ const fixturePaths = await resolveGlob(config.fixtures, configDir);
23
+ const fixtures = await Promise.all(fixturePaths.map(readFixture));
24
+ const allRuns = [];
25
+ const replayRuns = [];
// Fixture and smoke runs are executed one at a time, profile by profile.
26
+ for (const { profile, filePath: profilePath } of profiles) {
27
+ for (const { fixture } of fixtures) {
28
+ const run = await runFixture(profile.id, profilePath, fixture);
29
+ allRuns.push(run);
30
+ await writeRunFiles(runsDir, `${fixture.id}__${profile.id}`, run, fixture.description);
31
+ }
32
+ for (const smoke of config.realSmokeRuns ?? []) {
33
+ const run = await runSmoke(profile.id, profilePath, smoke);
34
+ allRuns.push(run);
35
+ await writeRunFiles(runsDir, `${smoke.id}__${profile.id}`, run, "Real repo smoke run");
36
+ }
37
+ }
// Commit replays select their own profiles by id and are kept out of `allRuns`.
38
+ for (const replay of config.realCommitReplay ?? []) {
39
+ const runs = await runCommitReplay(replay, profiles, replayDir);
40
+ replayRuns.push(...runs);
41
+ }
42
+ const leaderboard = summarize(profiles.map(({ profile }) => profile), fixtures.map(({ fixture }) => fixture), allRuns);
43
+ const perCheck = summarizeByCheck(allRuns);
44
+ const summary = {
45
+ name: config.name,
46
+ outputDir,
47
+ generatedAt: startedAt.toISOString(),
48
+ runs: allRuns,
49
+ realReplayRuns: replayRuns,
50
+ leaderboard,
51
+ perCheck,
52
+ };
53
+ await writeFile(path.join(outputDir, "summary.json"), JSON.stringify(summary, null, 2));
54
+ const leaderboardText = renderLeaderboard(leaderboard, perCheck);
55
+ await writeFile(path.join(outputDir, "leaderboard.md"), leaderboardText);
56
+ await writeFile(path.join(outputDir, "real-replay-summary.json"), JSON.stringify(replayRuns, null, 2));
57
+ await writeFile(path.join(outputDir, "real-replay.md"), renderReplayMarkdown(replayRuns));
58
+ await writeFile(path.join(outputDir, "real-replay-review.md"), renderReplayReviewMarkdown(replayRuns));
59
+ return `Search bench complete.
60
+ Results: ${outputDir}
61
+
62
+ ${leaderboardText}`;
63
+ }
64
/**
 * Runs one fixture against one search profile inside a throwaway git repository.
 *
 * A temporary directory is initialised as a git repo, the fixture's
 * `stagedPatch` is applied and staged, and the CLI is invoked with
 * `--staged --json --search-profile <profilePath>`. The temporary directory is
 * always removed afterwards.
 *
 * @param profileId Identifier recorded on the resulting run.
 * @param profilePath Path to the profile file handed to the CLI.
 * @param fixture Fixture object; `id`, `stagedPatch` and `expected` are read.
 * @returns The scored bench run, or an error run if any step throws.
 */
async function runFixture(profileId, profilePath, fixture) {
  const sandbox = await mkdtemp(path.join(tmpdir(), "stupify-search-fixture-"));
  const git = (gitArgs, extraOptions = {}) =>
    execFileAsync("git", gitArgs, { cwd: sandbox, ...extraOptions });
  try {
    await git(["init", "-q"]);
    const patchFile = path.join(sandbox, "fixture.patch");
    await writeFile(patchFile, fixture.stagedPatch);
    await git(["apply", "--recount", "--whitespace=nowarn", patchFile], {
      maxBuffer: 32 * 1024 * 1024,
    });
    // Remove the patch file so it is not staged together with the fixture content.
    await rm(patchFile, { force: true });
    await git(["add", "-A"]);
    const cliResult = await runCli(sandbox, ["--staged", "--json", "--search-profile", profilePath]);
    const benchRun = resultToBenchRun(profileId, cliResult, {
      fixtureId: fixture.id,
      expected: fixture.expected,
    });
    const score = scoreFixtureRun(benchRun, fixture.expected);
    return { ...benchRun, score };
  } catch (error) {
    return errorRun(profileId, { fixtureId: fixture.id, expected: fixture.expected }, error);
  } finally {
    await rm(sandbox, { recursive: true, force: true });
  }
}
87
/**
 * Runs the CLI once against a real repository ("smoke run") for one profile.
 *
 * The working directory comes from `resolveSmokeCwd(smoke.cwd)`. When it cannot
 * be resolved, a skipped run with reason `missing_cwd` and score -5 is returned
 * without invoking the CLI.
 *
 * @param profileId Identifier recorded on the resulting run.
 * @param profilePath Path to the profile file handed to the CLI.
 * @param smoke Smoke-run description; `id`, `cwd` and `args` are read.
 * @returns The scored bench run, a skipped run, or an error run if the CLI fails.
 */
async function runSmoke(profileId, profilePath, smoke) {
  const workingDir = resolveSmokeCwd(smoke.cwd);
  if (workingDir) {
    try {
      const cliArgs = [...smoke.args, "--json", "--search-profile", profilePath];
      const cliResult = await runCli(workingDir, cliArgs);
      const benchRun = resultToBenchRun(profileId, cliResult, { smokeId: smoke.id });
      return { ...benchRun, score: scoreSmokeRun(benchRun) };
    } catch (error) {
      return errorRun(profileId, { smokeId: smoke.id }, error);
    }
  }
  // No usable working directory: report the run as skipped instead of failing.
  const skippedRun = {
    profileId,
    smokeId: smoke.id,
    elapsedMs: 0,
    modelCalls: 0,
    patterns: [],
    targets: 0,
    targetsByPattern: {},
    inputTokens: 0,
    skipped: true,
    skipReason: "missing_cwd",
    matches: [],
    targetsPreview: [],
    matchesUsingCounterReasonAsProof: 0,
    score: -5,
    error: "Smoke cwd is not configured. Set BEVYL_REPO or provide cwd.",
  };
  return skippedRun;
}
120
/**
 * Replays recent commits of a real repository through the CLI for each profile
 * id listed in `replay.profiles`, writing one JSON file per (commit, profile)
 * into `replayDir`.
 *
 * @param replay Replay description; `id`, `profiles`, `repoEnv` and the fields
 *   consumed by `resolveReplayCwd` / `replayCommits` are read.
 * @param profiles Loaded profiles as `{ profile, filePath }` records.
 * @param replayDir Directory receiving the per-run JSON files.
 * @returns All replay runs in execution order. When the repository directory
 *   cannot be resolved, one error run per requested profile is returned and
 *   nothing is written.
 */
async function runCommitReplay(replay, profiles, replayDir) {
  const repoDir = resolveReplayCwd(replay);
  if (!repoDir) {
    const placeholderCommit = { sha: "", shortSha: "(none)" };
    return replay.profiles.map((profileId) =>
      replayErrorRun(
        replay.id,
        profileId,
        placeholderCommit,
        new Error(`Replay cwd is not configured. Set ${replay.repoEnv ?? "repo env"} or provide cwd.`),
      ),
    );
  }

  const commits = await replayCommits(repoDir, replay);
  const knownProfiles = new Map();
  for (const entry of profiles) {
    knownProfiles.set(entry.profile.id, entry);
  }

  const collected = [];
  for (const commit of commits) {
    for (const profileId of replay.profiles) {
      const entry = knownProfiles.get(profileId);
      let run;
      if (entry) {
        run = await runReplayCommit(repoDir, replay.id, commit, entry.profile.id, entry.filePath);
      } else {
        run = replayErrorRun(replay.id, profileId, commit, new Error(`Unknown replay profile: ${profileId}`));
      }
      collected.push(run);
      const fileName = `${safeSegment(replay.id)}__${safeSegment(commit.shortSha)}__${safeSegment(profileId)}.json`;
      await writeFile(path.join(replayDir, fileName), JSON.stringify(run, null, 2));
    }
  }
  return collected;
}
140
/**
 * Lists the commits to replay by running `git log --format=%H -<limit>` in
 * `cwd`, optionally adding `--no-merges` (when `replay.nonMerge` is truthy) and
 * `--since=<replay.since>`.
 *
 * @param cwd Repository directory in which git is run.
 * @param replay Replay description; `limit`, `nonMerge` and `since` are read.
 * @returns One `{ sha, shortSha }` record per non-empty output line, where
 *   `shortSha` is the first seven characters of the hash.
 */
async function replayCommits(cwd, replay) {
  const logArgs = [
    "log",
    "--format=%H",
    `-${replay.limit}`,
    ...(replay.nonMerge ? ["--no-merges"] : []),
    ...(replay.since ? [`--since=${replay.since}`] : []),
  ];
  const { stdout } = await execFileAsync("git", logArgs, { cwd, maxBuffer: 32 * 1024 * 1024 });
  const commits = [];
  for (const rawLine of stdout.split(/\r?\n/)) {
    const sha = rawLine.trim();
    if (sha) {
      commits.push({ sha, shortSha: sha.slice(0, 7) });
    }
  }
  return commits;
}
153
/**
 * Replays a single commit through the CLI.
 *
 * A detached git worktree of the commit's first parent is created in a fresh
 * temporary directory, the parent→commit diff is applied to that worktree's
 * index (`git apply --cached`), and the CLI is run there with `--staged`.
 *
 * Cleanup: if the worktree was added it is removed with
 * `git worktree remove --force`; should that fail, the directory is deleted
 * and `git worktree prune` is attempted on a best-effort basis. If the
 * worktree was never added, only the temporary directory is deleted.
 *
 * @param repoCwd Repository that owns the commit.
 * @param replayId Identifier of the replay configuration.
 * @param commit `{ sha, shortSha }` record of the commit to replay.
 * @param profileId Identifier recorded on the resulting run.
 * @param profilePath Path to the profile file handed to the CLI.
 * @returns The replay run, or an error run if any step throws.
 */
async function runReplayCommit(repoCwd, replayId, commit, profileId, profilePath) {
  const MIB = 1024 * 1024;
  const worktreeDir = await mkdtemp(path.join(tmpdir(), "stupify-replay-"));
  let attached = false;
  try {
    const parentRef = `${commit.sha}^`;
    const stats = await commitStats(repoCwd, parentRef, commit.sha);
    await execFileAsync("git", ["worktree", "add", "--detach", worktreeDir, parentRef], {
      cwd: repoCwd,
      maxBuffer: 64 * MIB,
    });
    attached = true;
    const diff = await execFileAsync("git", ["diff", "--binary", parentRef, commit.sha], {
      cwd: repoCwd,
      maxBuffer: 128 * MIB,
    });
    const patchFile = path.join(worktreeDir, "commit.patch");
    await writeFile(patchFile, diff.stdout);
    await execFileAsync("git", ["apply", "--cached", "--whitespace=nowarn", patchFile], {
      cwd: worktreeDir,
      maxBuffer: 128 * MIB,
    });
    await rm(patchFile, { force: true });
    const cliResult = await runCli(worktreeDir, ["--staged", "--json", "--search-profile", profilePath]);
    return replayResult(replayId, profileId, commit, cliResult, stats);
  } catch (error) {
    return replayErrorRun(replayId, profileId, commit, error);
  } finally {
    if (!attached) {
      await rm(worktreeDir, { recursive: true, force: true });
    } else {
      try {
        await execFileAsync("git", ["worktree", "remove", "--force", worktreeDir], {
          cwd: repoCwd,
          maxBuffer: 64 * MIB,
        });
      } catch {
        // git could not remove the worktree: delete the directory ourselves and
        // let git forget the stale registration.
        await rm(worktreeDir, { recursive: true, force: true });
        try {
          await execFileAsync("git", ["worktree", "prune"], { cwd: repoCwd });
        } catch {
          // Pruning is best effort.
        }
      }
    }
  }
}
184
/**
 * Summarises `git diff --numstat <parent> <target>` run in `cwd`.
 *
 * Each non-empty output line counts as one changed file; its first two
 * whitespace-separated columns are added to the added/deleted totals. Columns
 * that are not finite numbers contribute 0.
 *
 * @returns `{ changedFiles, addedLines, deletedLines }`.
 */
async function commitStats(cwd, parent, target) {
  const { stdout } = await execFileAsync("git", ["diff", "--numstat", parent, target], {
    cwd,
    maxBuffer: 32 * 1024 * 1024,
  });
  const totals = { changedFiles: 0, addedLines: 0, deletedLines: 0 };
  const rows = stdout.split(/\r?\n/).filter(Boolean);
  rows.forEach((row) => {
    const columns = row.split(/\s+/);
    totals.changedFiles += 1;
    totals.addedLines += numericStat(columns[0]);
    totals.deletedLines += numericStat(columns[1]);
  });
  return totals;
}
197
/**
 * Builds the replay-run record for a commit from a CLI result and diff stats.
 *
 * `targets` prefers `stats.searchTargets`, then `stats.candidates`, then 0;
 * `skipped` and `inputTokens` default to `false` and 0 when absent.
 *
 * @param replayId Identifier of the replay configuration.
 * @param profileId Identifier of the profile that was used.
 * @param commit Commit record; only `shortSha` is stored (as `commitId`).
 * @param result Parsed CLI output with `stats` and `matches`.
 * @param stats `{ changedFiles, addedLines, deletedLines }` for the commit.
 */
function replayResult(replayId, profileId, commit, result, stats) {
  const { changedFiles, addedLines, deletedLines } = stats;
  const cliStats = result.stats;
  const outcome = replayOutcome(result);
  const targets = cliStats.searchTargets ?? cliStats.candidates ?? 0;
  return {
    replayId,
    profileId,
    commitId: commit.shortSha,
    outcome,
    changedFiles,
    addedLines,
    deletedLines,
    elapsedMs: cliStats.elapsedMs,
    skipped: cliStats.skipped ?? false,
    skipReason: cliStats.skipReason,
    targets,
    inputTokens: cliStats.inputTokens ?? 0,
    repomixPackedTokens: cliStats.repomixTokens,
    modelCalls: cliStats.modelCalls,
    matches: result.matches,
    matchesByPattern: countMatches(result.matches),
  };
}
217
/**
 * Classifies a CLI result for the replay report.
 *
 * The skip reasons `input_too_large` and `no_candidates` take precedence;
 * otherwise the outcome depends only on whether any matches were returned.
 *
 * @returns One of `skipped_input_too_large`, `no_candidates`,
 *   `ran_with_matches` or `ran_no_matches`.
 */
function replayOutcome(result) {
  switch (result.stats.skipReason) {
    case "input_too_large":
      return "skipped_input_too_large";
    case "no_candidates":
      return "no_candidates";
    default:
      return result.matches.length > 0 ? "ran_with_matches" : "ran_no_matches";
  }
}
226
/**
 * Builds the replay-run record used when a replay could not be executed.
 *
 * All counters are zero, the run is marked skipped with reason `error`, and
 * `error` holds the message of `error` (or its string form when it is not an
 * `Error` instance).
 */
function replayErrorRun(replayId, profileId, commit, error) {
  const message = error instanceof Error ? error.message : String(error);
  const commitId = commit.shortSha;
  return {
    replayId,
    profileId,
    commitId,
    outcome: "error",
    changedFiles: 0,
    addedLines: 0,
    deletedLines: 0,
    elapsedMs: 0,
    skipped: true,
    skipReason: "error",
    targets: 0,
    inputTokens: 0,
    modelCalls: 0,
    matches: [],
    matchesByPattern: {},
    error: message,
  };
}
246
/**
 * Re-invokes the currently running CLI script (`process.argv[1]`) with the
 * current Node executable in `cwd` and parses its stdout as JSON.
 *
 * @param cwd Working directory for the child process.
 * @param args Arguments appended after the script path.
 * @returns The parsed output, with `stats.elapsedMs` replaced by the measured
 *   wall-clock time when the reported value is falsy (missing or 0).
 */
async function runCli(cwd, args) {
  const launchedAt = Date.now();
  const entryScript = process.argv[1];
  const child = await execFileAsync(process.execPath, [entryScript, ...args], {
    cwd,
    env: process.env,
    maxBuffer: 128 * 1024 * 1024,
  });
  const payload = JSON.parse(child.stdout);
  const stats = {
    ...payload.stats,
    elapsedMs: payload.stats.elapsedMs || Date.now() - launchedAt,
  };
  return { ...payload, stats };
}
263
/**
 * Converts a parsed CLI result into a bench-run record (without a score).
 *
 * @param profileId Identifier of the profile that produced the result.
 * @param result Parsed CLI output; `stats`, `patterns` and `matches` are read.
 * @param identity `{ fixtureId?, smokeId?, expected? }` describing what was run.
 * @returns The run record. Missing optional stats fall back to 0, `{}`, `[]`
 *   or `false` as appropriate; `targets` prefers `searchTargets` over
 *   `candidates`.
 */
function resultToBenchRun(profileId, result, identity) {
  const cliStats = result.stats;
  const { fixtureId, smokeId, expected } = identity;
  return {
    profileId,
    fixtureId,
    smokeId,
    elapsedMs: cliStats.elapsedMs,
    modelCalls: cliStats.modelCalls,
    patterns: result.patterns,
    targets: cliStats.searchTargets ?? cliStats.candidates ?? 0,
    targetsByPattern: cliStats.targetsByPattern ?? {},
    inputTokens: cliStats.inputTokens ?? 0,
    repomixPackedTokens: cliStats.repomixTokens,
    skipped: cliStats.skipped ?? false,
    skipReason: cliStats.skipReason,
    matches: result.matches,
    expected,
    targetsPreview: cliStats.targetsPreview ?? [],
    matchesUsingCounterReasonAsProof: countCounterReasonProofs(result.matches),
  };
}
283
/**
 * Builds the bench-run record used when a fixture or smoke run threw.
 *
 * The run is marked skipped with reason `error`. The score is -3 when
 * `identity.fixtureId` is truthy and -5 otherwise.
 *
 * @param profileId Identifier of the profile being benchmarked.
 * @param identity `{ fixtureId?, smokeId?, expected? }` describing what was run.
 * @param error The thrown value; its message (or string form) is stored.
 */
function errorRun(profileId, identity, error) {
  const { fixtureId, smokeId, expected } = identity;
  const message = error instanceof Error ? error.message : String(error);
  const penalty = fixtureId ? -3 : -5;
  return {
    profileId,
    fixtureId,
    smokeId,
    elapsedMs: 0,
    modelCalls: 0,
    patterns: [],
    targets: 0,
    targetsByPattern: {},
    inputTokens: 0,
    skipped: true,
    skipReason: "error",
    matches: [],
    expected,
    targetsPreview: [],
    matchesUsingCounterReasonAsProof: 0,
    score: penalty,
    error: message,
  };
}
304
/**
 * Scores a fixture run against its expectations.
 *
 * Only expectations whose pattern is active in the run (`run.patterns`) count:
 * - expected match found: +5; expected match missing: -4
 * - expected non-match respected: +2; expected non-match violated: -10
 * - each match whose pattern is not among the active expectations: -6
 * - the run was skipped although an active expectation should match: -3
 * A small cost is then subtracted for elapsed time (0.05 per second) and input
 * tokens (0.001 per thousand). The result is rounded by `round`.
 *
 * @param run Bench run; `patterns`, `matches`, `skipped`, `elapsedMs` and
 *   `inputTokens` are read.
 * @param expected Expectation list of `{ patternId, shouldMatch }`.
 */
function scoreFixtureRun(run, expected) {
  const active = new Set();
  run.patterns.forEach((patternId) => active.add(patternId));
  const relevant = expected.filter(({ patternId }) => active.has(patternId));

  const skippedDespiteExpectation = run.skipped && relevant.some((item) => item.shouldMatch);
  let score = skippedDespiteExpectation ? -3 : 0;

  const tally = countMatches(run.matches);
  const relevantIds = new Set(relevant.map((item) => item.patternId));

  for (const { patternId, shouldMatch } of relevant) {
    const hit = (tally[patternId] ?? 0) > 0;
    if (shouldMatch) {
      score += hit ? 5 : -4;
    } else {
      score += hit ? -10 : 2;
    }
  }

  for (const { patternId } of run.matches) {
    if (!relevantIds.has(patternId)) {
      score -= 6;
    }
  }

  score -= (run.elapsedMs / 1000) * 0.05;
  score -= (run.inputTokens / 1000) * 0.001;
  return round(score);
}
330
/**
 * Scores a smoke run. The score starts at 0 and only ever decreases:
 * - skipped: -5
 * - more than 3 matches: -3
 * - slower than 60 000 ms: -5
 * - skipped with more than 12 000 input tokens: a further -5
 * plus the same time and token costs used for fixture runs. The result is
 * rounded by `round`.
 */
function scoreSmokeRun(run) {
  const penalties = [
    run.skipped ? 5 : 0,
    run.matches.length > 3 ? 3 : 0,
    run.elapsedMs > 60_000 ? 5 : 0,
    run.inputTokens > 12_000 && run.skipped ? 5 : 0,
  ];
  let score = 0;
  for (const penalty of penalties) {
    score -= penalty;
  }
  score -= (run.elapsedMs / 1000) * 0.05;
  score -= (run.inputTokens / 1000) * 0.001;
  return round(score);
}
344
/**
 * Builds the leaderboard: one row per profile, sorted by descending fixture score.
 *
 * For each profile, its fixture runs (runs with a truthy `fixtureId`) feed the
 * confusion counts, the summed score and the average runtime; its smoke runs
 * (truthy `smokeId`) feed the smoke columns and the decision.
 *
 * @param profiles Profile objects; only `id` is read.
 * @param fixtures Accepted for signature compatibility; not read here.
 * @param runs All fixture and smoke runs.
 */
function summarize(profiles, fixtures, runs) {
  const sumOf = (items, pick) => items.reduce((total, item) => total + pick(item), 0);

  const board = [];
  for (const profile of profiles) {
    const ownRuns = runs.filter((run) => run.profileId === profile.id);
    const fixtureRuns = ownRuns.filter((run) => run.fixtureId);
    const smokeRuns = ownRuns.filter((run) => run.smokeId);

    let counts = emptyCounts();
    for (const run of fixtureRuns) {
      counts = addFixtureCounts(counts, run);
    }

    // Number of active expectations that should have matched (the recall denominator).
    let positiveFixtureCount = 0;
    for (const run of fixtureRuns) {
      for (const item of run.expected ?? []) {
        const isActive = run.patterns.some((pattern) => pattern === item.patternId);
        if (isActive && item.shouldMatch) {
          positiveFixtureCount += 1;
        }
      }
    }

    let avgMs = 0;
    if (fixtureRuns.length !== 0) {
      avgMs = sumOf(fixtureRuns, (run) => run.elapsedMs) / fixtureRuns.length;
    }

    const decision = decisionForProfile(counts, positiveFixtureCount, smokeRuns);
    board.push({
      profileId: profile.id,
      fixtureScore: round(sumOf(fixtureRuns, (run) => run.score ?? 0)),
      falsePositives: counts.fp,
      falseNegatives: counts.fn,
      truePositives: counts.tp,
      trueNegatives: counts.tn,
      wrongPatterns: counts.wp,
      assignedCheckFalsePositives: counts.assignedFp,
      avgMs: Math.round(avgMs),
      smokeMatches: sumOf(smokeRuns, (run) => run.matches.length),
      smokeSkipped: smokeRuns.filter((run) => run.skipped).length,
      matchesUsingCounterReasonAsProof: sumOf(fixtureRuns, (run) => run.matchesUsingCounterReasonAsProof),
      decision,
    });
  }
  return board.sort((left, right) => right.fixtureScore - left.fixtureScore);
}
374
/**
 * Aggregates fixture runs per check (pattern id) across all profiles.
 *
 * For every active expectation the check's TP/FN/FP/TN counters are updated
 * (a violated non-match counts as both `fp` and `assignedFp`). Every match
 * whose pattern is not among the run's active expectations counts as `fp` and
 * `wp` for that pattern. Runs without a truthy `fixtureId` are ignored.
 *
 * @returns Rows sorted by `checkId`, each with its counters and a decision.
 */
function summarizeByCheck(runs) {
  const tallies = new Map();
  const tallyFor = (checkId) => {
    let tally = tallies.get(checkId);
    if (!tally) {
      tally = emptyCounts();
      tallies.set(checkId, tally);
    }
    return tally;
  };

  for (const run of runs) {
    if (!run.fixtureId) {
      continue;
    }
    const active = new Set();
    run.patterns.forEach((patternId) => active.add(patternId));
    const relevant = (run.expected ?? []).filter((item) => active.has(item.patternId));

    for (const item of relevant) {
      const hit = run.matches.some((match) => match.patternId === item.patternId);
      const tally = tallyFor(item.patternId);
      if (item.shouldMatch) {
        if (hit) {
          tally.tp += 1;
        } else {
          tally.fn += 1;
        }
      } else if (hit) {
        tally.fp += 1;
        tally.assignedFp += 1;
      } else {
        tally.tn += 1;
      }
    }

    const relevantIds = new Set(relevant.map((item) => item.patternId));
    for (const { patternId } of run.matches) {
      if (!relevantIds.has(patternId)) {
        const tally = tallyFor(patternId);
        tally.fp += 1;
        tally.wp += 1;
      }
    }
  }

  const rows = [];
  for (const [checkId, tally] of tallies.entries()) {
    rows.push({
      checkId,
      truePositives: tally.tp,
      falsePositives: tally.fp,
      falseNegatives: tally.fn,
      wrongPatterns: tally.wp,
      assignedCheckFalsePositives: tally.assignedFp,
      decision: checkDecision(tally),
    });
  }
  return rows.sort((left, right) => left.checkId.localeCompare(right.checkId));
}
418
/**
 * Adds one fixture run's confusion counts to `counts` (mutated in place).
 *
 * Only expectations whose pattern is active in the run (`run.patterns`) are
 * considered:
 * - should match and matched: `tp`; should match and not matched: `fn`
 * - should not match and not matched: `tn`
 * - should not match but matched: `fp` and `assignedFp`
 * Every match whose pattern is not among the active expectations increments
 * both `fp` and `wp` (a "wrong pattern" false positive), once per match.
 *
 * @param counts Accumulator with `tp`, `tn`, `fp`, `fn`, `wp`, `assignedFp`.
 * @param run Bench run; `expected`, `patterns` and `matches` are read.
 * @returns The same `counts` object, so the function can be used as a reducer.
 */
function addFixtureCounts(counts, run) {
  const expected = run.expected ?? [];
  const activePatterns = new Set(run.patterns.map((pattern) => pattern));
  const activeExpected = expected.filter((item) => activePatterns.has(item.patternId));
  // Membership test, the same way summarizeByCheck decides whether a check matched.
  const matchedPatterns = new Set(run.matches.map((match) => match.patternId));
  const expectedPatterns = new Set(activeExpected.map((item) => item.patternId));

  for (const item of activeExpected) {
    const matched = matchedPatterns.has(item.patternId);
    if (item.shouldMatch) {
      if (matched) {
        counts.tp += 1;
      } else {
        counts.fn += 1;
      }
    } else if (matched) {
      counts.fp += 1;
      counts.assignedFp += 1;
    } else {
      counts.tn += 1;
    }
  }

  for (const match of run.matches) {
    // A single guard suffices; the original repeated this condition in a nested `if`.
    if (!expectedPatterns.has(match.patternId)) {
      counts.fp += 1;
      counts.wp += 1;
    }
  }
  return counts;
}
447
/**
 * Creates a fresh confusion-count accumulator with every counter at 0:
 * `tp`, `tn`, `fp`, `fn`, `wp` (wrong pattern) and `assignedFp`.
 */
function emptyCounts() {
  const counterNames = ["tp", "tn", "fp", "fn", "wp", "assignedFp"];
  return Object.fromEntries(counterNames.map((name) => [name, 0]));
}
450
/**
 * Derives the leaderboard decision for a profile.
 *
 * Checks are applied in order and the first that applies wins:
 * 1. any assigned-check false positive → "reject: false positives"
 * 2. any wrong-pattern match           → "reject: wrong pattern"
 * 3. any other false positive          → "reject: false positives"
 * 4. fewer true positives than 60% (rounded up) of the positive expectations
 *                                      → "reject: low recall"
 * 5. a smoke run with more than 3 matches → "reject: noisy smoke"
 * 6. a smoke run slower than 60 000 ms    → "reject: slow smoke"
 * 7. any skipped smoke run                → "fixture candidate"
 * 8. otherwise                            → "candidate hook default"
 *
 * @param counts Confusion counts (`tp`, `fp`, `wp`, `assignedFp` are read).
 * @param positiveFixtureCount Number of active expectations that should match.
 * @param smokeRuns Smoke runs of the profile.
 */
function decisionForProfile(counts, positiveFixtureCount, smokeRuns) {
  // `fp` is incremented together with `wp` for every wrong-pattern match, so
  // testing `fp` first made the "wrong pattern" verdict unreachable. Test the
  // specific causes first and keep `fp` as a catch-all.
  if (counts.assignedFp > 0)
    return "reject: false positives";
  if (counts.wp > 0)
    return "reject: wrong pattern";
  if (counts.fp > 0)
    return "reject: false positives";
  if (counts.tp < Math.ceil(positiveFixtureCount * 0.6))
    return "reject: low recall";
  if (smokeRuns.some((run) => run.matches.length > 3))
    return "reject: noisy smoke";
  if (smokeRuns.some((run) => run.elapsedMs > 60_000))
    return "reject: slow smoke";
  if (smokeRuns.some((run) => run.skipped))
    return "fixture candidate";
  return "candidate hook default";
}
465
/**
 * Derives the per-check verdict from its confusion counts.
 *
 * - any false positive → "not search-safe"
 * - no true positive but at least one false negative → "blind"
 * - more false negatives than true positives → "low recall"
 * - otherwise → "candidate"
 */
function checkDecision(counts) {
  const { fp, tp, fn } = counts;
  if (fp > 0) {
    return "not search-safe";
  }
  if (fn > tp) {
    // With zero hits the check never fired at all; otherwise it merely misses too often.
    return tp === 0 ? "blind" : "low recall";
  }
  return "candidate";
}
474
/**
 * Counts matches per pattern id.
 *
 * @param matches Match records; only `patternId` is read.
 * @returns A plain object mapping each pattern id to its number of matches.
 */
function countMatches(matches) {
  return matches.reduce((tally, { patternId }) => {
    tally[patternId] = (tally[patternId] ?? 0) + 1;
    return tally;
  }, {});
}
480
/**
 * Counts the matches whose `proof` contains the text `counter_reason`
 * (case-insensitive).
 */
function countCounterReasonProofs(matches) {
  const marker = /counter_reason/i;
  return matches.reduce((total, { proof }) => (marker.test(proof) ? total + 1 : total), 0);
}
483
/**
 * Creates a fresh per-profile replay accumulator: the run total, one counter
 * per replay outcome, and the summed matches, model calls and targets — all 0.
 */
function emptyReplayOutcomeCounts() {
  const counterNames = [
    "runs",
    "no_candidates",
    "ran_no_matches",
    "ran_with_matches",
    "skipped_input_too_large",
    "error",
    "matches",
    "modelCalls",
    "targets",
  ];
  const counters = {};
  for (const name of counterNames) {
    counters[name] = 0;
  }
  return counters;
}
496
/**
 * Writes a run to `<runsDir>/<safe id>.json` (pretty-printed JSON) and then to
 * `<runsDir>/<safe id>.md` (Markdown report), in that order.
 *
 * @param runsDir Target directory (must already exist).
 * @param id Run identifier; sanitised with `safeSegment` for the file name.
 * @param run The bench run to persist.
 * @param description Free-text description included in the Markdown report.
 */
async function writeRunFiles(runsDir, id, run, description) {
  const safeId = safeSegment(id);
  // Producers are lazy so the Markdown is only rendered after the JSON file was written.
  const outputs = [
    ["json", () => JSON.stringify(run, null, 2)],
    ["md", () => renderRunMarkdown(run, description)],
  ];
  for (const [extension, produce] of outputs) {
    await writeFile(path.join(runsDir, `${safeId}.${extension}`), produce());
  }
}
501
/**
 * Renders the Markdown report for a single bench run.
 *
 * The heading is the run's `fixtureId`, falling back to `smokeId`. It is
 * followed by summary lines (profile, description, runtime, targets, model
 * calls, input tokens, counter-reason proofs, skip state, score) and the
 * sections "Matches", "Expected" and "Targets", each showing "(none)" when
 * empty. An "Error" section is appended only when `run.error` is truthy.
 *
 * @param run Bench run to render.
 * @param description Text shown on the "Description" line.
 * @returns The Markdown text.
 */
+ function renderRunMarkdown(run, description) {
502
+ return `# ${run.fixtureId ?? run.smokeId}
503
+
504
+ Profile: ${run.profileId}
505
+ Description: ${description}
506
+ Runtime: ${run.elapsedMs}ms
507
+ Targets: ${run.targets}
508
+ Model calls: ${run.modelCalls}
509
+ Input tokens: ${run.inputTokens}
510
+ Counter-reason proofs: ${run.matchesUsingCounterReasonAsProof}
511
+ Skipped: ${run.skipped ? `${run.skipReason ?? "yes"}` : "no"}
512
+ Score: ${run.score ?? "n/a"}
513
+
514
+ ## Matches
515
+ ${run.matches.length === 0 ? "(none)" : run.matches.map((match, index) => `${index + 1}. ${match.patternId} (${match.targetId})
516
+ reason: ${match.reason}
517
+ proof: ${match.proof}`).join("\n")}
518
+
519
+ ## Expected
520
+ ${(run.expected ?? []).length === 0 ? "(none)" : (run.expected ?? []).map((expected) => `- ${expected.patternId}: ${expected.shouldMatch ? "match" : "no match"}`).join("\n")}
521
+
522
+ ## Targets
523
+ ${run.targetsPreview.length === 0 ? "(none)" : run.targetsPreview.map((target) => `- ${target.targetId}: ${target.patternId} ${target.entityKind ?? ""} ${target.sourceKind ?? ""}`.trim()).join("\n")}
524
+
525
+ ${run.error ? `## Error\n${run.error}\n` : ""}`;
526
+ }
527
/**
 * Renders the leaderboard Markdown: a ranked profile table followed by a
 * "Per-Check Summary" table.
 *
 * The rank column is the row's position in `rows` plus one; this function does
 * not sort, so `rows` is presumably already ordered by the caller.
 *
 * @param rows Leaderboard rows (one per profile).
 * @param perCheck Per-check summary rows.
 * @returns The Markdown text.
 */
+ function renderLeaderboard(rows, perCheck) {
528
+ const table = rows.map((row, index) => `| ${index + 1} | ${row.profileId} | ${row.fixtureScore} | ${row.falsePositives} | ${row.wrongPatterns} | ${row.assignedCheckFalsePositives} | ${row.falseNegatives} | ${row.truePositives} | ${row.matchesUsingCounterReasonAsProof} | ${row.avgMs} | ${row.smokeMatches} | ${row.smokeSkipped} | ${row.decision} |`).join("\n");
529
+ const checkTable = perCheck.map((row) => `| ${row.checkId} | ${row.truePositives} | ${row.falsePositives} | ${row.wrongPatterns} | ${row.assignedCheckFalsePositives} | ${row.falseNegatives} | ${row.decision} |`).join("\n");
530
+ return `# Search Bench Leaderboard
531
+
532
+ | rank | profile | fixture score | FP | wrong FP | assigned FP | FN | TP | counter-proof | avg ms | smoke matches | smoke skipped | decision |
533
+ |---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|
534
+ ${table}
535
+
536
+ ## Per-Check Summary
537
+
538
+ | check | TP | FP | wrong FP | assigned FP | FN | decision |
539
+ |---|---:|---:|---:|---:|---:|---|
540
+ ${checkTable}
541
+ `;
542
+ }
543
/**
 * Renders the "Real Staged Replay" Markdown: the per-profile outcome summary
 * followed by one table row per replay run.
 *
 * The "patterns" column lists every pattern with a non-zero match count,
 * written as `pattern(count)` when the count is not 1, or "-" when there are
 * none. The trailing "manual" column is always emitted empty.
 *
 * @param runs Replay runs.
 * @returns The Markdown text.
 */
+ function renderReplayMarkdown(runs) {
544
+ const table = runs.map((run) => {
545
+ const patterns = Object.entries(run.matchesByPattern)
546
+ .filter(([, count]) => count > 0)
547
+ .map(([pattern, count]) => count === 1 ? pattern : `${pattern}(${count})`)
548
+ .join(", ") || "-";
549
+ return `| ${run.profileId} | ${run.commitId} | ${run.changedFiles} | +${run.addedLines}/-${run.deletedLines} | ${run.outcome} | ${run.elapsedMs} | ${run.targets} | ${run.inputTokens} | ${run.matches.length} | ${patterns} | |`;
550
+ }).join("\n");
551
+ const outcomeTable = renderReplayOutcomeSummary(runs);
552
+ return `# Real Staged Replay
553
+
554
+ ${outcomeTable}
555
+
556
+ | profile | commit | files | +/- | outcome | ms | targets | input tokens | matches | patterns | manual |
557
+ |---|---|---:|---:|---|---:|---:|---:|---:|---|---|
558
+ ${table}
559
+ `;
560
+ }
561
/**
 * Renders the manual-review worksheet for replay matches.
 *
 * Emits one section per match (headed by profile id and commit id) with the
 * pattern, target, reason and proof, followed by a "Manual label" placeholder
 * and an empty "Notes" line. When no run has any match, a fixed
 * "No real replay matches." document is returned instead.
 *
 * @param runs Replay runs.
 * @returns The Markdown text.
 */
+ function renderReplayReviewMarkdown(runs) {
562
+ const matched = runs.filter((run) => run.matches.length > 0);
563
+ if (matched.length === 0)
564
+ return "# Real Replay Review\n\nNo real replay matches.\n";
565
+ return `# Real Replay Review
566
+
567
+ ${matched.flatMap((run) => run.matches.map((match) => `## ${run.profileId} / ${run.commitId}
568
+
569
+ Pattern: ${match.patternId}
570
+ Target: ${match.targetId}
571
+ Reason: ${match.reason}
572
+ Proof: ${match.proof}
573
+ Manual label: [good / maybe / bad]
574
+ Notes:
575
+ `)).join("\n")}`;
576
+ }
577
/**
 * Renders the "Outcome Summary" Markdown table: one row per profile id with
 * the number of runs, the count of each outcome, and the summed matches, model
 * calls and targets. Profiles appear in order of first occurrence in `runs`.
 *
 * NOTE(review): `run.outcome` is used directly as a counter key, so it is
 * assumed to be one of the keys created by `emptyReplayOutcomeCounts`; any
 * other value would produce a NaN property instead of being counted.
 *
 * @param runs Replay runs.
 * @returns The Markdown text (without a trailing newline).
 */
+ function renderReplayOutcomeSummary(runs) {
578
+ const byProfile = new Map();
579
+ for (const run of runs) {
580
+ const current = byProfile.get(run.profileId) ?? emptyReplayOutcomeCounts();
581
+ current.runs += 1;
582
+ current[run.outcome] += 1;
583
+ current.matches += run.matches.length;
584
+ current.modelCalls += run.modelCalls;
585
+ current.targets += run.targets;
586
+ byProfile.set(run.profileId, current);
587
+ }
588
+ const table = [...byProfile.entries()].map(([profile, counts]) => `| ${profile} | ${counts.runs} | ${counts.no_candidates} | ${counts.ran_no_matches} | ${counts.ran_with_matches} | ${counts.skipped_input_too_large} | ${counts.error} | ${counts.matches} | ${counts.modelCalls} | ${counts.targets} |`).join("\n");
589
+ return `## Outcome Summary
590
+
591
+ | profile | runs | no candidates | ran no matches | ran with matches | input too large | errors | matches | model calls | targets |
592
+ |---|---:|---:|---:|---:|---:|---:|---:|---:|---:|
593
+ ${table}`;
594
+ }
595
/**
 * Resolves every configured profile path with `resolvePath`, preserving order.
 *
 * @param profilePaths Profile paths as written in the config.
 * @param configDir Directory of the config file (fallback base directory).
 * @returns The resolved paths.
 */
async function resolveProfilePaths(profilePaths, configDir) {
  const pending = profilePaths.map((candidate) => resolvePath(candidate, configDir));
  return await Promise.all(pending);
}
598
/**
 * Expands a path pattern containing at most one meaningful `*` wildcard.
 *
 * The pattern is resolved with `resolvePath(pattern, configDir, false)`. A
 * pattern without `*` yields that single resolved path. Otherwise the `*` must
 * be in the last path segment: the directory part is listed and every entry
 * that starts with the text before the `*` and ends with the text after it is
 * returned, sorted.
 *
 * Only the first `*` is treated as a wildcard; anything after it (including
 * further `*` characters or path separators) is matched literally as a suffix
 * of the entry name.
 *
 * @param pattern Path or single-wildcard pattern from the config.
 * @param configDir Directory of the config file, forwarded to `resolvePath`.
 * @returns Matching absolute paths in sorted order.
 * @throws If the directory to list cannot be read.
 */
async function resolveGlob(pattern, configDir) {
  const resolved = await resolvePath(pattern, configDir, false);
  const starIndex = resolved.indexOf("*");
  if (starIndex === -1)
    return [resolved];
  const before = resolved.slice(0, starIndex);
  const after = resolved.slice(starIndex + 1);
  const wildcardStartsSegment = before.endsWith(path.sep);
  const dir = wildcardStartsSegment ? before.slice(0, -1) : path.dirname(before);
  const prefix = wildcardStartsSegment ? "" : path.basename(before);
  // Prefix and suffix must not overlap inside the entry name: without this
  // check the pattern "a*a" would wrongly match an entry named "a".
  const minLength = prefix.length + after.length;
  const entries = await readdir(dir);
  return entries
    .filter((entry) => entry.length >= minLength && entry.startsWith(prefix) && entry.endsWith(after))
    .map((entry) => path.join(dir, entry))
    .sort();
}
612
/**
 * Resolves a configured path to an absolute path.
 *
 * A leading `~/` is replaced by `$HOME` (empty string when unset). The path is
 * first resolved against the current working directory; when `mustExist` is
 * true and that candidate cannot be read as a file, the path resolved against
 * `configDir` is used if it can. If neither can be read, the
 * working-directory candidate is returned.
 *
 * @param input Path as written in the config.
 * @param configDir Directory of the config file.
 * @param mustExist When false, no existence check is made and the
 *   working-directory candidate is returned directly.
 */
async function resolvePath(input, configDir, mustExist = true) {
  let expanded = input;
  if (input.startsWith("~/")) {
    expanded = path.join(process.env.HOME ?? "", input.slice(2));
  }
  const cwdCandidate = path.resolve(expanded);
  const configCandidate = path.resolve(configDir, expanded);
  if (!mustExist) {
    return cwdCandidate;
  }
  if (await exists(cwdCandidate)) {
    return cwdCandidate;
  }
  return (await exists(configCandidate)) ? configCandidate : cwdCandidate;
}
622
/**
 * Loads a search profile from a JSON file.
 *
 * @param filePath Path of the profile file.
 * @returns `{ filePath, profile }`.
 * @throws If the file cannot be read or parsed, or the profile has no truthy `id`.
 */
async function readProfile(filePath) {
  const raw = await readFile(filePath, "utf8");
  const profile = JSON.parse(raw);
  if (profile.id) {
    return { filePath, profile };
  }
  throw new Error(`Search profile missing id: ${filePath}`);
}
628
/**
 * Loads a search fixture from a JSON file.
 *
 * @param filePath Path of the fixture file.
 * @returns `{ filePath, fixture }`.
 * @throws If the file cannot be read or parsed, or the fixture has no truthy `id`.
 */
async function readFixture(filePath) {
  const raw = await readFile(filePath, "utf8");
  const fixture = JSON.parse(raw);
  if (fixture.id) {
    return { filePath, fixture };
  }
  throw new Error(`Search fixture missing id: ${filePath}`);
}
634
/**
 * Reports whether `filePath` can be read as a file.
 *
 * The check is performed by actually reading the file, so any read failure
 * (missing path, directory, permission problem) yields `false`.
 */
async function exists(filePath) {
  return readFile(filePath).then(
    () => true,
    () => false,
  );
}
643
/**
 * Resolves the working directory of a smoke run.
 *
 * - empty or missing value: the current working directory
 * - exactly `$BEVYL_REPO`: the `BEVYL_REPO` environment variable, or `null` when unset
 * - `$BEVYL_REPO/<rest>`: `<rest>` below `BEVYL_REPO`, or `null` when it is unset or empty
 * - `~/<rest>`: `<rest>` below `$HOME` (empty string when unset)
 * - anything else: returned unchanged
 */
function resolveSmokeCwd(cwd) {
  if (!cwd) {
    return process.cwd();
  }
  const repoToken = "$BEVYL_REPO";
  if (cwd === repoToken) {
    return process.env.BEVYL_REPO ?? null;
  }
  const repoPrefix = `${repoToken}/`;
  if (cwd.startsWith(repoPrefix)) {
    const root = process.env.BEVYL_REPO;
    if (!root) {
      return null;
    }
    return path.join(root, cwd.slice(repoPrefix.length));
  }
  if (cwd.startsWith("~/")) {
    return path.join(process.env.HOME ?? "", cwd.slice(2));
  }
  return cwd;
}
654
/**
 * Resolves the repository directory for a commit replay.
 *
 * `replay.cwd` wins when set. Otherwise, when `replay.repoEnv` names an
 * environment variable, its value is used (or `null` when it is unset or
 * empty). With neither field, the current working directory is used. Paths
 * are passed through `expandPath`.
 */
function resolveReplayCwd(replay) {
  const { cwd, repoEnv } = replay;
  if (cwd) {
    return expandPath(cwd);
  }
  if (!repoEnv) {
    return process.cwd();
  }
  const fromEnv = process.env[repoEnv];
  return fromEnv ? expandPath(fromEnv) : null;
}
663
/**
 * Replaces a leading `~/` with `$HOME` (empty string when unset); any other
 * input is returned unchanged.
 */
function expandPath(input) {
  if (!input.startsWith("~/")) {
    return input;
  }
  const home = process.env.HOME ?? "";
  return path.join(home, input.slice(2));
}
666
/**
 * Converts a value into text that is safe to use as a single file-name segment.
 *
 * Every run of characters other than ASCII letters, digits, `.`, `_` and `-`
 * becomes one `_`, and leading/trailing underscores are removed. If nothing
 * remains, `"run"` is returned.
 *
 * @param value Identifier to sanitise. Values come from JSON config files and
 *   CLI output, so `null`, `undefined` and non-strings are tolerated:
 *   `null`/`undefined` behave like an empty string, other values are
 *   stringified first.
 * @returns A non-empty segment.
 */
function safeSegment(value) {
  // A missing `name`/`id` in the config would otherwise throw on `.replace`
  // before the "run" fallback below could apply.
  const text = value == null ? "" : String(value);
  return text.replace(/[^A-Za-z0-9._-]+/g, "_").replace(/^_+|_+$/g, "") || "run";
}
669
/**
 * Rounds a number to three decimal places.
 */
function round(value) {
  const scale = 1000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
672
/**
 * Converts a `git diff --numstat` column to a number, treating anything that
 * is not a finite number as 0.
 */
function numericStat(value) {
  const asNumber = Number(value);
  if (!Number.isFinite(asNumber)) {
    return 0;
  }
  return asNumber;
}
@@ -0,0 +1,6 @@
1
+ import type { RepomixSearchConfig, SearchProfile, StupifyCheck } from "./types.ts";
2
/**
 * Loads a search profile.
 * NOTE(review): presumably resolves to `null` when `profilePath` is `null`;
 * confirm against the implementation in search-profile.js.
 */
+ export declare function loadSearchProfile(profilePath: string | null): Promise<SearchProfile | null>;
3
/**
 * Returns the checks to run given explicitly requested check ids and an
 * optional profile.
 * NOTE(review): precedence between the two inputs is not visible from this
 * declaration; confirm against the implementation.
 */
+ export declare function effectiveSearchChecks(explicitCheckIds: readonly string[] | null, profile: SearchProfile | null): readonly StupifyCheck[];
4
/**
 * Returns the candidate limit to use.
 * NOTE(review): presumably a profile value overrides `defaultValue`; confirm.
 */
+ export declare function effectiveMaxCandidates(defaultValue: number, profile: SearchProfile | null): number;
5
/**
 * Returns the search input-token limit to use.
 * NOTE(review): presumably a profile value overrides `defaultValue`; confirm.
 */
+ export declare function effectiveMaxSearchInputTokens(defaultValue: number, profile: SearchProfile | null): number;
6
/**
 * Returns the Repomix search configuration to use.
 * NOTE(review): presumably profile settings override or extend `defaultValue`; confirm.
 */
+ export declare function effectiveRepomixConfig(defaultValue: RepomixSearchConfig, profile: SearchProfile | null): RepomixSearchConfig;