@stupify/cli 0.0.16 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/.review/CORPUS.md +73 -0
  2. package/.review/REVIEW-PROMPT.md +52 -0
  3. package/.review/RUBRIC.md +46 -0
  4. package/LICENSE +1 -1
  5. package/README.md +41 -39
  6. package/package.json +24 -25
  7. package/src/cli.ts +358 -0
  8. package/src/review-sweep.ts +492 -0
  9. package/dist/analysis.d.ts +0 -16
  10. package/dist/analysis.js +0 -168
  11. package/dist/cache.d.ts +0 -2
  12. package/dist/cache.js +0 -57
  13. package/dist/checks.d.ts +0 -4
  14. package/dist/checks.js +0 -228
  15. package/dist/command.d.ts +0 -2
  16. package/dist/command.js +0 -147
  17. package/dist/constants.d.ts +0 -4
  18. package/dist/constants.js +0 -53
  19. package/dist/counter-scout.d.ts +0 -21
  20. package/dist/counter-scout.js +0 -167
  21. package/dist/diff.d.ts +0 -1
  22. package/dist/diff.js +0 -10
  23. package/dist/doctor.d.ts +0 -16
  24. package/dist/doctor.js +0 -143
  25. package/dist/git.d.ts +0 -17
  26. package/dist/git.js +0 -368
  27. package/dist/hooks.d.ts +0 -5
  28. package/dist/hooks.js +0 -135
  29. package/dist/index.d.ts +0 -1
  30. package/dist/index.js +0 -1
  31. package/dist/model.d.ts +0 -11
  32. package/dist/model.js +0 -296
  33. package/dist/prompts.d.ts +0 -8
  34. package/dist/prompts.js +0 -89
  35. package/dist/render.d.ts +0 -6
  36. package/dist/render.js +0 -295
  37. package/dist/repomix-provider.d.ts +0 -12
  38. package/dist/repomix-provider.js +0 -196
  39. package/dist/search-bench.d.ts +0 -1
  40. package/dist/search-bench.js +0 -677
  41. package/dist/search-profile.d.ts +0 -6
  42. package/dist/search-profile.js +0 -73
  43. package/dist/sem-provider.d.ts +0 -2
  44. package/dist/sem-provider.js +0 -255
  45. package/dist/stupify.d.ts +0 -38
  46. package/dist/stupify.js +0 -505
  47. package/dist/trace.d.ts +0 -31
  48. package/dist/trace.js +0 -86
  49. package/dist/types.d.ts +0 -341
  50. package/dist/types.js +0 -6
  51. package/dist/ui.d.ts +0 -34
  52. package/dist/ui.js +0 -143
  53. package/src/analysis.ts +0 -223
  54. package/src/cache.ts +0 -63
  55. package/src/checks.ts +0 -231
  56. package/src/command.ts +0 -173
  57. package/src/constants.ts +0 -56
  58. package/src/counter-scout.ts +0 -195
  59. package/src/diff.ts +0 -9
  60. package/src/doctor.ts +0 -166
  61. package/src/git.ts +0 -380
  62. package/src/hooks.ts +0 -151
  63. package/src/index.ts +0 -1
  64. package/src/model.ts +0 -367
  65. package/src/prompts.ts +0 -100
  66. package/src/render.ts +0 -328
  67. package/src/repomix-provider.ts +0 -219
  68. package/src/search-bench.ts +0 -783
  69. package/src/search-profile.ts +0 -89
  70. package/src/sem-provider.ts +0 -300
  71. package/src/stupify.ts +0 -604
  72. package/src/trace.ts +0 -126
  73. package/src/types.ts +0 -362
  74. package/src/ui.ts +0 -187
@@ -0,0 +1,492 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * stupify (review sweep) — auto-review open GitHub PRs with Codex against a corpus of code YOU picked.
4
+ * The engine the `stupify` CLI deploys to ~/.stupify and runs on a cron (or `stupify run`); config.env sits
5
+ * next to it.
6
+ *
7
+ * OPT-IN by default (SCOPE=label): only PRs tagged REVIEW_LABEL are reviewed, so spend tracks exactly what
8
+ * you tag. SCOPE=auto reviews all non-draft, non-bot PRs under DIFF_LINE_CAP.
9
+ * The "taste" — REVIEW-PROMPT.md, RUBRIC.md, CORPUS.md — lives in the TARGET repo under REVIEW_DIR (default
10
+ * `.review/`), so it's version-controlled with the code it judges and edited via a normal PR.
11
+ * Idempotent: skips a PR already reviewed — or already reported as failed — at its current head SHA, via a
12
+ * hidden marker comment. A new push moves the SHA, clears the markers, and re-arms the review.
13
+ * Per-PR memory: each review is fed the PR's existing review thread, so it won't re-raise resolved/declined
14
+ * items and converges ("no new blocking issues") instead of nagging forever.
15
+ *
16
+ * Single-flight: the sweep takes its own lockfile (state/sweep.lock) so two cron ticks never overlap — no
17
+ * `flock` dependency. Every knob lives in config.env next to this file (read fresh each run). Run: `bun review-sweep.ts`.
18
+ */
19
+ import { spawnSync } from 'node:child_process'
20
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, rmSync, statSync, writeFileSync } from 'node:fs'
21
+ import { dirname, join } from 'node:path'
22
+ import { fileURLToPath } from 'node:url'
23
+
24
/** Directory this script lives in (where the CLI deployed it); config.env is read from here. */
const KIT_DIR = dirname(fileURLToPath(import.meta.url))

/** Resolved settings for one sweep, produced once per run by loadConfig(). */
interface Config {
  /** Dedicated checkout that gets hard-reset — never a working checkout you care about. */
  repoDir: string
  remote: string
  slug: string
  defaultBranch: string
  /** Directory IN the target repo holding REVIEW-PROMPT.md / RUBRIC.md / CORPUS.md. */
  reviewDir: string
  scope: 'label' | 'auto'
  reviewLabel: string
  diffLineCap: number
  dryRun: boolean
  maxPrs: number
  stateDir: string
  codexEffort: string
  /** Optional `-c model_provider=...`; empty = codex's own default/auth. */
  codexProvider: string
  /** Optional `-c model=...`; empty = codex's default model. */
  codexModel: string
}

/**
 * Build the Config for this run from the process environment and config.env (next to this script).
 *
 * Precedence per key: process.env, then config.env, then the built-in default — so a one-shot
 * `DRY_RUN=1 bun review-sweep.ts` previews even when the deployed file says DRY_RUN=0.
 * Side effects: creates the state directory, points LOG at state/sweep.log (before any knob is
 * parsed, so config warnings land in that file), and exits the process with status 1 when
 * REPO_SLUG is missing.
 */
function loadConfig(): Config {
  const file = parseEnvFile(join(KIT_DIR, 'config.env'))
  const lookup = (key: string): string | undefined => process.env[key] ?? file[key]

  const pick = (key: string, fallback: string): string => lookup(key) ?? fallback

  // Non-negative decimal integer ≥ min; anything else is logged and replaced by the fallback.
  const int = (key: string, fallback: number, min: number): number => {
    const set = lookup(key)
    if (set === undefined) return fallback
    const digits = set.trim()
    const parsed = Number(digits)
    const valid = /^\d+$/.test(digits) && parsed >= min
    if (!valid) {
      log(`config: ${key}='${set}' is not an integer ≥ ${min} — using ${fallback}`)
      return fallback
    }
    return parsed
  }

  // `unset` applies when the key is absent; `onInvalid` when it is present but unrecognised.
  const bool = (key: string, unset: boolean, onInvalid: boolean): boolean => {
    const set = lookup(key)
    if (set === undefined) return unset
    const word = set.trim().toLowerCase()
    if (['1', 'true', 'yes', 'on'].includes(word)) return true
    if (['0', 'false', 'no', 'off'].includes(word)) return false
    log(`config: ${key}='${set}' is not a boolean (1/0/true/false/yes/no/on/off) — using ${onInvalid} (fail-safe)`)
    return onInvalid
  }

  // Home defaults to the script's own directory; state and the dedicated checkout live under it.
  const stupifyHome = pick('STUPIFY_HOME', KIT_DIR)
  const stateDir = join(stupifyHome, 'state')
  mkdirSync(stateDir, { recursive: true })
  LOG = join(stateDir, 'sweep.log') // must be set before any knob parsing that may log a warning

  const slug = pick('REPO_SLUG', '').trim()
  if (slug === '') {
    log('config: REPO_SLUG is required (owner/repo) — aborting. Run `stupify` to set up.')
    process.exit(1)
  }

  const scopeRaw = pick('SCOPE', 'label').trim().toLowerCase()
  const scopeKnown = scopeRaw === 'label' || scopeRaw === 'auto'
  if (!scopeKnown) log(`config: SCOPE='${scopeRaw}' is not 'label' or 'auto' — using label`)
  const scope: Config['scope'] = scopeRaw === 'auto' ? 'auto' : 'label'

  // Parsed in this order so any warnings are logged in the same sequence as the knobs are listed.
  const diffLineCap = int('DIFF_LINE_CAP', 800, 1)
  const dryRun = bool('DRY_RUN', false, true) // unset = live; unparseable = preview (never post on a typo)
  const maxPrs = int('MAX_PRS', 15, 1)

  return {
    repoDir: join(stupifyHome, 'repo'), // pinned under STUPIFY_HOME: refreshRepo hard-resets this directory
    remote: pick('REMOTE', `https://github.com/${slug}.git`),
    slug,
    defaultBranch: pick('DEFAULT_BRANCH', 'main'),
    reviewDir: pick('REVIEW_DIR', '.review'),
    scope,
    reviewLabel: pick('REVIEW_LABEL', 'codex-review'),
    diffLineCap,
    dryRun,
    maxPrs,
    stateDir,
    codexEffort: pick('CODEX_EFFORT', 'high'),
    codexProvider: pick('CODEX_PROVIDER', ''),
    codexModel: pick('CODEX_MODEL', ''),
  }
}
98
+
99
/**
 * Minimal KEY=VALUE reader for config.env, aiming to read a value the same way bash would when
 * sourcing the file (`KEY='https://…'` → `https://…`).
 *
 * - Blank lines, `#` comment lines and lines without `=` are ignored.
 * - An optional leading `export ` on the key is dropped.
 * - A value that is one complete quoted string (optionally followed by whitespace + `# comment`)
 *   is returned verbatim between the quotes, so ` #` INSIDE quotes is kept as data.
 * - Otherwise a ` #` starts an inline comment, and matching surrounding quotes are stripped.
 * - Escape sequences and variable expansion are NOT interpreted.
 *
 * @param path file to read; a missing file yields an empty object.
 * @returns map of key → parsed value (later duplicates overwrite earlier ones).
 */
function parseEnvFile(path: string): Record<string, string> {
  if (!existsSync(path)) return {}
  const out: Record<string, string> = {}
  for (const raw of readFileSync(path, 'utf8').split('\n')) {
    const line = raw.trim()
    if (!line || line.startsWith('#')) continue
    const eq = line.indexOf('=')
    if (eq < 0) continue
    const key = line
      .slice(0, eq)
      .trim()
      .replace(/^export\s+/, '')
    const value = line.slice(eq + 1)

    // Quoted value: take everything up to the closing quote, provided nothing but an inline
    // comment follows it. This must run before comment stripping, or ` #` inside quotes is lost.
    const lead = value.trimStart()
    const quote = lead[0]
    if (quote === "'" || quote === '"') {
      const close = lead.indexOf(quote, 1)
      if (close > 0) {
        const after = lead.slice(close + 1)
        if (after.trim() === '' || /^\s+#/.test(after)) {
          out[key] = lead.slice(1, close)
          continue
        }
      }
    }

    // Unquoted (or irregularly quoted) value: strip a trailing ` # comment`, then any matching
    // pair of surrounding quotes.
    const comment = value.indexOf(' #')
    let v = (comment < 0 ? value : value.slice(0, comment)).trim()
    if (v.length >= 2 && (v[0] === "'" || v[0] === '"') && v[v.length - 1] === v[0]) v = v.slice(1, -1)
    out[key] = v
  }
  return out
}
118
+
119
/** Outcome of one child process run by exec(). */
interface ProcResult {
  /** True only when the process was spawned and exited with status 0. */
  ok: boolean
  stdout: string
  /** stdout followed by stderr, plus a trailing note for a kill signal and/or spawn error. */
  combined: string
}

/**
 * Run a command synchronously with stdin closed and capture its output as UTF-8 text.
 *
 * @param cmd executable to spawn.
 * @param args argument vector (no shell is involved).
 * @param opts optional working directory and timeout in milliseconds.
 * @returns the captured output and a success flag; never throws for a failed or missing command.
 */
function exec(cmd: string, args: string[], opts: { cwd?: string; timeoutMs?: number } = {}): ProcResult {
  const { cwd, timeoutMs } = opts
  const child = spawnSync(cmd, args, {
    cwd,
    input: '', // empty stdin so the child (codex) cannot wait on the terminal
    timeout: timeoutMs,
    encoding: 'utf8',
    maxBuffer: 64 * 1024 * 1024,
  })
  const stdout = child.stdout ?? ''

  // A killed process (e.g. timeout) or a spawn failure (ENOENT etc.) may produce no output at
  // all, so the cause is appended here — otherwise the failure path would report "no output".
  const pieces = [stdout + (child.stderr ?? '')]
  if (child.signal) {
    const timeoutNote = timeoutMs ? ` (timeout ${timeoutMs}ms)` : ''
    pieces.push(`${cmd}: process killed by ${child.signal}${timeoutNote}`)
  }
  if (child.error) pieces.push(`${cmd}: ${child.error.message}`)

  const ok = child.status === 0 && child.error === undefined
  return { ok, stdout, combined: pieces.join('\n') }
}
141
+
142
/** Path of the sweep log file; empty until loadConfig() sets it, in which case log() only prints. */
let LOG = ''

/**
 * Emit one timestamped line (ISO-8601 UTC, seconds precision) to stdout and, once LOG is set,
 * append the same line to the log file.
 */
function log(message: string): void {
  const stamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z') // drop the milliseconds
  const entry = `${stamp} ${message}`
  if (LOG) {
    appendFileSync(LOG, `${entry}\n`)
  }
  console.log(entry)
}
148
+
149
/**
 * Bring the dedicated checkout to `origin/<cfg.defaultBranch>`: clone it if `.git` is absent, then
 * fetch, check out and hard-reset that branch. Each git step runs only if the previous one succeeded.
 *
 * @returns true when every git step succeeded; false (after logging the reason) otherwise.
 */
function refreshRepo(cfg: Config): boolean {
  mkdirSync(dirname(cfg.repoDir), { recursive: true })
  const git = (args: string[], cwd?: string): boolean => exec('git', args, { cwd }).ok

  const alreadyCloned = existsSync(join(cfg.repoDir, '.git'))
  if (!alreadyCloned) {
    log(`cloning ${cfg.remote} -> ${cfg.repoDir}`)
    if (!git(['clone', '-q', cfg.remote, cfg.repoDir])) return logFail('clone failed')
  }

  const branch = cfg.defaultBranch
  const steps: string[][] = [
    ['fetch', '-q', 'origin', branch],
    ['checkout', '-q', branch],
    ['reset', '-q', '--hard', `origin/${branch}`],
  ]
  // `every` stops at the first failing step, so later steps never run on a broken checkout.
  if (steps.every((args) => git(args, cfg.repoDir))) return true
  return logFail(`refresh failed (is the default branch '${branch}'? set DEFAULT_BRANCH if not)`)
}

/** Log `message` and return false, so a failure can be reported and returned in one expression. */
function logFail(message: string): false {
  log(message)
  return false
}
168
+
169
/** The subset of `gh pr list --json` fields the sweep relies on (validated by isPr). */
interface Pr {
  number: number
  /** Head commit SHA; the idempotency markers are keyed on it. */
  headRefOid: string
  isDraft: boolean
  author: { login: string } | null
  labels: { name: string }[]
}

/**
 * List the repo's open PRs via `gh pr list`.
 *
 * Label filtering is done by the caller on the returned list rather than with `gh pr list --label`,
 * because that search index lags behind labelling.
 *
 * @returns the entries that pass the shape check, or null when gh failed or returned something
 *          that is not a JSON array (the caller aborts the sweep in that case).
 */
function listPrs(cfg: Config): Pr[] | null {
  const fields = 'number,headRefOid,isDraft,author,labels'
  // gh returns only 30 PRs unless --limit is given; without it, candidates beyond the first 30
  // open PRs would never be seen on a busy repo.
  const limit = '1000'
  const r = exec('gh', ['pr', 'list', '--repo', cfg.slug, '--state', 'open', '--limit', limit, '--json', fields])
  if (!r.ok) {
    log('gh pr list failed (auth/network down?) — aborting sweep')
    return null
  }
  let raw: unknown
  try {
    raw = JSON.parse(r.stdout)
  } catch {
    log('gh pr list returned unparseable JSON — aborting sweep')
    return null
  }
  if (!Array.isArray(raw)) {
    log('gh pr list returned a non-array — aborting sweep')
    return null
  }
  // Malformed entries are dropped (and counted in the log) instead of failing the whole sweep.
  const prs = raw.filter(isPr)
  if (prs.length < raw.length) log(`gh pr list: ${raw.length - prs.length} entries failed shape check — skipped`)
  return prs
}
200
+
201
// Runtime validation of the gh boundary. gh normally guarantees the --json shape, but an error
// page or schema drift would otherwise throw (or silently mis-scope) mid-loop instead of being
// skipped cleanly. Uses `in`-narrowing only, no assertions; every field that inScope and the
// main loop read is checked here.

/** True when `raw` has all five fields of Pr with the expected types. */
function isPr(raw: unknown): raw is Pr {
  if (typeof raw !== 'object' || raw === null) return false
  return (
    'number' in raw &&
    typeof raw.number === 'number' &&
    'headRefOid' in raw &&
    typeof raw.headRefOid === 'string' &&
    'isDraft' in raw &&
    typeof raw.isDraft === 'boolean' &&
    'labels' in raw &&
    Array.isArray(raw.labels) &&
    raw.labels.every(isLabel) &&
    'author' in raw &&
    isAuthor(raw.author)
  )
}

/** True when `raw` is a non-null object with a string `name`. */
function isLabel(raw: unknown): raw is { name: string } {
  if (typeof raw !== 'object' || raw === null) return false
  return 'name' in raw && typeof raw.name === 'string'
}

/** True when `raw` is null (gh reports no author) or an object with a string `login`. */
function isAuthor(raw: unknown): raw is { login: string } | null {
  if (raw === null) return true
  return typeof raw === 'object' && raw !== null && 'login' in raw && typeof raw.login === 'string'
}
220
+
221
/** True when the PR carries a label whose name equals cfg.reviewLabel exactly. */
function hasReviewLabel(pr: Pr, cfg: Config): boolean {
  for (const label of pr.labels) {
    if (label.name === cfg.reviewLabel) return true
  }
  return false
}

/**
 * Decide whether a PR is a review candidate at all.
 * Drafts and PRs whose author login ends in `[bot]` are excluded in EITHER scope; beyond that,
 * `label` scope requires the review label and `auto` scope accepts everything.
 */
function inScope(pr: Pr, cfg: Config): boolean {
  const login = pr.author?.login ?? ''
  const fromBot = login.endsWith('[bot]')
  if (pr.isDraft || fromBot) return false
  return cfg.scope === 'label' ? hasReviewLabel(pr, cfg) : true
}
231
+
232
/** One PR conversation comment, reduced to the two fields the sweep uses. */
interface Comment {
  login: string
  body: string
}

/**
 * Fetch a PR's comments via `gh pr view --json comments`.
 *
 * @returns the comments, or null when the PR could not be read (gh failed, output was not JSON,
 *          or it lacked a `comments` array). The caller SKIPS such a PR rather than treating it
 *          as unreviewed — returning an empty list here would let a GitHub blip duplicate-post.
 */
function prComments(cfg: Config, number: number): Comment[] | null {
  const view = exec('gh', ['pr', 'view', String(number), '--repo', cfg.slug, '--json', 'comments'])
  if (!view.ok) return null

  let payload: unknown
  try {
    payload = JSON.parse(view.stdout)
  } catch {
    return null
  }

  const hasCommentList =
    typeof payload === 'object' && payload !== null && 'comments' in payload && Array.isArray(payload.comments)
  if (!hasCommentList) return null
  const list: unknown[] = payload.comments
  return list.map(toComment)
}

/**
 * Coerce one raw comment entry into a Comment. Anything missing or of the wrong type becomes
 * an empty string, so a single odd entry never invalidates the whole list.
 */
function toComment(c: unknown): Comment {
  const blank: Comment = { login: '', body: '' }
  if (typeof c !== 'object' || c === null) return blank

  let body = ''
  if ('body' in c && typeof c.body === 'string') body = c.body

  let login = ''
  const author = 'author' in c ? c.author : null
  if (typeof author === 'object' && author !== null && 'login' in author && typeof author.login === 'string') {
    login = author.login
  }
  return { login, body }
}
262
+
263
// The per-PR MEMORY: the existing review conversation — the reviewer's past reviews + the author's
// replies — fed back into the prompt so it stops re-litigating settled points and knows when to
// converge. The GitHub thread IS the durable store (survives restarts, already holds the replies);
// we just read it back.
const MEMORY_COMMENTS = 20 // recent thread context, bounded so the prompt can't balloon on a chatty PR

/**
 * Render the most recent comments of a PR as prompt context.
 *
 * Comments from `[bot]` logins are dropped, hidden HTML comments (the sweep's own markers) are
 * stripped, and comments left with no visible text are discarded BEFORE the MEMORY_COMMENTS
 * window is applied — so marker-only comments neither appear as empty `@login:` entries nor
 * use up slots in the window.
 *
 * @param comments the PR's comments, oldest first.
 * @returns `@login:\n<text>` entries joined by `---` separators; '' when nothing remains.
 */
function priorReviewThread(comments: Comment[]): string {
  return comments
    .filter((c) => !c.login.endsWith('[bot]')) // drop CI bots; keep prior reviews + human/agent replies
    .map((c) => ({ login: c.login, text: c.body.replace(/<!--[\s\S]*?-->/g, '').trim() })) // strip hidden markers
    .filter((c) => c.text.length > 0)
    .slice(-MEMORY_COMMENTS)
    .map((c) => `@${c.login}:\n${c.text}`)
    .join('\n\n---\n\n')
}
276
+
277
/**
 * Count the lines of a PR's diff as printed by `gh pr diff` (every output line counts; a trailing
 * newline does not add an extra line).
 *
 * @returns the line count, 0 for an empty diff, or null when gh failed. null is deliberately
 *          distinct from 0: an unreadable diff must not pass as "under the cap" and be
 *          auto-reviewed without ever having been measured.
 */
function diffLineCount(cfg: Config, number: number): number | null {
  const diff = exec('gh', ['pr', 'diff', String(number), '--repo', cfg.slug])
  if (!diff.ok) return null

  const text = diff.stdout
  if (text === '') return 0
  const pieces = text.split('\n')
  // A final '\n' leaves one empty trailing piece, which is not a line of the diff.
  return text.endsWith('\n') ? pieces.length - 1 : pieces.length
}
285
+
286
/**
 * Hidden HTML-comment markers for a PR's current head SHA: `mark` stamps a posted review,
 * `failMark` stamps a posted failure notice.
 */
function markersFor(pr: Pr): { mark: string; failMark: string } {
  const sha = pr.headRefOid
  return { mark: `<!-- stupify:${sha} -->`, failMark: `<!-- stupify-failed:${sha} -->` }
}

/**
 * Build the full instruction text handed to codex for one PR.
 *
 * @param cfg supplies the review directory and the repo slug used in the gh commands.
 * @param pr the PR to review; its number and head-SHA marker are embedded in the text.
 * @param priorThread rendered prior conversation; when non-empty it is appended under a
 *        "Prior reviews on this PR (your memory)" heading, otherwise nothing is appended.
 */
function reviewPrompt(cfg: Config, pr: Pr, priorThread: string): string {
  const { mark } = markersFor(pr)
  const outPath = `/tmp/review-${pr.number}.md`
  const dir = cfg.reviewDir

  const instructions = [
    'You are a code reviewer running in an automated sweep (you have gh + git; no token needed). DO NOT modify any code.',
    `Read ${dir}/REVIEW-PROMPT.md and ${dir}/RUBRIC.md (the spec + rubric) and ${dir}/CORPUS.md (the curated good-code reference; open the live files it points at as needed). Then:`,
    `1. Get the diff: gh pr diff ${pr.number} --repo ${cfg.slug}`,
    '2. Review it per the spec — catch bugs / type-lies / dead-code / footguns AND reinvents-primitive / slop, each citing the corpus primitive it should reuse; sort worst-first.',
    `3. Write the review to ${outPath}, formatted EXACTLY per ${dir}/REVIEW-PROMPT.md's 'Comment format' section (it owns the format — opener, finding blocks, attribution). END the file with exactly this line: ${mark}`,
    `4. Post it: gh pr comment ${pr.number} --repo ${cfg.slug} --body-file ${outPath}`,
    'Keep it terse; no preamble.',
  ].join('\n')

  if (!priorThread) return instructions

  // The two leading empty strings yield the blank line that separates the memory section.
  const memory = [
    '',
    '',
    '## Prior reviews on this PR (your memory)',
    "This is the existing review conversation — your past reviews and the author's replies. You are CONTINUING it,",
    `not starting fresh. Apply ${dir}/REVIEW-PROMPT.md's "Prior reviews on this PR" rules: don't re-raise resolved or`,
    'reasoned-declined items, report only what\'s genuinely new, and converge (post the one-line "no new issues"',
    'and stop) if nothing new remains.',
    '',
    priorThread,
  ].join('\n')
  return instructions + memory
}
314
+
315
/**
 * Run one codex review for a PR. codex itself posts the review comment (per the prompt).
 *
 * When codex fails (non-zero exit, timeout, spawn error) a short failure notice carrying the
 * failure marker is posted on the PR instead, so later sweeps skip this head SHA rather than
 * retrying on every tick.
 *
 * @returns tokens used on success (0 when the count could not be parsed), or null on failure.
 */
function reviewPr(cfg: Config, pr: Pr, priorThread: string): number | null {
  const { failMark } = markersFor(pr)
  const outPath = `/tmp/review-${pr.number}.md`
  log(`reviewing PR #${pr.number} @ ${pr.headRefOid.slice(0, 8)}`)

  // Each entry becomes a `-c key=value` override on the codex command line.
  const overrides = [
    `model_reasoning_effort=${cfg.codexEffort}`,
    'sandbox_workspace_write.network_access=true',
    'sandbox_workspace_write.writable_roots=["/tmp"]',
  ]
  if (cfg.codexProvider) overrides.push(`model_provider=${cfg.codexProvider}`)
  if (cfg.codexModel) overrides.push(`model=${cfg.codexModel}`)

  const codexArgs = [
    'exec',
    '--cd',
    cfg.repoDir,
    '--sandbox',
    'workspace-write',
    ...overrides.flatMap((override) => ['-c', override]),
    reviewPrompt(cfg, pr, priorThread),
  ]

  const run = exec('codex', codexArgs, { cwd: cfg.repoDir, timeoutMs: 1_200_000 }) // 20-minute cap
  appendFileSync(LOG, `${run.combined}\n`) // keep the full transcript in the sweep log

  if (!run.ok) {
    const reason = failureReason(run.combined)
    log(`  review FAILED for #${pr.number} — ${reason}`)
    const notice =
      "uhh — i couldn't review this one. codex didn't run:\n" +
      '\n' +
      `> ${reason}\n` +
      '\n' +
      "_— stupify (auto-reviewer). i'll retry on your next push._\n" +
      `${failMark}\n`
    writeFileSync(outPath, notice)
    const posted = exec('gh', ['pr', 'comment', String(pr.number), '--repo', cfg.slug, '--body-file', outPath]).ok
    if (!posted) {
      log(`  (couldn't post failure comment for #${pr.number} either — gh down?)`)
    }
    return null
  }

  const tokens = parseTokens(run.combined)
  log(`  #${pr.number} done (${tokens ?? '?'} tokens)`)
  return tokens ?? 0
}
364
+
365
/**
 * Extract the token count from codex output, reading the LAST `tokens used` line.
 *
 * Two layouts are understood:
 *  - `tokens used` with the count on the following line (e.g. `12,345`);
 *  - the count on the same line after the phrase (e.g. `tokens used: 12345`).
 * Digits found after the phrase on the same line win; otherwise the next line is used.
 * Non-digit characters (thousands separators etc.) are ignored.
 *
 * @returns the count, or null when no `tokens used` line exists or no digits accompany it.
 */
function parseTokens(out: string): number | null {
  const lines = out.split('\n')
  for (let i = lines.length - 1; i >= 0; i--) {
    const line = lines[i] ?? ''
    const hit = /tokens used/i.exec(line)
    if (hit === null) continue
    // Only look AFTER the phrase, so a timestamp prefix on the line cannot be misread as the count.
    const sameLine = line.slice(hit.index + hit[0].length).replace(/\D/g, '')
    if (sameLine) return Number(sameLine)
    const nextLine = (lines[i + 1] ?? '').replace(/\D/g, '')
    return nextLine ? Number(nextLine) : null
  }
  return null
}
377
+
378
/**
 * Pick a one-line explanation for a failed codex run out of its combined output.
 *
 * Takes the LAST line that matches a failure keyword and is not a known false positive
 * ("no error" / "0 error"), replaces backticks with spaces, and caps it at 220 characters.
 * Falls back to a fixed message when no such line exists.
 */
function failureReason(out: string): string {
  const signal = /payment required|credits|quota|rate.?limit|429|5\d\d |timeout|killed|enoent|spawn|error/i
  const noise = /no error|0 error/i

  let hit = ''
  const rows = out.split('\n')
  for (let i = rows.length - 1; i >= 0; i--) {
    const row = (rows[i] ?? '').trim()
    if (signal.test(row) && !noise.test(row)) {
      hit = row
      break
    }
  }

  const cleaned = hit.replace(/`/g, ' ').slice(0, 220).trim()
  if (cleaned) return cleaned
  return 'codex run failed (no output captured — check the sweep log)'
}
389
+
390
/**
 * Single-flight lock without flock: an exclusive (O_EXCL) create of the lockfile wins atomically.
 *
 * An existing lock is taken over only when BOTH hold:
 *  - it is older than 30 minutes, and
 *  - the PID recorded in it no longer refers to a running process.
 * Age alone is not enough: one sweep may run up to MAX_PRS codex reviews of up to 20 minutes
 * each, so a perfectly healthy sweep can outlive the 30-minute window.
 * Caveat: if the OS reuses a crashed sweep's PID for another long-lived process, the lock is
 * held until that process exits or the lockfile is removed by hand.
 *
 * @param path lockfile location.
 * @returns true when this process now owns the lock; false when another sweep holds it.
 */
function acquireLock(path: string): boolean {
  try {
    writeFileSync(path, String(process.pid), { flag: 'wx' })
    return true
  } catch {
    try {
      const stale = Date.now() - statSync(path).mtimeMs > 30 * 60_000
      if (stale && !lockHolderAlive(path)) {
        writeFileSync(path, String(process.pid))
        return true
      }
    } catch {
      /* lock vanished between calls — let the next sweep retry */
    }
    return false
  }
}

/** True when the lockfile records the PID of a process that currently exists. */
function lockHolderAlive(path: string): boolean {
  const pid = Number(readFileSync(path, 'utf8').trim())
  // Empty/garbled content (e.g. a crash mid-write) has no identifiable holder. The > 0 guard
  // also matters because kill(0, …) and negative PIDs address process groups, not one process.
  if (!Number.isInteger(pid) || pid <= 0) return false
  try {
    process.kill(pid, 0) // signal 0 delivers nothing; it only tests for existence
    return true
  } catch (err: unknown) {
    // EPERM: the process exists but belongs to another user — still alive.
    return typeof err === 'object' && err !== null && 'code' in err && err.code === 'EPERM'
  }
}
408
+
409
/**
 * One sweep: load config, take the single-flight lock, refresh the checkout, list open PRs and
 * review each in-scope PR that has no marker for its current head SHA — at most MAX_PRS per run.
 * In DRY_RUN mode it only logs what it would review.
 */
function main(): void {
  const cfg = loadConfig() // also creates stateDir and sets LOG, so config warnings are already captured

  const lockPath = join(cfg.stateDir, 'sweep.lock')
  if (!acquireLock(lockPath)) {
    log('another sweep already running — skip')
    return
  }
  // Release the lock however the process ends normally (return or process.exit).
  process.on('exit', () => {
    try {
      rmSync(lockPath, { force: true })
    } catch {
      /* best-effort */
    }
  })

  if (!refreshRepo(cfg)) process.exit(1)

  // All three review files must exist in the target repo, otherwise the sweep is a no-op.
  const machinery = ['CORPUS.md', 'REVIEW-PROMPT.md', 'RUBRIC.md']
  const haveMachinery = machinery.every((name) => existsSync(join(cfg.repoDir, cfg.reviewDir, name)))
  if (!haveMachinery) {
    log(`no review machinery in ${cfg.slug}:${cfg.reviewDir}/ (need REVIEW-PROMPT.md + RUBRIC.md + CORPUS.md) — no-op. Copy the templates from the stupify repo.`)
    return
  }

  const prs = listPrs(cfg)
  if (prs === null) process.exit(1)
  const queue = prs.filter((pr) => inScope(pr, cfg))

  let reviewed = 0
  let tokens = 0
  // MAX_PRS caps PRs we do real (costly) work on, not PRs iterated — so a backlog of
  // already-reviewed PRs at the front of the list can't use up the budget and starve later ones.
  let handled = 0
  const needsLineCount = cfg.scope === 'auto' || cfg.dryRun

  for (const pr of queue) {
    const { mark, failMark } = markersFor(pr)

    const comments = prComments(cfg, pr.number)
    if (comments === null) {
      log(`skip #${pr.number} — couldn't read it from gh (failed/malformed); will retry next sweep`)
      continue
    }

    // Cheap dedup: this head SHA already has a review or a failure notice.
    const alreadyStamped = comments.some((c) => c.body.includes(mark) || c.body.includes(failMark))
    if (alreadyStamped) continue

    // From here on the PR is a real candidate; the rest is deferred once the budget is spent.
    if (handled >= cfg.maxPrs) {
      log(`reached MAX_PRS=${cfg.maxPrs} this sweep — deferring remaining candidates to the next sweep`)
      break
    }
    handled += 1

    let lines = 0
    if (needsLineCount) {
      const counted = diffLineCount(cfg, pr.number)
      if (counted === null) {
        log(`skip #${pr.number} — couldn't read its diff from gh; will retry next sweep`)
        continue
      }
      lines = counted
    }

    // Size gates only in auto scope, and the review label force-includes an oversized PR.
    // (In label scope the label is already the opt-in, so size never gates.)
    const oversized = cfg.scope === 'auto' && lines > cfg.diffLineCap
    if (oversized && !hasReviewLabel(pr, cfg)) {
      log(`skip #${pr.number} — diff ${lines} lines > cap ${cfg.diffLineCap} (add '${cfg.reviewLabel}' to force)`)
      continue
    }

    if (cfg.dryRun) {
      log(`DRY_RUN would review #${pr.number} @ ${pr.headRefOid.slice(0, 8)} (diff ${lines} lines)`)
      continue
    }

    const used = reviewPr(cfg, pr, priorReviewThread(comments))
    if (used === null) continue
    reviewed += 1
    tokens += used
  }

  log(`sweep done — scope=${cfg.scope} reviewed=${reviewed} tokens~${tokens}`)
}

main()
@@ -1,16 +0,0 @@
1
- import type { LocalModel } from "./model.ts";
2
- import type { SearchMatch, SemChangeSet, SemContext, SemContextPack, StupifyCheck } from "./types.ts";
3
- export declare function runSearch(model: LocalModel, request: SearchRequest): Promise<readonly SearchMatch[]>;
4
- export type SearchRequest = Readonly<{
5
- prompt: string;
6
- schema: unknown;
7
- contexts: readonly SemContext[];
8
- }>;
9
- export declare function searchRequest(input: Readonly<{
10
- changeSet: SemChangeSet;
11
- contexts: readonly SemContext[];
12
- pack: SemContextPack;
13
- patterns: readonly StupifyCheck[];
14
- includeCounterReasonInPrompt?: boolean;
15
- }>): SearchRequest;
16
- export declare function countPromptTokens(model: LocalModel, prompt: string): Promise<number>;
package/dist/analysis.js DELETED
@@ -1,168 +0,0 @@
1
- import { cachedJson, fingerprint } from "./cache.js";
2
- import { searchPrompt } from "./prompts.js";
3
- import { diagnostic, diagnosticError } from "./ui.js";
4
- export async function runSearch(model, request) {
5
- const raw = await runJsonPrompt(model, request.prompt, request.schema, 0);
6
- return uncheckedSearchMatches(raw, request.contexts);
7
- }
8
- export function searchRequest(input) {
9
- return {
10
- prompt: searchPrompt({
11
- ...input,
12
- includeCounterReason: input.includeCounterReasonInPrompt ?? false,
13
- }),
14
- schema: searchSchema(input.contexts),
15
- contexts: input.contexts,
16
- };
17
- }
18
- export async function countPromptTokens(model, prompt) {
19
- const cached = await cachedJson("prompt-tokens", fingerprint({
20
- version: 1,
21
- modelId: model.id,
22
- profile: model.profile,
23
- prompt,
24
- }), async () => {
25
- const response = await fetch(`${model.baseUrl}/tokenize`, {
26
- method: "POST",
27
- headers: { "content-type": "application/json" },
28
- body: JSON.stringify({ content: prompt }),
29
- });
30
- if (!response.ok) {
31
- throw new Error(`llama-server tokenize failed: HTTP ${response.status} ${await response.text()}`);
32
- }
33
- const body = await response.json();
34
- if (!Array.isArray(body.tokens))
35
- throw new Error("llama-server tokenize returned no tokens.");
36
- return { count: body.tokens.length };
37
- });
38
- return cached.count;
39
- }
40
- function searchSchema(contexts) {
41
- return {
42
- type: "object",
43
- properties: {
44
- matches: {
45
- type: "array",
46
- maxItems: 5,
47
- items: {
48
- type: "object",
49
- properties: {
50
- targetId: { type: "string", enum: contexts.map((context) => context.targetId) },
51
- reason: { type: "string" },
52
- proof: { type: "string" },
53
- },
54
- required: ["targetId", "reason", "proof"],
55
- additionalProperties: false,
56
- },
57
- },
58
- },
59
- required: ["matches"],
60
- additionalProperties: false,
61
- };
62
- }
63
- function uncheckedSearchMatches(value, contexts) {
64
- const output = value;
65
- const contextsByTargetId = new Map(contexts.map((context) => [context.targetId, context]));
66
- return (output.matches ?? []).flatMap((match) => {
67
- const targetId = match.targetId ?? "";
68
- const context = contextsByTargetId.get(targetId);
69
- if (!context)
70
- return [];
71
- return [{
72
- targetId,
73
- patternId: context.checkId,
74
- reason: match.reason ?? "",
75
- proof: sourcePointer(context),
76
- snapshot: sourceSnapshot(context),
77
- filePath: context.filePath,
78
- entityName: context.entityName,
79
- entityKind: context.entityKind,
80
- }];
81
- });
82
- }
83
- function sourcePointer(context) {
84
- const file = context.filePath ?? "(unknown)";
85
- return `${file}::${context.entityKind || "entity"}::${context.entityName || context.entityId}`;
86
- }
87
- function sourceSnapshot(context) {
88
- try {
89
- const parsed = JSON.parse(context.text);
90
- const snapshot = stringSnapshot(parsed.after) ?? stringSnapshot(parsed.before);
91
- return snapshot ? limitSnapshot(snapshot) : undefined;
92
- }
93
- catch {
94
- return undefined;
95
- }
96
- }
97
- function stringSnapshot(value) {
98
- if (typeof value !== "string")
99
- return undefined;
100
- const trimmed = value.trim();
101
- if (!trimmed || trimmed === "(none)")
102
- return undefined;
103
- return trimmed;
104
- }
105
- function limitSnapshot(value) {
106
- const lines = value.split(/\r?\n/);
107
- const limit = 12;
108
- if (lines.length <= limit)
109
- return value;
110
- return `${lines.slice(0, limit).join("\n")}
111
- [stupify: snapshot shortened after ${limit} lines]`;
112
- }
113
- async function runJsonPrompt(model, prompt, schema, temperature) {
114
- return cachedJson("model-json", fingerprint({
115
- version: 1,
116
- modelId: model.id,
117
- profile: model.profile,
118
- prompt,
119
- schema,
120
- temperature,
121
- }), () => runJsonPromptUncached(model, prompt, schema, temperature));
122
- }
123
- async function runJsonPromptUncached(model, prompt, schema, temperature) {
124
- const first = await complete(model, prompt, schema, temperature);
125
- const parsed = parseJson(first);
126
- if (parsed.ok)
127
- return parsed.value;
128
- const retry = await complete(model, `${prompt}
129
-
130
- Your previous response was not valid JSON. Return the requested JSON object only.`, schema, temperature);
131
- const retryParsed = parseJson(retry);
132
- if (retryParsed.ok)
133
- return retryParsed.value;
134
- diagnosticError("Raw model output:");
135
- diagnostic(retry);
136
- throw new Error("Model returned invalid JSON.");
137
- }
138
- async function complete(model, prompt, schema, temperature) {
139
- const response = await fetch(`${model.baseUrl}/v1/chat/completions`, {
140
- method: "POST",
141
- headers: { "content-type": "application/json" },
142
- body: JSON.stringify({
143
- model: model.id,
144
- messages: [{ role: "user", content: prompt }],
145
- temperature,
146
- response_format: {
147
- type: "json_object",
148
- schema,
149
- },
150
- }),
151
- });
152
- if (!response.ok)
153
- throw new Error(`llama-server request failed: HTTP ${response.status} ${await response.text()}`);
154
- const body = await response.json();
155
- const content = body.choices?.[0]?.message?.content;
156
- if (typeof content !== "string")
157
- throw new Error("llama-server returned no message content.");
158
- return content;
159
- }
160
- function parseJson(raw) {
161
- try {
162
- const value = JSON.parse(raw);
163
- return { ok: true, value };
164
- }
165
- catch {
166
- return { ok: false };
167
- }
168
- }
package/dist/cache.d.ts DELETED
@@ -1,2 +0,0 @@
1
- export declare function fingerprint(value: unknown): string;
2
- export declare function cachedJson<T>(namespace: string, key: string, compute: () => Promise<T>): Promise<T>;