@stupify/cli 0.0.16 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/.review/CORPUS.md +44 -0
  2. package/.review/CORPUS.template.md +73 -0
  3. package/.review/REVIEW-PROMPT.md +52 -0
  4. package/.review/RUBRIC.md +46 -0
  5. package/LICENSE +1 -1
  6. package/README.md +95 -37
  7. package/package.json +27 -26
  8. package/packs/antirez.md +10 -0
  9. package/packs/anton-kropp.md +10 -0
  10. package/packs/dhh.md +10 -0
  11. package/packs/dtolnay.md +10 -0
  12. package/packs/jarred-sumner.md +9 -0
  13. package/packs/mitchell-hashimoto.md +10 -0
  14. package/packs/rich-harris.md +10 -0
  15. package/packs/simon-willison.md +10 -0
  16. package/packs/sindre-sorhus.md +10 -0
  17. package/packs/tanner-linsley.md +10 -0
  18. package/packs/zod.md +10 -0
  19. package/src/cli.ts +626 -0
  20. package/src/prime-install.test.ts +109 -0
  21. package/src/prime.ts +50 -0
  22. package/src/review-sweep.test.ts +101 -0
  23. package/src/review-sweep.ts +526 -0
  24. package/dist/analysis.d.ts +0 -16
  25. package/dist/analysis.js +0 -168
  26. package/dist/cache.d.ts +0 -2
  27. package/dist/cache.js +0 -57
  28. package/dist/checks.d.ts +0 -4
  29. package/dist/checks.js +0 -228
  30. package/dist/command.d.ts +0 -2
  31. package/dist/command.js +0 -147
  32. package/dist/constants.d.ts +0 -4
  33. package/dist/constants.js +0 -53
  34. package/dist/counter-scout.d.ts +0 -21
  35. package/dist/counter-scout.js +0 -167
  36. package/dist/diff.d.ts +0 -1
  37. package/dist/diff.js +0 -10
  38. package/dist/doctor.d.ts +0 -16
  39. package/dist/doctor.js +0 -143
  40. package/dist/git.d.ts +0 -17
  41. package/dist/git.js +0 -368
  42. package/dist/hooks.d.ts +0 -5
  43. package/dist/hooks.js +0 -135
  44. package/dist/index.d.ts +0 -1
  45. package/dist/index.js +0 -1
  46. package/dist/model.d.ts +0 -11
  47. package/dist/model.js +0 -296
  48. package/dist/prompts.d.ts +0 -8
  49. package/dist/prompts.js +0 -89
  50. package/dist/render.d.ts +0 -6
  51. package/dist/render.js +0 -295
  52. package/dist/repomix-provider.d.ts +0 -12
  53. package/dist/repomix-provider.js +0 -196
  54. package/dist/search-bench.d.ts +0 -1
  55. package/dist/search-bench.js +0 -677
  56. package/dist/search-profile.d.ts +0 -6
  57. package/dist/search-profile.js +0 -73
  58. package/dist/sem-provider.d.ts +0 -2
  59. package/dist/sem-provider.js +0 -255
  60. package/dist/stupify.d.ts +0 -38
  61. package/dist/stupify.js +0 -505
  62. package/dist/trace.d.ts +0 -31
  63. package/dist/trace.js +0 -86
  64. package/dist/types.d.ts +0 -341
  65. package/dist/types.js +0 -6
  66. package/dist/ui.d.ts +0 -34
  67. package/dist/ui.js +0 -143
  68. package/src/analysis.ts +0 -223
  69. package/src/cache.ts +0 -63
  70. package/src/checks.ts +0 -231
  71. package/src/command.ts +0 -173
  72. package/src/constants.ts +0 -56
  73. package/src/counter-scout.ts +0 -195
  74. package/src/diff.ts +0 -9
  75. package/src/doctor.ts +0 -166
  76. package/src/git.ts +0 -380
  77. package/src/hooks.ts +0 -151
  78. package/src/index.ts +0 -1
  79. package/src/model.ts +0 -367
  80. package/src/prompts.ts +0 -100
  81. package/src/render.ts +0 -328
  82. package/src/repomix-provider.ts +0 -219
  83. package/src/search-bench.ts +0 -783
  84. package/src/search-profile.ts +0 -89
  85. package/src/sem-provider.ts +0 -300
  86. package/src/stupify.ts +0 -604
  87. package/src/trace.ts +0 -126
  88. package/src/types.ts +0 -362
  89. package/src/ui.ts +0 -187
@@ -0,0 +1,526 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * stupify (review sweep) — auto-review open GitHub PRs with Codex against a corpus of code YOU picked.
4
+ * The engine the `stupify` CLI deploys to ~/.stupify and runs on a cron (or `stupify run`); config.env sits
5
+ * next to it.
6
+ *
7
+ * Reviews every PR by default (SCOPE=auto): every non-draft, non-bot PR under DIFF_LINE_CAP, no label needed.
8
+ * REVIEW_LABEL is just a force-include override for an oversized diff. Want manual control instead? SCOPE=label
9
+ * flips it to opt-in: only PRs you tag REVIEW_LABEL are reviewed, so spend tracks exactly what you tag.
10
+ * The "taste" — REVIEW-PROMPT.md, RUBRIC.md, CORPUS.md — lives in the TARGET repo under REVIEW_DIR (default
11
+ * `.review/`), so it's version-controlled with the code it judges and edited via a normal PR.
12
+ * Idempotent: skips a PR already reviewed — or already reported as failed — at its current head SHA, via a
13
+ * hidden marker comment. A new push moves the SHA, clears the markers, and re-arms the review.
14
+ * Per-PR memory: each review is fed the PR's existing review thread, so it won't re-raise resolved/declined
15
+ * items and converges ("no new blocking issues") instead of nagging forever.
16
+ *
17
+ * Single-flight: the sweep takes its own lockfile (state/sweep.lock) so two cron ticks never overlap — no
18
+ * `flock` dependency. Every knob lives in config.env next to this file (read fresh each run). Run: `bun review-sweep.ts`.
19
+ */
20
+ import { spawnSync } from 'node:child_process'
21
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, rmSync, statSync, writeFileSync } from 'node:fs'
22
+ import { dirname, join } from 'node:path'
23
+ import { fileURLToPath } from 'node:url'
24
+
25
// Directory holding this script; config.env is looked up next to it.
const scriptPath = fileURLToPath(import.meta.url)
const KIT_DIR = dirname(scriptPath)
26
+
27
/** Every knob a sweep run needs. Built by loadConfig; `reviewDir` is finalised in main. */
export interface Config {
  /** Dedicated checkout we hard-reset — never a working checkout you care about. */
  repoDir: string
  remote: string
  slug: string
  defaultBranch: string
  /**
   * Resolved review dir holding REVIEW-PROMPT.md / RUBRIC.md / CORPUS.md — the repo's .review/ if it has
   * one, else homeReviewDir (set in main).
   */
  reviewDir: string
  /** Fallback taste the CLI assembled under STUPIFY_HOME/.review (packs or bring-your-own). */
  homeReviewDir: string
  scope: 'label' | 'auto'
  reviewLabel: string
  diffLineCap: number
  dryRun: boolean
  maxPrs: number
  stateDir: string
  codexEffort: string
  /** Optional `-c model_provider=...`; empty = codex's own default/auth. */
  codexProvider: string
  /** Optional `-c model=...`; empty = codex's default model. */
  codexModel: string
}
44
+
45
/**
 * Resolve this run's Config from config.env (next to this script) and the process environment.
 *
 * Side effects: creates the state directory, points LOG at state/sweep.log, and exits with status 1
 * when REPO_SLUG is empty.
 */
function loadConfig(): Config {
  const fileVars = parseEnvFile(join(KIT_DIR, 'config.env'))
  // A one-shot env override beats the persisted config.env, so `DRY_RUN=1 bun review-sweep.ts` previews
  // even when the deployed file says DRY_RUN=0.
  const lookup = (key: string): string | undefined => process.env[key] ?? fileVars[key]
  const pick = (key: string, fallback: string): string => lookup(key) ?? fallback

  // Non-negative integer knob with a lower bound; anything else logs a warning and uses the fallback.
  const int = (key: string, fallback: number, min: number): number => {
    const set = lookup(key)
    if (set === undefined) return fallback
    const digits = set.trim()
    const parsed = Number(digits)
    const valid = /^\d+$/.test(digits) && parsed >= min
    if (!valid) {
      log(`config: ${key}='${set}' is not an integer ≥ ${min} — using ${fallback}`)
      return fallback
    }
    return parsed
  }

  // Boolean knob: `unset` applies when the key is absent, `onInvalid` when the value is unrecognised.
  const bool = (key: string, unset: boolean, onInvalid: boolean): boolean => {
    const set = lookup(key)
    if (set === undefined) return unset
    switch (set.trim().toLowerCase()) {
      case '1':
      case 'true':
      case 'yes':
      case 'on':
        return true
      case '0':
      case 'false':
      case 'no':
      case 'off':
        return false
      default:
        log(`config: ${key}='${set}' is not a boolean (1/0/true/false/yes/no/on/off) — using ${onInvalid} (fail-safe)`)
        return onInvalid
    }
  }

  // Home defaults to the script's own directory; state and the dedicated checkout live beneath it.
  const stupifyHome = pick('STUPIFY_HOME', KIT_DIR)
  const stateDir = join(stupifyHome, 'state')
  mkdirSync(stateDir, { recursive: true })
  // Assigned before any knob is parsed so the warnings above are appended to sweep.log too.
  LOG = join(stateDir, 'sweep.log')

  const slug = pick('REPO_SLUG', '').trim()
  if (slug === '') {
    log('config: REPO_SLUG is required (owner/repo) — aborting. Run `stupify` to set up.')
    process.exit(1)
  }

  const scopeRaw = pick('SCOPE', 'auto').trim().toLowerCase()
  const scopeKnown = scopeRaw === 'label' || scopeRaw === 'auto'
  if (!scopeKnown) log(`config: SCOPE='${scopeRaw}' is not 'label' or 'auto' — using auto`)
  // Only the explicit string 'label' opts into per-PR tagging; everything else is auto.
  const scope: Config['scope'] = scopeRaw === 'label' ? 'label' : 'auto'

  // int/bool may log warnings, so they are evaluated in the same order as the fields appear.
  const diffLineCap = int('DIFF_LINE_CAP', 800, 1)
  const dryRun = bool('DRY_RUN', false, true) // unset = live; unrecognised value = preview (never post on a typo)
  const maxPrs = int('MAX_PRS', 15, 1)

  return {
    repoDir: join(stupifyHome, 'repo'), // pinned under STUPIFY_HOME: refreshRepo runs `git reset --hard` here
    remote: pick('REMOTE', `https://github.com/${slug}.git`),
    slug,
    defaultBranch: pick('DEFAULT_BRANCH', 'main'),
    reviewDir: pick('REVIEW_DIR', '.review'), // relative here; main() replaces it with the resolved directory
    homeReviewDir: join(stupifyHome, '.review'),
    scope,
    reviewLabel: pick('REVIEW_LABEL', 'codex-review'),
    diffLineCap,
    dryRun,
    maxPrs,
    stateDir,
    codexEffort: pick('CODEX_EFFORT', 'high'),
    codexProvider: pick('CODEX_PROVIDER', ''),
    codexModel: pick('CODEX_MODEL', ''),
  }
}
101
+
102
/**
 * Minimal KEY=VALUE reader for config.env, aiming to read a value the same way bash does when it sources
 * the file (`KEY='https://…'` → `https://…`).
 *
 * - Blank lines, `#` comment lines, and lines without `=` are ignored.
 * - A leading `export ` on the key is dropped.
 * - A quoted value is taken verbatim up to its closing quote, so a `#` inside quotes is kept.
 * - An unquoted value ends at the first whitespace-then-`#` (inline comment) and is trimmed.
 *
 * @param path file to read; a missing file yields an empty record
 * @returns the parsed key/value pairs (later duplicates overwrite earlier ones)
 */
function parseEnvFile(path: string): Record<string, string> {
  if (!existsSync(path)) return {}
  const out: Record<string, string> = {}
  for (const raw of readFileSync(path, 'utf8').split('\n')) {
    const line = raw.trim() // also removes the `\r` of CRLF files
    if (!line || line.startsWith('#')) continue
    const eq = line.indexOf('=')
    if (eq < 0) continue
    const key = line.slice(0, eq).trim().replace(/^export\s+/, '')
    if (!key) continue
    out[key] = parseEnvValue(line.slice(eq + 1))
  }
  return out
}

/** Decode the right-hand side of one KEY=VALUE line (quotes first, then inline-comment stripping). */
function parseEnvValue(rest: string): string {
  const candidate = rest.trimStart()
  const quote = candidate[0]
  if (quote === "'" || quote === '"') {
    const close = candidate.indexOf(quote, 1)
    // Matched pair: everything between the quotes is the value; anything after it (a comment) is dropped.
    if (close > 0) return candidate.slice(1, close)
  }
  // Unquoted (or unterminated quote): `#` only starts a comment when preceded by whitespace.
  const comment = rest.search(/\s#/)
  return (comment < 0 ? rest : rest.slice(0, comment)).trim()
}
121
+
122
/** Outcome of one child process: success flag, raw stdout, and stdout+stderr plus any spawn/timeout note. */
interface ProcResult {
  ok: boolean
  stdout: string
  combined: string
}

/**
 * Run a command synchronously with stdin closed and capture its output as UTF-8.
 *
 * @param cmd executable to spawn
 * @param args argument vector (no shell is involved)
 * @param opts optional working directory and timeout in milliseconds
 * @returns `ok` only when the exit status is 0 and no spawn error occurred
 */
function exec(cmd: string, args: string[], opts: { cwd?: string; timeoutMs?: number } = {}): ProcResult {
  const { cwd, timeoutMs } = opts
  const child = spawnSync(cmd, args, {
    cwd,
    input: '', // close stdin (codex would otherwise read from the terminal)
    timeout: timeoutMs,
    encoding: 'utf8',
    maxBuffer: 64 * 1024 * 1024,
  })
  const stdout = child.stdout ?? ''
  // A timeout (reported through `signal`) or a spawn failure (reported through `error`) is appended to
  // the captured output so the failure path can show the real cause instead of nothing.
  const pieces = [stdout + (child.stderr ?? '')]
  if (child.signal) {
    const timeoutNote = timeoutMs ? ` (timeout ${timeoutMs}ms)` : ''
    pieces.push(`${cmd}: process killed by ${child.signal}${timeoutNote}`)
  }
  if (child.error) pieces.push(`${cmd}: ${child.error.message}`)
  const succeeded = child.status === 0 && child.error === undefined
  return { ok: succeeded, stdout, combined: pieces.join('\n') }
}
144
+
145
/** Path of the sweep log; empty until loadConfig assigns it, in which case log() only prints. */
let LOG = ''

/** Write one timestamped line (ISO time without milliseconds) to the sweep log, if set, and to stdout. */
function log(message: string): void {
  const stamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z')
  const line = `${stamp} ${message}`
  if (LOG) appendFileSync(LOG, `${line}\n`)
  console.log(line)
}
151
+
152
/**
 * Bring the dedicated checkout to `origin/<cfg.defaultBranch>`: clone it if there is no `.git` yet, then
 * fetch, check out, and hard-reset that branch.
 *
 * @returns true when every git step succeeded; false (after logging) on the first failure
 */
function refreshRepo(cfg: Config): boolean {
  mkdirSync(dirname(cfg.repoDir), { recursive: true })

  const alreadyCloned = existsSync(join(cfg.repoDir, '.git'))
  if (!alreadyCloned) {
    log(`cloning ${cfg.remote} -> ${cfg.repoDir}`)
    const cloned = exec('git', ['clone', '-q', cfg.remote, cfg.repoDir])
    if (!cloned.ok) return logFail('clone failed')
  }

  const branch = cfg.defaultBranch
  const steps: string[][] = [
    ['fetch', '-q', 'origin', branch],
    ['checkout', '-q', branch],
    ['reset', '-q', '--hard', `origin/${branch}`],
  ]
  // every() stops at the first failing step, so later steps never run after a failure.
  const refreshed = steps.every((args) => exec('git', args, { cwd: cfg.repoDir }).ok)
  if (refreshed) return true
  return logFail(`refresh failed (is the default branch '${branch}'? set DEFAULT_BRANCH if not)`)
}

/** Log `message` and return false, so a failure can be reported and returned in one expression. */
function logFail(message: string): false {
  log(message)
  return false
}
171
+
172
/** One open pull request, limited to the fields requested from `gh pr list --json`. */
export interface Pr {
  number: number
  headRefOid: string
  isDraft: boolean
  /** `is_bot` flags GitHub App bots (app/dependabot) that the `[bot]` login suffix misses. */
  author: { login: string; is_bot: boolean } | null
  labels: { name: string }[]
}
179
+
180
/**
 * List the repo's open PRs through `gh pr list --json`.
 *
 * @returns the entries that pass the isPr shape check, or null (after logging) when gh failed or its
 *          output was not a JSON array
 */
function listPrs(cfg: Config): Pr[] | null {
  const abort = (message: string): null => {
    log(message)
    return null
  }

  // Labels are filtered by the caller rather than with `gh pr list --label` — that search index lags
  // behind labelling.
  const fields = ['number', 'headRefOid', 'isDraft', 'author', 'labels'].join(',')
  const listing = exec('gh', ['pr', 'list', '--repo', cfg.slug, '--state', 'open', '--json', fields])
  if (!listing.ok) return abort('gh pr list failed (auth/network down?) — aborting sweep')

  let parsed: unknown
  try {
    parsed = JSON.parse(listing.stdout)
  } catch {
    return abort('gh pr list returned unparseable JSON — aborting sweep')
  }
  if (!Array.isArray(parsed)) return abort('gh pr list returned a non-array — aborting sweep')

  const valid = parsed.filter(isPr)
  const rejected = parsed.length - valid.length
  if (rejected > 0) log(`gh pr list: ${rejected} entries failed shape check — skipped`)
  return valid
}
203
+
204
/**
 * Runtime shape check for one `gh pr list` entry. Every field of Pr is verified with `in`-narrowing
 * (no type assertions), so a malformed entry is rejected here instead of failing later in the loop.
 */
function isPr(raw: unknown): raw is Pr {
  if (typeof raw !== 'object' || raw === null) return false
  const scalarsOk =
    'number' in raw &&
    typeof raw.number === 'number' &&
    'headRefOid' in raw &&
    typeof raw.headRefOid === 'string' &&
    'isDraft' in raw &&
    typeof raw.isDraft === 'boolean'
  if (!scalarsOk) return false
  const labelsOk = 'labels' in raw && Array.isArray(raw.labels) && raw.labels.every(isLabel)
  if (!labelsOk) return false
  return 'author' in raw && isAuthor(raw.author)
}

/** True when `raw` is an object carrying a string `name`. */
function isLabel(raw: unknown): raw is { name: string } {
  if (typeof raw !== 'object' || raw === null) return false
  return 'name' in raw && typeof raw.name === 'string'
}

/** True for null, or for an object with a string `login` and a boolean `is_bot`. */
function isAuthor(raw: unknown): raw is { login: string; is_bot: boolean } | null {
  if (raw === null) return true
  if (typeof raw !== 'object') return false
  const hasLogin = 'login' in raw && typeof raw.login === 'string'
  const hasBotFlag = 'is_bot' in raw && typeof raw.is_bot === 'boolean'
  return hasLogin && hasBotFlag
}
225
+
226
/** True when the PR carries the configured review label (exact name match). */
function hasReviewLabel(pr: Pr, cfg: Config): boolean {
  for (const label of pr.labels) {
    if (label.name === cfg.reviewLabel) return true
  }
  return false
}

/**
 * Decide whether a PR is eligible for review: drafts and bot PRs are never eligible; in `label` scope
 * the PR must carry the review label; in `auto` scope every remaining PR is eligible.
 */
function inScope(pr: Pr, cfg: Config): boolean {
  if (pr.isDraft) return false
  // gh's is_bot catches GitHub App bots (login `app/dependabot`) that the `[bot]` suffix misses; the
  // suffix check stays as a fallback.
  const login = pr.author?.login ?? ''
  const authoredByBot = pr.author?.is_bot === true || login.endsWith('[bot]')
  if (authoredByBot) return false
  return cfg.scope === 'label' ? hasReviewLabel(pr, cfg) : true
}
238
+
239
/** One PR comment reduced to what the sweep needs: the author's login and the comment body. */
interface Comment {
  login: string
  body: string
}

/**
 * Read a PR's comments through `gh pr view --json comments`.
 *
 * @returns the comments, or null when gh failed or returned something unusable. The caller skips such
 *          a PR instead of treating it as unreviewed, so a GitHub blip cannot cause a duplicate review.
 */
function prComments(cfg: Config, number: number): Comment[] | null {
  const view = exec('gh', ['pr', 'view', String(number), '--repo', cfg.slug, '--json', 'comments'])
  if (!view.ok) return null

  let parsed: unknown
  try {
    parsed = JSON.parse(view.stdout)
  } catch {
    return null
  }

  if (typeof parsed !== 'object' || parsed === null) return null
  if (!('comments' in parsed)) return null
  const list = parsed.comments
  if (!Array.isArray(list)) return null
  return list.map(toComment)
}

/** Convert one raw comment entry to a Comment; any missing or mistyped field becomes an empty string. */
function toComment(c: unknown): Comment {
  if (typeof c !== 'object' || c === null) return { login: '', body: '' }

  let body = ''
  if ('body' in c && typeof c.body === 'string') body = c.body

  let login = ''
  if ('author' in c) {
    const author = c.author
    if (typeof author === 'object' && author !== null && 'login' in author && typeof author.login === 'string') {
      login = author.login
    }
  }
  return { login, body }
}
269
+
270
// The per-PR MEMORY: the existing review conversation — the reviewer's past reviews + the author's replies —
// fed back into the prompt so it stops re-litigating settled points and knows when to converge. The GitHub
// thread is the store; this only reads it back.
const MEMORY_COMMENTS = 20 // recent thread context, bounded so the prompt can't balloon on a chatty PR

/**
 * Render the PR's comment thread as prompt text.
 *
 * Comments from `[bot]` logins are dropped, hidden HTML-comment markers are stripped from each body, and
 * comments left with no visible text are discarded BEFORE the window is taken, so marker-only or empty
 * comments neither emit an empty `@login:` entry nor use up one of the MEMORY_COMMENTS slots.
 *
 * @param comments thread in chronological order (any records with `login` and `body`)
 * @returns the last MEMORY_COMMENTS visible comments joined by `---` rules, or '' when none remain
 */
function priorReviewThread(comments: readonly { login: string; body: string }[]): string {
  return comments
    .filter((c) => !c.login.endsWith('[bot]')) // drop CI bots; keep prior reviews + human/agent replies
    .map((c) => ({ login: c.login, text: c.body.replace(/<!--[\s\S]*?-->/g, '').trim() })) // strip hidden markers
    .filter((c) => c.text.length > 0)
    .slice(-MEMORY_COMMENTS)
    .map((c) => `@${c.login}:\n${c.text}`)
    .join('\n\n---\n\n')
}
283
+
284
/**
 * Count the lines of `gh pr diff` output for a PR (a single trailing newline is not counted as a line).
 *
 * @returns the line count, 0 for empty output, or null when gh failed. The caller treats null as
 *          "unmeasured" rather than "0 lines", so an unreadable diff is never reviewed as under-cap.
 */
function diffLineCount(cfg: Config, number: number): number | null {
  const diff = exec('gh', ['pr', 'diff', String(number), '--repo', cfg.slug])
  if (!diff.ok) return null
  const text = diff.stdout
  if (text === '') return 0
  const pieces = text.split('\n').length
  return text.endsWith('\n') ? pieces - 1 : pieces
}
292
+
293
/** Hidden HTML-comment markers ("reviewed" and "failed") keyed to the PR's current head SHA. */
function markersFor(pr: Pr): { mark: string; failMark: string } {
  const sha = pr.headRefOid
  const mark = `<!-- stupify:${sha} -->`
  const failMark = `<!-- stupify-failed:${sha} -->`
  return { mark, failMark }
}
299
+
300
/**
 * Build the taste prefix: fixed instructions followed by REVIEW-PROMPT.md, RUBRIC.md and CORPUS.md (each
 * trimmed) read from `cfg.reviewDir` and inlined verbatim.
 *
 * The text depends only on those three files, so it is the same for every PR in a repo; the design
 * intent is a stable prompt prefix that a provider can cache across reviews. Only the corpus INDEX is
 * inlined — its exemplars stay links the model opens on demand. Keep ALL per-PR tokens (diff target,
 * marker, memory) out of here; they belong in the tail that reviewPrompt appends.
 */
export function stablePrefix(cfg: Config): string {
  const load = (name: string): string => readFileSync(join(cfg.reviewDir, name), 'utf8').trim()
  const spec = load('REVIEW-PROMPT.md')
  const rubric = load('RUBRIC.md')
  const corpus = load('CORPUS.md')
  const sections = [
    'You are a code reviewer running in an automated sweep (you have gh + git; no token needed). DO NOT modify any code.',
    'Everything down to the "THIS PR" line is your fixed spec and taste — identical for every PR, so treat it as standing reference.',
    '',
    '===== REVIEW SPEC (format + rules) =====',
    spec,
    '',
    '===== RUBRIC (what counts as slop) =====',
    rubric,
    '',
    '===== CORPUS (good-code reference; the links are commit-pinned — open one ONLY when a finding needs to cite it) =====',
    corpus,
  ]
  return sections.join('\n')
}
320
+
321
/**
 * Assemble the full prompt for one PR: the stable prefix first, then the per-PR tail (diff command,
 * output path, marker line, post command) and, when `priorThread` is non-empty, the prior-review memory.
 */
export function reviewPrompt(cfg: Config, pr: Pr, priorThread: string): string {
  const { mark } = markersFor(pr)
  const outPath = `/tmp/review-${pr.number}.md`

  // The memory section is appended only when there is an existing thread to continue.
  let memory = ''
  if (priorThread) {
    memory = `\n\n## Prior reviews on this PR (your memory)
This is the existing review conversation — your past reviews and the author's replies. You are CONTINUING it,
not starting fresh. Apply the spec's "Prior reviews on this PR" rules: don't re-raise resolved or
reasoned-declined items, report only what's genuinely new, and converge (post the one-line "no new issues"
and stop) if nothing new remains.

${priorThread}`
  }

  // Stable prefix first; everything that varies per PR comes after it.
  const prefix = stablePrefix(cfg)
  return `${prefix}

===== THIS PR (the only part that changes per run) =====
Review ONE pull request, per the spec and rubric above:
1. Get the diff: gh pr diff ${pr.number} --repo ${cfg.slug}
2. Review it — catch bugs / type-lies / dead-code / footguns AND reinvents-primitive / slop, each citing the corpus primitive it should reuse; sort worst-first.
3. Write the review to ${outPath}, formatted EXACTLY per the spec's 'Comment format' section (it owns the format — opener, finding blocks, attribution). END the file with exactly this line: ${mark}
4. Post it: gh pr comment ${pr.number} --repo ${cfg.slug} --body-file ${outPath}
Keep it terse; no preamble.${memory}`
}
344
+
345
/**
 * Run one codex review for a PR and append codex's output to the sweep log.
 *
 * On a non-zero/failed codex run, a short failure comment carrying the fail marker is posted to the PR
 * so the next sweep skips this head instead of retrying it on every run.
 *
 * @returns the token count parsed from codex's output (0 when none was found) on success, or null when
 *          the codex run failed
 */
function reviewPr(cfg: Config, pr: Pr, priorThread: string): number | null {
  const { failMark } = markersFor(pr)
  const outPath = `/tmp/review-${pr.number}.md`
  log(`reviewing PR #${pr.number} @ ${pr.headRefOid.slice(0, 8)}`)

  // Each override is passed to codex as its own `-c key=value` pair, in this order.
  const overrides = [
    `model_reasoning_effort=${cfg.codexEffort}`,
    'sandbox_workspace_write.network_access=true',
    'sandbox_workspace_write.writable_roots=["/tmp"]',
  ]
  if (cfg.codexProvider) overrides.push(`model_provider=${cfg.codexProvider}`)
  if (cfg.codexModel) overrides.push(`model=${cfg.codexModel}`)
  const codexArgs = [
    'exec',
    '--cd',
    cfg.repoDir,
    '--sandbox',
    'workspace-write',
    ...overrides.flatMap((override) => ['-c', override]),
    reviewPrompt(cfg, pr, priorThread),
  ]

  const run = exec('codex', codexArgs, { cwd: cfg.repoDir, timeoutMs: 1_200_000 })
  appendFileSync(LOG, `${run.combined}\n`)

  if (!run.ok) {
    // Don't fail silently: report the reason on the PR and stamp the fail marker.
    const reason = failureReason(run.combined)
    log(` review FAILED for #${pr.number} — ${reason}`)
    const bodyLines = [
      "uhh — i couldn't review this one. codex didn't run:",
      '',
      `> ${reason}`,
      '',
      "_— stupify (auto-reviewer). i'll retry on your next push._",
      failMark,
    ]
    writeFileSync(outPath, `${bodyLines.join('\n')}\n`)
    const posted = exec('gh', ['pr', 'comment', String(pr.number), '--repo', cfg.slug, '--body-file', outPath])
    if (!posted.ok) log(` (couldn't post failure comment for #${pr.number} either — gh down?)`)
    return null
  }

  const tokens = parseTokens(run.combined)
  log(` #${pr.number} done (${tokens ?? '?'} tokens)`)
  return tokens ?? 0
}
394
+
395
/**
 * Extract the token count from codex output. Finds the LAST line containing `tokens used`
 * (case-insensitive) and reads the digits of the line after it.
 *
 * @returns the count, or null when there is no such line or the following line has no digits
 */
function parseTokens(out: string): number | null {
  const lines = out.split('\n')
  let idx = lines.length - 1
  while (idx >= 0 && !/tokens used/i.test(lines[idx] ?? '')) idx -= 1
  if (idx < 0) return null
  // Separators such as commas are discarded; only the digits of the next line count.
  const digits = (lines[idx + 1] ?? '').replace(/\D/g, '')
  return digits === '' ? null : Number(digits)
}
407
+
408
/**
 * Pick a one-line failure reason out of codex's combined output for the PR failure comment.
 *
 * The last line that matches a failure keyword and is not an explicit "no error(s)" / "0 error(s)"
 * report wins. The noise pattern is word-bounded so a real count such as "10 errors" is not mistaken
 * for "0 errors" and discarded. Backticks are replaced with spaces and the line is capped at 220
 * characters.
 *
 * @param out combined stdout/stderr (plus any spawn/timeout note) of the codex run
 * @returns the cleaned line, or a generic fallback message when nothing matched
 */
function failureReason(out: string): string {
  const signal = /payment required|credits|quota|rate.?limit|429|5\d\d |timeout|killed|enoent|spawn|error/i
  const noise = /\bno errors?\b|\b0 errors?\b/i
  const candidates = out
    .split('\n')
    .map((l) => l.trim())
    .filter((l) => signal.test(l) && !noise.test(l))
  const hit = candidates[candidates.length - 1] ?? ''
  const cleaned = hit.replace(/`/g, ' ').slice(0, 220).trim()
  return cleaned || 'codex run failed (no output captured — check the sweep log)'
}
419
+
420
/**
 * Single-flight without flock: an exclusive create (`wx`) wins atomically.
 *
 * An existing lock is stolen only when BOTH hold: it is older than 30 minutes, and the PID recorded in
 * it no longer refers to a live process. Age alone is not enough — one sweep can run several reviews
 * with a 20-minute cap each, and the lock file is never touched again after creation, so a healthy long
 * sweep would otherwise have its lock taken by the next cron tick.
 * Limitation: if a crashed sweep's PID has been reused by an unrelated live process, the lock is kept
 * until that process exits or the file is removed by hand.
 *
 * @param path lock file location
 * @returns true when this process now holds the lock; false when another sweep holds it or the lock
 *          could not be inspected (the next sweep retries)
 */
function acquireLock(path: string): boolean {
  const staleAfterMs = 30 * 60_000
  try {
    writeFileSync(path, String(process.pid), { flag: 'wx' })
    return true
  } catch {
    /* lock exists (or cannot be created) — inspect it below */
  }
  try {
    if (Date.now() - statSync(path).mtimeMs <= staleAfterMs) return false
    if (lockOwnerAlive(path)) return false
    writeFileSync(path, String(process.pid)) // take over the dead owner's lock; this also refreshes mtime
    return true
  } catch {
    /* lock vanished or became unreadable between calls — let the next sweep retry */
    return false
  }
}

/** True when the lock file records the PID of a process that still exists. */
function lockOwnerAlive(path: string): boolean {
  const pid = Number(readFileSync(path, 'utf8').trim())
  if (!Number.isInteger(pid) || pid <= 0) return false // unreadable owner: treat as a crashed run
  try {
    process.kill(pid, 0) // signal 0 only tests for existence; nothing is delivered
    return true
  } catch (err: unknown) {
    // EPERM means the process exists but belongs to someone else; anything else (ESRCH) means it is gone.
    return typeof err === 'object' && err !== null && 'code' in err && err.code === 'EPERM'
  }
}
438
+
439
/**
 * One sweep: load config, take the single-flight lock, refresh the checkout, resolve the review
 * directory, then walk the in-scope open PRs and review (or, in dry-run, only report) each one that has
 * no marker for its current head SHA.
 *
 * MAX_PRS counts only PRs that are actually reviewed (or reported by dry-run). PRs skipped for an
 * existing marker, an unreadable diff, or an oversized diff do not consume the budget — they get no
 * marker, so counting them would let the same skipped PRs use up the budget on every sweep and starve
 * the PRs behind them.
 */
function main(): void {
  const cfg = loadConfig() // also mkdirs stateDir and sets LOG, so config warnings are already captured

  const lockPath = join(cfg.stateDir, 'sweep.lock')
  if (!acquireLock(lockPath)) {
    log('another sweep already running — skip')
    return
  }
  // Release the lock on every exit path, including the process.exit(1) calls below.
  process.on('exit', () => {
    try {
      rmSync(lockPath, { force: true })
    } catch {
      /* best-effort */
    }
  })

  if (!refreshRepo(cfg)) process.exit(1)
  // Resolve the taste: the target repo's own review dir wins when it has a CORPUS.md; otherwise fall back
  // to the home review dir. Either way cfg.reviewDir is replaced with the resolved directory.
  const repoReview = join(cfg.repoDir, cfg.reviewDir)
  cfg.reviewDir = existsSync(join(repoReview, 'CORPUS.md')) ? repoReview : cfg.homeReviewDir
  const haveMachinery =
    existsSync(join(cfg.reviewDir, 'CORPUS.md')) &&
    existsSync(join(cfg.reviewDir, 'REVIEW-PROMPT.md')) &&
    existsSync(join(cfg.reviewDir, 'RUBRIC.md'))
  if (!haveMachinery) {
    log(`no review machinery at ${cfg.reviewDir}/ (need REVIEW-PROMPT.md + RUBRIC.md + CORPUS.md) — no-op. Run \`stupify setup\` to assemble taste, or add a .review/ to ${cfg.slug}.`)
    return
  }

  const prs = listPrs(cfg)
  if (prs === null) process.exit(1)
  const queue = prs.filter((pr) => inScope(pr, cfg)) // MAX_PRS is applied to PRs actually HANDLED, not iterated (below)

  let reviewed = 0
  let tokens = 0
  // PRs we reviewed (or would review, in dry-run) this sweep; capped at MAX_PRS.
  let handled = 0
  for (const pr of queue) {
    const { mark, failMark } = markersFor(pr)
    const comments = prComments(cfg, pr.number)
    if (comments === null) {
      log(`skip #${pr.number} — couldn't read it from gh (failed/malformed); will retry next sweep`)
      continue
    }
    const bodies = comments.map((c) => c.body).join('\n')
    if (bodies.includes(mark) || bodies.includes(failMark)) continue

    // Past the cheap dedup skip — this PR is a real candidate. Stop once the budget is spent and defer
    // the rest to the next sweep.
    if (handled >= cfg.maxPrs) {
      log(`reached MAX_PRS=${cfg.maxPrs} this sweep — deferring remaining candidates to the next sweep`)
      break
    }

    let lines = 0
    if (cfg.scope === 'auto' || cfg.dryRun) {
      const counted = diffLineCount(cfg, pr.number)
      if (counted === null) {
        log(`skip #${pr.number} — couldn't read its diff from gh; will retry next sweep`)
        continue
      }
      lines = counted
    }
    // auto-scope only: skip oversized diffs UNLESS the PR carries the review label (the documented force-include).
    // (label-scope means you already opted in, so size never gates there.)
    if (cfg.scope === 'auto' && lines > cfg.diffLineCap && !hasReviewLabel(pr, cfg)) {
      log(`skip #${pr.number} — diff ${lines} lines > cap ${cfg.diffLineCap} (add '${cfg.reviewLabel}' to force)`)
      continue
    }

    // Only now does the PR consume budget: every skip above leaves `handled` untouched.
    handled += 1
    if (cfg.dryRun) {
      log(`DRY_RUN would review #${pr.number} @ ${pr.headRefOid.slice(0, 8)} (diff ${lines} lines)`)
      continue
    }

    const used = reviewPr(cfg, pr, priorReviewThread(comments))
    if (used !== null) {
      reviewed += 1
      tokens += used
    }
  }

  log(`sweep done — scope=${cfg.scope} reviewed=${reviewed} tokens~${tokens}`)
}
525
+
526
// Run the sweep only when this file is the entry script (cron / `stupify run`); importing it for tests
// must not start one.
if (import.meta.main) {
  main()
}
@@ -1,16 +0,0 @@
1
- import type { LocalModel } from "./model.ts";
2
- import type { SearchMatch, SemChangeSet, SemContext, SemContextPack, StupifyCheck } from "./types.ts";
3
- export declare function runSearch(model: LocalModel, request: SearchRequest): Promise<readonly SearchMatch[]>;
4
- export type SearchRequest = Readonly<{
5
- prompt: string;
6
- schema: unknown;
7
- contexts: readonly SemContext[];
8
- }>;
9
- export declare function searchRequest(input: Readonly<{
10
- changeSet: SemChangeSet;
11
- contexts: readonly SemContext[];
12
- pack: SemContextPack;
13
- patterns: readonly StupifyCheck[];
14
- includeCounterReasonInPrompt?: boolean;
15
- }>): SearchRequest;
16
- export declare function countPromptTokens(model: LocalModel, prompt: string): Promise<number>;