@stupify/cli 0.0.16 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.review/CORPUS.md +44 -0
- package/.review/CORPUS.template.md +73 -0
- package/.review/REVIEW-PROMPT.md +52 -0
- package/.review/RUBRIC.md +46 -0
- package/LICENSE +1 -1
- package/README.md +95 -37
- package/package.json +27 -26
- package/packs/antirez.md +10 -0
- package/packs/anton-kropp.md +10 -0
- package/packs/dhh.md +10 -0
- package/packs/dtolnay.md +10 -0
- package/packs/jarred-sumner.md +9 -0
- package/packs/mitchell-hashimoto.md +10 -0
- package/packs/rich-harris.md +10 -0
- package/packs/simon-willison.md +10 -0
- package/packs/sindre-sorhus.md +10 -0
- package/packs/tanner-linsley.md +10 -0
- package/packs/zod.md +10 -0
- package/src/cli.ts +626 -0
- package/src/prime-install.test.ts +109 -0
- package/src/prime.ts +50 -0
- package/src/review-sweep.test.ts +101 -0
- package/src/review-sweep.ts +526 -0
- package/dist/analysis.d.ts +0 -16
- package/dist/analysis.js +0 -168
- package/dist/cache.d.ts +0 -2
- package/dist/cache.js +0 -57
- package/dist/checks.d.ts +0 -4
- package/dist/checks.js +0 -228
- package/dist/command.d.ts +0 -2
- package/dist/command.js +0 -147
- package/dist/constants.d.ts +0 -4
- package/dist/constants.js +0 -53
- package/dist/counter-scout.d.ts +0 -21
- package/dist/counter-scout.js +0 -167
- package/dist/diff.d.ts +0 -1
- package/dist/diff.js +0 -10
- package/dist/doctor.d.ts +0 -16
- package/dist/doctor.js +0 -143
- package/dist/git.d.ts +0 -17
- package/dist/git.js +0 -368
- package/dist/hooks.d.ts +0 -5
- package/dist/hooks.js +0 -135
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/model.d.ts +0 -11
- package/dist/model.js +0 -296
- package/dist/prompts.d.ts +0 -8
- package/dist/prompts.js +0 -89
- package/dist/render.d.ts +0 -6
- package/dist/render.js +0 -295
- package/dist/repomix-provider.d.ts +0 -12
- package/dist/repomix-provider.js +0 -196
- package/dist/search-bench.d.ts +0 -1
- package/dist/search-bench.js +0 -677
- package/dist/search-profile.d.ts +0 -6
- package/dist/search-profile.js +0 -73
- package/dist/sem-provider.d.ts +0 -2
- package/dist/sem-provider.js +0 -255
- package/dist/stupify.d.ts +0 -38
- package/dist/stupify.js +0 -505
- package/dist/trace.d.ts +0 -31
- package/dist/trace.js +0 -86
- package/dist/types.d.ts +0 -341
- package/dist/types.js +0 -6
- package/dist/ui.d.ts +0 -34
- package/dist/ui.js +0 -143
- package/src/analysis.ts +0 -223
- package/src/cache.ts +0 -63
- package/src/checks.ts +0 -231
- package/src/command.ts +0 -173
- package/src/constants.ts +0 -56
- package/src/counter-scout.ts +0 -195
- package/src/diff.ts +0 -9
- package/src/doctor.ts +0 -166
- package/src/git.ts +0 -380
- package/src/hooks.ts +0 -151
- package/src/index.ts +0 -1
- package/src/model.ts +0 -367
- package/src/prompts.ts +0 -100
- package/src/render.ts +0 -328
- package/src/repomix-provider.ts +0 -219
- package/src/search-bench.ts +0 -783
- package/src/search-profile.ts +0 -89
- package/src/sem-provider.ts +0 -300
- package/src/stupify.ts +0 -604
- package/src/trace.ts +0 -126
- package/src/types.ts +0 -362
- package/src/ui.ts +0 -187
|
@@ -0,0 +1,526 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* stupify (review sweep) — auto-review open GitHub PRs with Codex against a corpus of code YOU picked.
|
|
4
|
+
* The engine the `stupify` CLI deploys to ~/.stupify and runs on a cron (or `stupify run`); config.env sits
|
|
5
|
+
* next to it.
|
|
6
|
+
*
|
|
7
|
+
* Reviews every PR by default (SCOPE=auto): every non-draft, non-bot PR under DIFF_LINE_CAP, no label needed.
|
|
8
|
+
* REVIEW_LABEL is just a force-include override for an oversized diff. Want manual control instead? SCOPE=label
|
|
9
|
+
* flips it to opt-in: only PRs you tag REVIEW_LABEL are reviewed, so spend tracks exactly what you tag.
|
|
10
|
+
* The "taste" — REVIEW-PROMPT.md, RUBRIC.md, CORPUS.md — lives in the TARGET repo under REVIEW_DIR (default
|
|
11
|
+
* `.review/`), so it's version-controlled with the code it judges and edited via a normal PR.
|
|
12
|
+
* Idempotent: skips a PR already reviewed — or already reported as failed — at its current head SHA, via a
|
|
13
|
+
* hidden marker comment. A new push moves the SHA, clears the markers, and re-arms the review.
|
|
14
|
+
* Per-PR memory: each review is fed the PR's existing review thread, so it won't re-raise resolved/declined
|
|
15
|
+
* items and converges ("no new blocking issues") instead of nagging forever.
|
|
16
|
+
*
|
|
17
|
+
* Single-flight: the sweep takes its own lockfile (state/sweep.lock) so two cron ticks never overlap — no
|
|
18
|
+
* `flock` dependency. Every knob lives in config.env next to this file (read fresh each run). Run: `bun review-sweep.ts`.
|
|
19
|
+
*/
|
|
20
|
+
import { spawnSync } from 'node:child_process'
|
|
21
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, rmSync, statSync, writeFileSync } from 'node:fs'
|
|
22
|
+
import { dirname, join } from 'node:path'
|
|
23
|
+
import { fileURLToPath } from 'node:url'
|
|
24
|
+
|
|
25
|
+
// Directory containing this engine file. loadConfig() reads config.env from here and uses it as the
// default STUPIFY_HOME when that knob is not set.
const KIT_DIR = dirname(fileURLToPath(import.meta.url))
|
|
26
|
+
|
|
27
|
+
/** Resolved settings for one sweep, as produced by loadConfig(). */
export interface Config {
  /** Dedicated checkout we hard-reset — never a working checkout you care about. */
  repoDir: string
  remote: string
  slug: string
  defaultBranch: string
  /**
   * Resolved review dir holding REVIEW-PROMPT.md / RUBRIC.md / CORPUS.md — the repo's .review/ if it has
   * one, else homeReviewDir (set in main).
   */
  reviewDir: string
  /** Fallback taste the CLI assembled under STUPIFY_HOME/.review (packs or bring-your-own). */
  homeReviewDir: string
  scope: 'label' | 'auto'
  reviewLabel: string
  diffLineCap: number
  dryRun: boolean
  maxPrs: number
  stateDir: string
  codexEffort: string
  /** Optional `-c model_provider=...`; empty = codex's own default/auth. */
  codexProvider: string
  /** Optional `-c model=...`; empty = codex's default model. */
  codexModel: string
}
|
|
44
|
+
|
|
45
|
+
/**
 * Build the sweep's Config from config.env (next to this file) overlaid with process.env.
 *
 * Side effects: creates the state directory, points LOG at state/sweep.log, and exits the process with
 * status 1 when REPO_SLUG is missing.
 *
 * @returns the resolved Config. `reviewDir` is still the relative directory name here; main() resolves it.
 */
function loadConfig(): Config {
  const file = parseEnvFile(join(KIT_DIR, 'config.env'))
  // A one-shot env override wins over the persisted config.env, so `DRY_RUN=1 bun review-sweep.ts` actually
  // previews even when the deployed file says DRY_RUN=0. Cron sets none of these keys, so it falls to the file.
  const pick = (key: string, fallback: string): string => process.env[key] ?? file[key] ?? fallback
  // For knobs where a blank value can never be valid (paths, refs, effort): skip blank candidates instead of
  // letting `KEY=` defeat the default. A blank STUPIFY_HOME in particular would turn join('', 'repo') into a
  // cwd-relative `repo/` — the directory refreshRepo hard-resets.
  const pickNonBlank = (key: string, fallback: string): string => {
    for (const candidate of [process.env[key], file[key]]) {
      const value = candidate?.trim()
      if (value) return value
    }
    return fallback
  }
  const int = (key: string, fallback: number, min: number): number => {
    const set = process.env[key] ?? file[key]
    if (set === undefined) return fallback
    const trimmed = set.trim()
    const n = Number(trimmed)
    // isSafeInteger rejects digit strings so long that Number() loses precision or overflows to Infinity.
    if (/^\d+$/.test(trimmed) && Number.isSafeInteger(n) && n >= min) return n
    log(`config: ${key}='${set}' is not an integer ≥ ${min} — using ${fallback}`)
    return fallback
  }
  const bool = (key: string, unset: boolean, onInvalid: boolean): boolean => {
    const set = process.env[key] ?? file[key]
    if (set === undefined) return unset
    const v = set.trim().toLowerCase()
    if (v === '1' || v === 'true' || v === 'yes' || v === 'on') return true
    if (v === '0' || v === 'false' || v === 'no' || v === 'off') return false
    log(`config: ${key}='${set}' is not a boolean (1/0/true/false/yes/no/on/off) — using ${onInvalid} (fail-safe)`)
    return onInvalid
  }

  // Home is where the CLI deployed us (~/.stupify) — config.env, state, and the dedicated checkout all live here.
  const stupifyHome = pickNonBlank('STUPIFY_HOME', KIT_DIR)
  const stateDir = join(stupifyHome, 'state')
  mkdirSync(stateDir, { recursive: true })
  LOG = join(stateDir, 'sweep.log') // set before parsing knobs so config warnings reach sweep.log, not just cron.log

  const slug = pick('REPO_SLUG', '').trim()
  if (!slug) {
    log('config: REPO_SLUG is required (owner/repo) — aborting. Run `stupify` to set up.')
    process.exit(1)
  }
  const scopeRaw = pick('SCOPE', 'auto').trim().toLowerCase()
  if (scopeRaw !== 'label' && scopeRaw !== 'auto') log(`config: SCOPE='${scopeRaw}' is not 'label' or 'auto' — using auto`)

  return {
    repoDir: join(stupifyHome, 'repo'), // HARD-PINNED under STUPIFY_HOME: refreshRepo runs `git reset --hard` here
    remote: pickNonBlank('REMOTE', `https://github.com/${slug}.git`),
    slug,
    defaultBranch: pickNonBlank('DEFAULT_BRANCH', 'main'),
    reviewDir: pickNonBlank('REVIEW_DIR', '.review'), // relative name here; main() resolves it to an absolute path (repo's or home's)
    homeReviewDir: join(stupifyHome, '.review'),
    scope: scopeRaw === 'label' ? 'label' : 'auto', // auto is the default; only the explicit string 'label' opts into per-PR tagging
    reviewLabel: pick('REVIEW_LABEL', 'codex-review'),
    diffLineCap: int('DIFF_LINE_CAP', 800, 1),
    dryRun: bool('DRY_RUN', false, true), // unset = live (cron's normal mode); garbage = preview (never post on a typo)
    maxPrs: int('MAX_PRS', 15, 1),
    stateDir,
    codexEffort: pickNonBlank('CODEX_EFFORT', 'high'),
    codexProvider: pick('CODEX_PROVIDER', '').trim(), // whitespace-only must count as "not set"
    codexModel: pick('CODEX_MODEL', '').trim(),
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Minimal KEY=VALUE reader for config.env, so a value reads the same here as it does when bash sources the
 * file (`KEY='https://…'` → `https://…`).
 *
 * Rules, per line:
 * - blank lines, `#` comment lines and lines without `=` are ignored;
 * - an optional leading `export ` on the key is dropped;
 * - a value that is one complete quoted string (optionally followed by a `# comment`) yields the text
 *   between the quotes verbatim, so a ` #` INSIDE quotes is kept;
 * - otherwise everything from the first whitespace-then-`#` is treated as an inline comment, the rest is
 *   trimmed, and matching surrounding quotes are stripped.
 *
 * @param path file to read; a missing file yields `{}`.
 * @returns map of keys to parsed values (later duplicates win).
 */
function parseEnvFile(path: string): Record<string, string> {
  if (!existsSync(path)) return {}
  const out: Record<string, string> = {}
  for (const raw of readFileSync(path, 'utf8').split('\n')) {
    const line = raw.trim() // also removes the \r of CRLF files
    if (!line || line.startsWith('#')) continue
    const eq = line.indexOf('=')
    if (eq < 0) continue
    const key = line
      .slice(0, eq)
      .trim()
      .replace(/^export\s+/, '')
    if (!key) continue
    const value = line.slice(eq + 1)

    // Quoted value: find the closing quote FIRST so a ` #` inside the quotes is data, not a comment.
    const lead = value.trimStart()
    const quote = lead[0]
    if (quote === "'" || quote === '"') {
      const close = lead.indexOf(quote, 1)
      if (close > 0) {
        const tail = lead.slice(close + 1)
        if (tail.trim() === '' || /^\s+#/.test(tail)) {
          out[key] = lead.slice(1, close)
          continue
        }
      }
    }

    // Unquoted (or irregularly quoted) value: strip the inline comment, then matching outer quotes.
    const comment = value.search(/\s#/)
    let v = (comment < 0 ? value : value.slice(0, comment)).trim()
    if (v.length >= 2 && (v[0] === "'" || v[0] === '"') && v[v.length - 1] === v[0]) v = v.slice(1, -1)
    out[key] = v
  }
  return out
}
|
|
121
|
+
|
|
122
|
+
/** Outcome of one exec() call. */
interface ProcResult {
  /** True only when the process exited with status 0 and spawning reported no error. */
  ok: boolean
  /** Captured stdout ('' when there was none). */
  stdout: string
  /** stdout followed by stderr, plus a trailing note for a kill signal or spawn error. */
  combined: string
}
|
|
127
|
+
|
|
128
|
+
/**
 * Run a command synchronously with stdin closed and capture its output as UTF-8.
 *
 * @param cmd executable to spawn
 * @param args argument vector (no shell involved)
 * @param opts optional working directory and timeout in milliseconds
 * @returns ok/stdout/combined; `combined` also carries kill-signal and spawn-error notes
 */
function exec(cmd: string, args: string[], opts: { cwd?: string; timeoutMs?: number } = {}): ProcResult {
  const { cwd, timeoutMs } = opts
  const child = spawnSync(cmd, args, {
    cwd,
    input: '', // close stdin (codex would otherwise read from the terminal)
    timeout: timeoutMs,
    encoding: 'utf8',
    maxBuffer: 64 * 1024 * 1024,
  })
  const stdout = child.stdout ?? ''
  // A timeout shows up as a signal (SIGTERM) and a spawn failure (ENOENT etc.) as `error` — both with EMPTY
  // stdout/stderr — so append them; otherwise the failure path would only ever see "no output".
  const pieces = [stdout + (child.stderr ?? '')]
  if (child.signal) {
    const timeoutNote = timeoutMs ? ` (timeout ${timeoutMs}ms)` : ''
    pieces.push(`${cmd}: process killed by ${child.signal}${timeoutNote}`)
  }
  if (child.error) pieces.push(`${cmd}: ${child.error.message}`)
  const ok = child.error === undefined && child.status === 0
  return { ok, stdout, combined: pieces.join('\n') }
}
|
|
144
|
+
|
|
145
|
+
/** Path of the sweep log file; stays '' (file logging off) until loadConfig() assigns it. */
let LOG = ''

/**
 * Write one timestamped line to the console and, once LOG is set, append it to the log file.
 * The timestamp is ISO-8601 UTC with the milliseconds removed.
 */
function log(message: string): void {
  const stamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z')
  const line = `${stamp} ${message}`
  if (LOG !== '') appendFileSync(LOG, line + '\n')
  console.log(line)
}
|
|
151
|
+
|
|
152
|
+
/**
 * Refresh the dedicated checkout to `origin/<cfg.defaultBranch>`: clone it if missing, then fetch, check
 * out and hard-reset that branch.
 *
 * @returns true when the checkout is ready; false on any git failure (the failing step and the last line
 *   of git's output are logged so the cause is visible in the sweep log).
 */
function refreshRepo(cfg: Config): boolean {
  // Last non-empty line of a command's output — git puts the actual error there.
  const detail = (out: string): string => {
    const last = out.trim().split('\n').pop()?.trim() ?? ''
    return last ? ` — ${last}` : ''
  }

  mkdirSync(dirname(cfg.repoDir), { recursive: true })
  if (!existsSync(join(cfg.repoDir, '.git'))) {
    log(`cloning ${cfg.remote} -> ${cfg.repoDir}`)
    const clone = exec('git', ['clone', '-q', cfg.remote, cfg.repoDir])
    if (!clone.ok) return logFail(`clone failed${detail(clone.combined)}`)
  }

  const branch = cfg.defaultBranch
  const steps: { verb: string; args: string[] }[] = [
    { verb: 'fetch', args: ['fetch', '-q', 'origin', branch] },
    { verb: 'checkout', args: ['checkout', '-q', branch] },
    { verb: 'reset', args: ['reset', '-q', '--hard', `origin/${branch}`] },
  ]
  // Run in order and stop at the first failure, exactly like the original `&&` chain.
  for (const step of steps) {
    const r = exec('git', step.args, { cwd: cfg.repoDir })
    if (!r.ok) {
      return logFail(
        `refresh failed (is the default branch '${branch}'? set DEFAULT_BRANCH if not) [git ${step.verb}]${detail(r.combined)}`,
      )
    }
  }
  return true
}
|
|
166
|
+
|
|
167
|
+
/** Log `message` and return `false`, so a failure path can be written as `return logFail('…')`. */
function logFail(message: string): false {
  const failed = false as const
  log(message)
  return failed
}
|
|
171
|
+
|
|
172
|
+
/** One open pull request, limited to the fields listPrs() requests from `gh pr list --json`. */
export interface Pr {
  number: number
  headRefOid: string
  isDraft: boolean
  /** `is_bot` flags GitHub App bots (app/dependabot) that the `[bot]` login suffix misses. */
  author: { login: string; is_bot: boolean } | null
  labels: { name: string }[]
}
|
|
179
|
+
|
|
180
|
+
/**
 * List the repo's open PRs via `gh pr list`, keeping only entries that pass the isPr shape check.
 *
 * @returns the validated PRs, or null when gh failed or returned something that is not a JSON array
 *   (the caller aborts the sweep on null).
 */
function listPrs(cfg: Config): Pr[] | null {
  // Filter the PR list directly rather than `gh pr list --label` — that search index lags behind labelling.
  const fields = 'number,headRefOid,isDraft,author,labels'
  // gh caps `pr list` at 30 results unless --limit is given; without this, PRs past the first 30 would
  // silently never be reviewed.
  const limit = '1000'
  const r = exec('gh', ['pr', 'list', '--repo', cfg.slug, '--state', 'open', '--limit', limit, '--json', fields])
  if (!r.ok) {
    log('gh pr list failed (auth/network down?) — aborting sweep')
    const cause = r.combined.trim().split('\n').pop()?.trim() ?? ''
    if (cause) log(`  gh: ${cause}`)
    return null
  }
  let raw: unknown
  try {
    raw = JSON.parse(r.stdout)
  } catch {
    log('gh pr list returned unparseable JSON — aborting sweep')
    return null
  }
  if (!Array.isArray(raw)) {
    log('gh pr list returned a non-array — aborting sweep')
    return null
  }
  const prs = raw.filter(isPr)
  if (prs.length < raw.length) log(`gh pr list: ${raw.length - prs.length} entries failed shape check — skipped`)
  return prs
}
|
|
203
|
+
|
|
204
|
+
// Shape guard for one `gh pr list --json` entry. gh promises the shape, but an auth-error page or schema
// drift would otherwise blow up (or silently mis-scope) in the middle of the loop rather than being skipped
// cleanly. Uses `in`-narrowing only — no type assertions — and covers every field that inScope and the main
// loop rely on.
function isPr(raw: unknown): raw is Pr {
  if (raw === null || typeof raw !== 'object') return false
  const numberOk = 'number' in raw && typeof raw.number === 'number'
  const shaOk = 'headRefOid' in raw && typeof raw.headRefOid === 'string'
  const draftOk = 'isDraft' in raw && typeof raw.isDraft === 'boolean'
  const labelsOk = 'labels' in raw && Array.isArray(raw.labels) && raw.labels.every(isLabel)
  const authorOk = 'author' in raw && isAuthor(raw.author)
  return numberOk && shaOk && draftOk && labelsOk && authorOk
}
|
|
215
|
+
|
|
216
|
+
/** Shape guard: a non-null object carrying a string `name`. */
function isLabel(raw: unknown): raw is { name: string } {
  if (raw === null || typeof raw !== 'object') return false
  if (!('name' in raw)) return false
  return typeof raw.name === 'string'
}
|
|
219
|
+
|
|
220
|
+
/** Shape guard for a PR author: either `null` or an object with a string `login` and a boolean `is_bot`. */
function isAuthor(raw: unknown): raw is { login: string; is_bot: boolean } | null {
  if (raw === null) return true
  if (typeof raw !== 'object') return false
  const loginOk = 'login' in raw && typeof raw.login === 'string'
  const botFlagOk = 'is_bot' in raw && typeof raw.is_bot === 'boolean'
  return loginOk && botFlagOk
}
|
|
225
|
+
|
|
226
|
+
/** True when one of the PR's labels is named exactly `cfg.reviewLabel`. */
function hasReviewLabel(pr: Pr, cfg: Config): boolean {
  const wanted = cfg.reviewLabel
  for (const label of pr.labels) {
    if (label.name === wanted) return true
  }
  return false
}
|
|
229
|
+
|
|
230
|
+
/**
 * Decide whether a PR is a review candidate: drafts and bot-authored PRs never are; in 'label' scope the
 * PR must additionally carry the review label; in 'auto' scope everything else qualifies.
 */
function inScope(pr: Pr, cfg: Config): boolean {
  // Bot PRs are excluded in EITHER scope. gh's is_bot catches GitHub App bots (login `app/dependabot`) that
  // the `[bot]` suffix misses; the suffix test stays as a belt-and-suspenders fallback.
  const login = pr.author?.login ?? ''
  const fromBot = pr.author?.is_bot === true || login.endsWith('[bot]')
  if (pr.isDraft || fromBot) return false
  // auto: any non-draft, non-bot PR; label: only tagged ones.
  return cfg.scope === 'label' ? hasReviewLabel(pr, cfg) : true
}
|
|
238
|
+
|
|
239
|
+
/** One PR comment reduced to what the sweep reads: the author's login and the body text. */
interface Comment {
  /** Author login; toComment() uses '' when it is missing. */
  login: string
  /** Comment body; toComment() uses '' when it is missing. */
  body: string
}
|
|
243
|
+
|
|
244
|
+
// Fetch a PR's comments via `gh pr view --json comments`.
// Returns null when the PR could not be read (gh failed, or the output is not `{ comments: [...] }`).
// The caller SKIPS such a PR instead of treating it as unreviewed: inventing an empty comment list here
// would let a GitHub blip cause a duplicate review post.
function prComments(cfg: Config, number: number): Comment[] | null {
  const view = exec('gh', ['pr', 'view', String(number), '--repo', cfg.slug, '--json', 'comments'])
  if (!view.ok) return null
  let parsed: unknown
  try {
    parsed = JSON.parse(view.stdout)
  } catch {
    return null
  }
  if (parsed === null || typeof parsed !== 'object') return null
  if (!('comments' in parsed)) return null
  const list = parsed.comments
  if (!Array.isArray(list)) return null
  return list.map(toComment)
}
|
|
258
|
+
|
|
259
|
+
/**
 * Normalise one raw comment entry into a Comment. Anything missing or of the wrong type becomes '' rather
 * than an error: a non-object entry yields two empty strings, and `login` is read from `author.login`.
 */
function toComment(c: unknown): Comment {
  if (c === null || typeof c !== 'object') return { login: '', body: '' }

  let body = ''
  if ('body' in c && typeof c.body === 'string') body = c.body

  let login = ''
  const author = 'author' in c ? c.author : null
  if (author !== null && typeof author === 'object' && 'login' in author && typeof author.login === 'string') {
    login = author.login
  }

  return { login, body }
}
|
|
269
|
+
|
|
270
|
+
// The per-PR MEMORY: the existing review conversation — the reviewer's past reviews + the author's replies —
// fed back into the prompt so it stops re-litigating settled points and knows when to converge. The GitHub
// thread IS the durable store (survives restarts, already holds the replies); we just read it back.
const MEMORY_COMMENTS = 20 // recent thread context, bounded so the prompt can't balloon on a chatty PR

/**
 * Render the most recent MEMORY_COMMENTS human/agent comments as `@login:\n<text>` entries separated by
 * `---` rules.
 *
 * Comments from `[bot]` logins are dropped, hidden `<!-- … -->` markers are stripped, and comments whose
 * text is empty after stripping are dropped BEFORE the MEMORY_COMMENTS window is applied, so empty
 * or marker-only comments neither reach the prompt nor use up the budget.
 *
 * @returns the rendered thread, or '' when nothing remains.
 */
function priorReviewThread(comments: Comment[]): string {
  return comments
    .filter((c) => !c.login.endsWith('[bot]')) // drop CI bots; keep prior reviews + human/agent replies
    .map((c) => ({ login: c.login, text: c.body.replace(/<!--[\s\S]*?-->/g, '').trim() })) // strip hidden markers
    .filter((c) => c.text.length > 0) // must test the TEXT: the rendered entry is never empty (it has the @login prefix)
    .slice(-MEMORY_COMMENTS)
    .map((c) => `@${c.login}:\n${c.text}`)
    .join('\n\n---\n\n')
}
|
|
283
|
+
|
|
284
|
+
// Count the lines of `gh pr diff` output for a PR (a trailing newline does not add a line).
// Returns null when the diff could not be read. The caller skips the PR in that case instead of treating
// an unreadable diff as "0 lines" — that would be a silent under-cap that auto-reviews something it never
// measured.
function diffLineCount(cfg: Config, number: number): number | null {
  const diff = exec('gh', ['pr', 'diff', String(number), '--repo', cfg.slug])
  if (!diff.ok) return null
  const text = diff.stdout
  if (text === '') return 0
  const pieces = text.split('\n')
  return text.endsWith('\n') ? pieces.length - 1 : pieces.length
}
|
|
292
|
+
|
|
293
|
+
/**
 * Hidden HTML-comment markers keyed to the PR's current head SHA: `mark` for a posted review and
 * `failMark` for a reported failure.
 */
function markersFor(pr: Pr): { mark: string; failMark: string } {
  const sha = pr.headRefOid
  const mark = `<!-- stupify:${sha} -->`
  const failMark = `<!-- stupify-failed:${sha} -->`
  return { mark, failMark }
}
|
|
299
|
+
|
|
300
|
+
/**
 * Build the fixed "taste" prefix of the prompt: standing instructions followed by REVIEW-PROMPT.md,
 * RUBRIC.md and CORPUS.md from `cfg.reviewDir`, each trimmed and inlined verbatim.
 *
 * It is the same text for every PR in a repo, which is the point: a stable prompt PREFIX is what the
 * provider can cache across PRs, so it is paid for in full once and at cache-read rates afterwards. (Files
 * that codex `Read` mid-loop would arrive as tool results after model-chosen steps that differ per run,
 * and would not cache.) Only the corpus INDEX is inlined — its exemplars remain commit-pinned links opened
 * on demand, so no review pays to read the whole corpus. Keep ALL per-PR tokens (diff target, marker,
 * memory) OUT of this function; they belong in the tail built by reviewPrompt.
 */
export function stablePrefix(cfg: Config): string {
  const inline = (name: string): string => readFileSync(join(cfg.reviewDir, name), 'utf8').trim()
  const parts = [
    'You are a code reviewer running in an automated sweep (you have gh + git; no token needed). DO NOT modify any code.',
    'Everything down to the "THIS PR" line is your fixed spec and taste — identical for every PR, so treat it as standing reference.',
    '',
    '===== REVIEW SPEC (format + rules) =====',
    inline('REVIEW-PROMPT.md'),
    '',
    '===== RUBRIC (what counts as slop) =====',
    inline('RUBRIC.md'),
    '',
    '===== CORPUS (good-code reference; the links are commit-pinned — open one ONLY when a finding needs to cite it) =====',
    inline('CORPUS.md'),
  ]
  return parts.join('\n')
}
|
|
320
|
+
|
|
321
|
+
/**
 * Assemble the full prompt for one PR: the stable prefix first (cacheable across PRs), then the only
 * per-PR tokens — the diff target, the output file and end marker, and, when `priorThread` is non-empty,
 * the existing review conversation as memory.
 */
export function reviewPrompt(cfg: Config, pr: Pr, priorThread: string): string {
  const endMarker = markersFor(pr).mark
  const outPath = `/tmp/review-${pr.number}.md`
  const target = `${pr.number} --repo ${cfg.slug}`

  const tail = [
    stablePrefix(cfg),
    '',
    '===== THIS PR (the only part that changes per run) =====',
    'Review ONE pull request, per the spec and rubric above:',
    `1. Get the diff: gh pr diff ${target}`,
    '2. Review it — catch bugs / type-lies / dead-code / footguns AND reinvents-primitive / slop, each citing the corpus primitive it should reuse; sort worst-first.',
    `3. Write the review to ${outPath}, formatted EXACTLY per the spec's 'Comment format' section (it owns the format — opener, finding blocks, attribution). END the file with exactly this line: ${endMarker}`,
    `4. Post it: gh pr comment ${target} --body-file ${outPath}`,
    'Keep it terse; no preamble.',
  ].join('\n')

  if (!priorThread) return tail

  const memory = [
    '',
    '',
    '## Prior reviews on this PR (your memory)',
    `This is the existing review conversation — your past reviews and the author's replies. You are CONTINUING it,`,
    `not starting fresh. Apply the spec's "Prior reviews on this PR" rules: don't re-raise resolved or`,
    `reasoned-declined items, report only what's genuinely new, and converge (post the one-line "no new issues"`,
    'and stop) if nothing new remains.',
    '',
    priorThread,
  ].join('\n')
  return tail + memory
}
|
|
344
|
+
|
|
345
|
+
/**
 * Run one review by handing the prompt to `codex exec` inside the dedicated checkout.
 *
 * On success (codex exit 0) returns the tokens used (0 when the count can't be parsed). On failure posts a
 * short error comment carrying the fail marker — so the next sweep skips this head instead of retrying
 * every tick — and returns null.
 *
 * NOTE(review): success is judged by codex's exit status alone; this function does not check that a
 * comment carrying the review marker was actually posted.
 */
function reviewPr(cfg: Config, pr: Pr, priorThread: string): number | null {
  const { failMark } = markersFor(pr)
  const outPath = `/tmp/review-${pr.number}.md`
  log(`reviewing PR #${pr.number} @ ${pr.headRefOid.slice(0, 8)}`)

  // The path is keyed by PR number only, so a file from an earlier head (or an earlier failure note) may
  // still be there. Remove it so a run that fails to write can't post stale text with an old marker.
  try {
    rmSync(outPath, { force: true })
  } catch {
    /* best-effort — codex overwrites the file on a normal run */
  }

  const codexArgs = [
    'exec',
    '--cd',
    cfg.repoDir,
    '--sandbox',
    'workspace-write',
    '-c',
    `model_reasoning_effort=${cfg.codexEffort}`,
    '-c',
    'sandbox_workspace_write.network_access=true',
    '-c',
    'sandbox_workspace_write.writable_roots=["/tmp"]',
  ]
  if (cfg.codexProvider) codexArgs.push('-c', `model_provider=${cfg.codexProvider}`)
  if (cfg.codexModel) codexArgs.push('-c', `model=${cfg.codexModel}`)
  codexArgs.push(reviewPrompt(cfg, pr, priorThread))

  const cx = exec('codex', codexArgs, { cwd: cfg.repoDir, timeoutMs: 1_200_000 })
  // Same guard as log(): LOG is '' until loadConfig() has run, and appending to '' throws.
  if (LOG) appendFileSync(LOG, `${cx.combined}\n`)

  if (cx.ok) {
    const tokens = parseTokens(cx.combined)
    log(`  #${pr.number} done (${tokens ?? '?'} tokens)`)
    return tokens ?? 0
  }

  // Codex couldn't run (provider down, out of credits, timeout, bad diff). Don't fail silently — post a short
  // error on the PR and stamp FAIL_MARK so the next sweep skips this head instead of re-hammering every minute.
  const reason = failureReason(cx.combined)
  log(`  review FAILED for #${pr.number} — ${reason}`)
  const body = [
    "uhh — i couldn't review this one. codex didn't run:",
    '',
    `> ${reason}`,
    '',
    "_— stupify (auto-reviewer). i'll retry on your next push._",
    failMark,
  ].join('\n')
  writeFileSync(outPath, `${body}\n`)
  if (!exec('gh', ['pr', 'comment', String(pr.number), '--repo', cfg.slug, '--body-file', outPath]).ok) {
    log(`  (couldn't post failure comment for #${pr.number} either — gh down?)`)
  }
  return null
}
|
|
394
|
+
|
|
395
|
+
/**
 * Extract the token count from codex output. codex prints a `tokens used` line with the count on the line
 * after it; the LAST such pair wins. Non-digit characters in the count line are ignored.
 *
 * @returns the count, or null when there is no `tokens used` line or the following line has no digits.
 */
function parseTokens(out: string): number | null {
  const lines = out.split('\n')
  let labelAt = -1
  lines.forEach((text, index) => {
    if (/tokens used/i.test(text)) labelAt = index
  })
  if (labelAt < 0) return null
  const digits = (lines[labelAt + 1] ?? '').replace(/\D/g, '')
  return digits === '' ? null : Number(digits)
}
|
|
407
|
+
|
|
408
|
+
/**
 * Pick a one-line failure reason out of codex output: the LAST trimmed line that matches a failure keyword
 * and is not a "no error"/"0 error" line, with backticks replaced by spaces and capped at 220 characters.
 * Falls back to a fixed message when no line qualifies.
 */
function failureReason(out: string): string {
  const signal = /payment required|credits|quota|rate.?limit|429|5\d\d |timeout|killed|enoent|spawn|error/i
  const noise = /no error|0 error/i
  let lastHit = ''
  for (const rawLine of out.split('\n')) {
    const candidate = rawLine.trim()
    if (!signal.test(candidate)) continue
    if (noise.test(candidate)) continue
    lastHit = candidate
  }
  const cleaned = lastHit.replace(/`/g, ' ').slice(0, 220).trim()
  if (cleaned) return cleaned
  return 'codex run failed (no output captured — check the sweep log)'
}
|
|
419
|
+
|
|
420
|
+
// Single-flight without flock. An O_EXCL create ('wx') wins atomically and records our PID in the file.
//
// When the lock already exists it is reclaimed only if it is STALE:
//   - the recorded owner PID is no longer running (crashed run — reclaimable immediately), or
//   - the owner can't be read and the lock is older than 30 min, or
//   - the lock is older than 24 h regardless (guards against a recycled PID pinning it forever).
// Age alone is NOT enough while the owner is alive: one codex run is capped at 20 min, but a sweep runs up
// to MAX_PRS of them back to back, so a healthy sweep routinely outlives 30 min and must not be robbed.
//
// Reclaiming removes the stale file and then retries the exclusive create, so when two sweeps race to
// reclaim, at most one of the creates can succeed.
// Returns true when this process now holds the lock.
function acquireLock(path: string): boolean {
  const UNKNOWN_OWNER_STALE_MS = 30 * 60_000
  const HARD_STALE_MS = 24 * 60 * 60_000

  const createExclusive = (): boolean => {
    try {
      writeFileSync(path, String(process.pid), { flag: 'wx' })
      return true
    } catch {
      return false
    }
  }
  // Signal 0 performs the existence/permission check without delivering a signal. EPERM means the
  // process exists but belongs to someone else — still alive.
  const isRunning = (pid: number): boolean => {
    try {
      process.kill(pid, 0)
      return true
    } catch (err: unknown) {
      return typeof err === 'object' && err !== null && 'code' in err && err.code === 'EPERM'
    }
  }

  if (createExclusive()) return true

  let ageMs: number
  let ownerPid: number
  try {
    ageMs = Date.now() - statSync(path).mtimeMs
    ownerPid = Number.parseInt(readFileSync(path, 'utf8').trim(), 10)
  } catch {
    return false // lock vanished between calls (or is unreadable) — let the next sweep retry
  }

  const ownerKnown = Number.isInteger(ownerPid) && ownerPid > 0
  let stale: boolean
  if (ageMs > HARD_STALE_MS) stale = true
  else if (ownerKnown) stale = !isRunning(ownerPid)
  else stale = ageMs > UNKNOWN_OWNER_STALE_MS
  if (!stale) return false

  try {
    rmSync(path, { force: true })
  } catch {
    return false
  }
  return createExclusive()
}
|
|
438
|
+
|
|
439
|
+
/**
 * Run one sweep: load config, take the single-flight lock, refresh the checkout, resolve the review
 * directory, then walk the in-scope open PRs and review each one that has no marker for its current head.
 *
 * Exits with status 1 when the checkout can't be refreshed or the PR list can't be read; returns quietly
 * when another sweep holds the lock or the review files are missing.
 */
function main(): void {
  const cfg = loadConfig() // also mkdirs stateDir and sets LOG, so config warnings are already captured

  const lockPath = join(cfg.stateDir, 'sweep.lock')
  if (!acquireLock(lockPath)) {
    log('another sweep already running — skip')
    return
  }
  // 'exit' also fires for the process.exit(1) calls below, so the lock is released on those paths too.
  process.on('exit', () => {
    try {
      rmSync(lockPath, { force: true })
    } catch {
      /* best-effort */
    }
  })

  if (!refreshRepo(cfg)) process.exit(1)
  // Resolve the taste: the target repo's own .review/ wins (a repo can override); otherwise fall back to the
  // home taste the CLI assembled from packs (~/.stupify/.review). Either way cfg.reviewDir becomes ABSOLUTE.
  const repoReview = join(cfg.repoDir, cfg.reviewDir)
  cfg.reviewDir = existsSync(join(repoReview, 'CORPUS.md')) ? repoReview : cfg.homeReviewDir
  const haveMachinery =
    existsSync(join(cfg.reviewDir, 'CORPUS.md')) &&
    existsSync(join(cfg.reviewDir, 'REVIEW-PROMPT.md')) &&
    existsSync(join(cfg.reviewDir, 'RUBRIC.md'))
  if (!haveMachinery) {
    log(`no review machinery at ${cfg.reviewDir}/ (need REVIEW-PROMPT.md + RUBRIC.md + CORPUS.md) — no-op. Run \`stupify setup\` to assemble taste, or add a .review/ to ${cfg.slug}.`)
    return
  }

  const prs = listPrs(cfg)
  if (prs === null) process.exit(1)
  const queue = prs.filter((pr) => inScope(pr, cfg)) // MAX_PRS is applied to PRs actually HANDLED, not iterated (below)

  let reviewed = 0
  let tokens = 0
  // Count only PRs we commit to reviewing (or, in dry-run, would review) and cap THAT at MAX_PRS. PRs that
  // are skipped every sweep — already marked, oversized, diff unreadable — never gain a marker, so letting
  // them count would have the same PRs eat the budget each sweep and starve the ones behind them.
  let handled = 0
  for (const pr of queue) {
    const { mark, failMark } = markersFor(pr)
    const comments = prComments(cfg, pr.number)
    if (comments === null) {
      log(`skip #${pr.number} — couldn't read it from gh (failed/malformed); will retry next sweep`)
      continue
    }
    const bodies = comments.map((c) => c.body).join('\n')
    if (bodies.includes(mark) || bodies.includes(failMark)) continue

    // Past the cheap dedup skip — this PR is a real candidate. Enforce MAX_PRS here, not on the
    // iterated list, and defer the rest to the next sweep.
    if (handled >= cfg.maxPrs) {
      log(`reached MAX_PRS=${cfg.maxPrs} this sweep — deferring remaining candidates to the next sweep`)
      break
    }

    let lines = 0
    if (cfg.scope === 'auto' || cfg.dryRun) {
      const counted = diffLineCount(cfg, pr.number)
      if (counted === null) {
        log(`skip #${pr.number} — couldn't read its diff from gh; will retry next sweep`)
        continue
      }
      lines = counted
    }
    // auto-scope only: skip oversized diffs UNLESS the PR carries the review label (the documented force-include).
    // (label-scope means you already opted in, so size never gates there.)
    if (cfg.scope === 'auto' && lines > cfg.diffLineCap && !hasReviewLabel(pr, cfg)) {
      log(`skip #${pr.number} — diff ${lines} lines > cap ${cfg.diffLineCap} (add '${cfg.reviewLabel}' to force)`)
      continue
    }

    // Only now does the PR take a budget slot: every skip above leaves `handled` untouched.
    handled += 1

    if (cfg.dryRun) {
      log(`DRY_RUN would review #${pr.number} @ ${pr.headRefOid.slice(0, 8)} (diff ${lines} lines)`)
      continue
    }

    const used = reviewPr(cfg, pr, priorReviewThread(comments))
    if (used !== null) {
      reviewed += 1
      tokens += used
    }
  }

  log(`sweep done — scope=${cfg.scope} reviewed=${reviewed} tokens~${tokens}`)
}

if (import.meta.main) main() // run only when invoked directly (cron / `stupify run`); stays importable for tests
|
package/dist/analysis.d.ts
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import type { LocalModel } from "./model.ts";
import type { SearchMatch, SemChangeSet, SemContext, SemContextPack, StupifyCheck } from "./types.ts";
/** Immutable input accepted by runSearch and returned by searchRequest. */
export type SearchRequest = Readonly<{
    prompt: string;
    schema: unknown;
    contexts: readonly SemContext[];
}>;
/** Resolves to the matches for `request` (presumably obtained by running it against `model` — implementation not visible here). */
export declare function runSearch(model: LocalModel, request: SearchRequest): Promise<readonly SearchMatch[]>;
/** Produces a SearchRequest from a change set, its contexts, a context pack and the checks to apply. */
export declare function searchRequest(input: Readonly<{
    changeSet: SemChangeSet;
    contexts: readonly SemContext[];
    pack: SemContextPack;
    patterns: readonly StupifyCheck[];
    includeCounterReasonInPrompt?: boolean;
}>): SearchRequest;
/** Resolves to a number for `prompt`; by its name, the prompt's token count under `model` (implementation not visible here). */
export declare function countPromptTokens(model: LocalModel, prompt: string): Promise<number>;
|