@stupify/cli 0.0.16 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.review/CORPUS.md +73 -0
- package/.review/REVIEW-PROMPT.md +52 -0
- package/.review/RUBRIC.md +46 -0
- package/LICENSE +1 -1
- package/README.md +41 -39
- package/package.json +24 -25
- package/src/cli.ts +358 -0
- package/src/review-sweep.ts +492 -0
- package/dist/analysis.d.ts +0 -16
- package/dist/analysis.js +0 -168
- package/dist/cache.d.ts +0 -2
- package/dist/cache.js +0 -57
- package/dist/checks.d.ts +0 -4
- package/dist/checks.js +0 -228
- package/dist/command.d.ts +0 -2
- package/dist/command.js +0 -147
- package/dist/constants.d.ts +0 -4
- package/dist/constants.js +0 -53
- package/dist/counter-scout.d.ts +0 -21
- package/dist/counter-scout.js +0 -167
- package/dist/diff.d.ts +0 -1
- package/dist/diff.js +0 -10
- package/dist/doctor.d.ts +0 -16
- package/dist/doctor.js +0 -143
- package/dist/git.d.ts +0 -17
- package/dist/git.js +0 -368
- package/dist/hooks.d.ts +0 -5
- package/dist/hooks.js +0 -135
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/model.d.ts +0 -11
- package/dist/model.js +0 -296
- package/dist/prompts.d.ts +0 -8
- package/dist/prompts.js +0 -89
- package/dist/render.d.ts +0 -6
- package/dist/render.js +0 -295
- package/dist/repomix-provider.d.ts +0 -12
- package/dist/repomix-provider.js +0 -196
- package/dist/search-bench.d.ts +0 -1
- package/dist/search-bench.js +0 -677
- package/dist/search-profile.d.ts +0 -6
- package/dist/search-profile.js +0 -73
- package/dist/sem-provider.d.ts +0 -2
- package/dist/sem-provider.js +0 -255
- package/dist/stupify.d.ts +0 -38
- package/dist/stupify.js +0 -505
- package/dist/trace.d.ts +0 -31
- package/dist/trace.js +0 -86
- package/dist/types.d.ts +0 -341
- package/dist/types.js +0 -6
- package/dist/ui.d.ts +0 -34
- package/dist/ui.js +0 -143
- package/src/analysis.ts +0 -223
- package/src/cache.ts +0 -63
- package/src/checks.ts +0 -231
- package/src/command.ts +0 -173
- package/src/constants.ts +0 -56
- package/src/counter-scout.ts +0 -195
- package/src/diff.ts +0 -9
- package/src/doctor.ts +0 -166
- package/src/git.ts +0 -380
- package/src/hooks.ts +0 -151
- package/src/index.ts +0 -1
- package/src/model.ts +0 -367
- package/src/prompts.ts +0 -100
- package/src/render.ts +0 -328
- package/src/repomix-provider.ts +0 -219
- package/src/search-bench.ts +0 -783
- package/src/search-profile.ts +0 -89
- package/src/sem-provider.ts +0 -300
- package/src/stupify.ts +0 -604
- package/src/trace.ts +0 -126
- package/src/types.ts +0 -362
- package/src/ui.ts +0 -187
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* stupify (review sweep) — auto-review open GitHub PRs with Codex against a corpus of code YOU picked.
|
|
4
|
+
* The engine the `stupify` CLI deploys to ~/.stupify and runs on a cron (or `stupify run`); config.env sits
|
|
5
|
+
* next to it.
|
|
6
|
+
*
|
|
7
|
+
* OPT-IN by default (SCOPE=label): only PRs tagged REVIEW_LABEL are reviewed, so spend tracks exactly what
|
|
8
|
+
* you tag. SCOPE=auto reviews all non-draft, non-bot PRs under DIFF_LINE_CAP.
|
|
9
|
+
* The "taste" — REVIEW-PROMPT.md, RUBRIC.md, CORPUS.md — lives in the TARGET repo under REVIEW_DIR (default
|
|
10
|
+
* `.review/`), so it's version-controlled with the code it judges and edited via a normal PR.
|
|
11
|
+
* Idempotent: skips a PR already reviewed — or already reported as failed — at its current head SHA, via a
|
|
12
|
+
* hidden marker comment. A new push moves the SHA, clears the markers, and re-arms the review.
|
|
13
|
+
* Per-PR memory: each review is fed the PR's existing review thread, so it won't re-raise resolved/declined
|
|
14
|
+
* items and converges ("no new blocking issues") instead of nagging forever.
|
|
15
|
+
*
|
|
16
|
+
* Single-flight: the sweep takes its own lockfile (state/sweep.lock) so two cron ticks never overlap — no
|
|
17
|
+
* `flock` dependency. Every knob lives in config.env next to this file (read fresh each run). Run: `bun review-sweep.ts`.
|
|
18
|
+
*/
|
|
19
|
+
import { spawnSync } from 'node:child_process'
|
|
20
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync, rmSync, statSync, writeFileSync } from 'node:fs'
|
|
21
|
+
import { dirname, join } from 'node:path'
|
|
22
|
+
import { fileURLToPath } from 'node:url'
|
|
23
|
+
|
|
24
|
+
const KIT_DIR = dirname(fileURLToPath(import.meta.url))
|
|
25
|
+
|
|
26
|
+
/** Every knob one sweep run reads; built by `loadConfig`. */
interface Config {
  /** Dedicated checkout we hard-reset — never a working checkout you care about. */
  repoDir: string
  remote: string
  slug: string
  defaultBranch: string
  /** Dir IN the target repo holding REVIEW-PROMPT.md / RUBRIC.md / CORPUS.md. */
  reviewDir: string
  scope: 'label' | 'auto'
  reviewLabel: string
  diffLineCap: number
  dryRun: boolean
  maxPrs: number
  stateDir: string
  codexEffort: string
  /** Optional `-c model_provider=...`; empty = codex's own default/auth. */
  codexProvider: string
  /** Optional `-c model=...`; empty = codex's default model. */
  codexModel: string
}
|
|
42
|
+
|
|
43
|
+
/**
 * Resolve the run's configuration: process.env overrides config.env (next to this file), which overrides
 * the built-in defaults.
 *
 * Side effects: creates `<home>/state`, points LOG at `state/sweep.log`, logs a warning for every
 * malformed knob, and exits the process with status 1 when REPO_SLUG is missing.
 */
function loadConfig(): Config {
  const fileValues = parseEnvFile(join(KIT_DIR, 'config.env'))
  // A one-shot env override beats the persisted config.env, so `DRY_RUN=1 bun review-sweep.ts` previews
  // even when the deployed file says DRY_RUN=0.
  const lookup = (key: string): string | undefined => process.env[key] ?? fileValues[key]
  const pick = (key: string, fallback: string): string => lookup(key) ?? fallback

  // Non-negative decimal integer ≥ min, else warn and fall back.
  const int = (key: string, fallback: number, min: number): number => {
    const given = lookup(key)
    if (given === undefined) return fallback
    const digits = given.trim()
    if (/^\d+$/.test(digits)) {
      const parsed = Number(digits)
      if (parsed >= min) return parsed
    }
    log(`config: ${key}='${given}' is not an integer ≥ ${min} — using ${fallback}`)
    return fallback
  }

  // `unset` applies when the key is absent; `onInvalid` when it is present but unrecognisable.
  const bool = (key: string, unset: boolean, onInvalid: boolean): boolean => {
    const given = lookup(key)
    if (given === undefined) return unset
    switch (given.trim().toLowerCase()) {
      case '1':
      case 'true':
      case 'yes':
      case 'on':
        return true
      case '0':
      case 'false':
      case 'no':
      case 'off':
        return false
      default:
        log(`config: ${key}='${given}' is not a boolean (1/0/true/false/yes/no/on/off) — using ${onInvalid} (fail-safe)`)
        return onInvalid
    }
  }

  // Home is where the CLI deployed us (~/.stupify) — config.env, state, and the dedicated checkout live here.
  const home = pick('STUPIFY_HOME', KIT_DIR)
  const stateDir = join(home, 'state')
  mkdirSync(stateDir, { recursive: true })
  // Point LOG at the file BEFORE parsing knobs so their warnings land in sweep.log too.
  LOG = join(stateDir, 'sweep.log')

  const slug = pick('REPO_SLUG', '').trim()
  if (!slug) {
    log('config: REPO_SLUG is required (owner/repo) — aborting. Run `stupify` to set up.')
    process.exit(1)
  }

  const requestedScope = pick('SCOPE', 'label').trim().toLowerCase()
  if (requestedScope !== 'label' && requestedScope !== 'auto') {
    log(`config: SCOPE='${requestedScope}' is not 'label' or 'auto' — using label`)
  }
  const scope: Config['scope'] = requestedScope === 'auto' ? 'auto' : 'label'

  return {
    // HARD-PINNED under STUPIFY_HOME: refreshRepo runs `git reset --hard` here.
    repoDir: join(home, 'repo'),
    remote: pick('REMOTE', `https://github.com/${slug}.git`),
    slug,
    defaultBranch: pick('DEFAULT_BRANCH', 'main'),
    reviewDir: pick('REVIEW_DIR', '.review'),
    scope,
    reviewLabel: pick('REVIEW_LABEL', 'codex-review'),
    diffLineCap: int('DIFF_LINE_CAP', 800, 1),
    // unset = live (cron's normal mode); garbage = preview (never post on a typo)
    dryRun: bool('DRY_RUN', false, true),
    maxPrs: int('MAX_PRS', 15, 1),
    stateDir,
    codexEffort: pick('CODEX_EFFORT', 'high'),
    codexProvider: pick('CODEX_PROVIDER', ''),
    codexModel: pick('CODEX_MODEL', ''),
  }
}
|
|
98
|
+
|
|
99
|
+
/**
 * Minimal KEY=VALUE reader for config.env.
 *
 * - Blank lines, `#` comment lines, and lines without `=` are ignored.
 * - A value that starts with a quote and has a matching closing quote followed only by whitespace or a
 *   `# comment` is taken verbatim from between the quotes, so `KEY="a #b"` yields `a #b` (a `#` inside
 *   quotes is data, as it is when bash sources the file).
 * - Otherwise everything from the first whitespace-preceded `#` is dropped as an inline comment, the rest is
 *   trimmed, and one pair of matching surrounding quotes is removed.
 *
 * @param path file to read; a missing file yields `{}`.
 * @returns map of key to parsed value (a later duplicate key overwrites an earlier one).
 */
function parseEnvFile(path: string): Record<string, string> {
  if (!existsSync(path)) return {}
  const out: Record<string, string> = {}
  for (const raw of readFileSync(path, 'utf8').split('\n')) {
    const line = raw.trim()
    if (!line || line.startsWith('#')) continue
    const eq = line.indexOf('=')
    if (eq < 0) continue
    const key = line.slice(0, eq).trim()
    const value = line.slice(eq + 1)

    // Quoted value: find the closing quote FIRST so a ` #` inside the quotes is not mistaken for a comment.
    const lead = value.trimStart()
    const quote = lead[0]
    if (quote === "'" || quote === '"') {
      const close = lead.indexOf(quote, 1)
      if (close > 0) {
        const tail = lead.slice(close + 1).trim()
        if (tail === '' || tail.startsWith('#')) {
          out[key] = lead.slice(1, close)
          continue
        }
      }
    }

    // Unquoted (or irregularly quoted) value: strip an inline comment introduced by whitespace + `#`.
    const comment = value.search(/\s#/)
    let v = (comment < 0 ? value : value.slice(0, comment)).trim()
    if (v.length >= 2 && (v[0] === "'" || v[0] === '"') && v[v.length - 1] === v[0]) v = v.slice(1, -1)
    out[key] = v
  }
  return out
}
|
|
118
|
+
|
|
119
|
+
/** Outcome of one `exec` call. */
interface ProcResult {
  /** True only when the process exited with status 0 and spawning raised no error. */
  ok: boolean
  /** Captured stdout ('' when none). */
  stdout: string
  /** stdout + stderr, plus a trailing note when the process was killed by a signal or failed to spawn. */
  combined: string
}
|
|
124
|
+
|
|
125
|
+
/**
 * Run a command synchronously with stdin closed and capture its output as UTF-8.
 *
 * @param cmd executable to spawn.
 * @param args argument vector (no shell is involved).
 * @param opts optional working directory and timeout in milliseconds.
 * @returns `ok` (exit status 0 and no spawn error), raw `stdout`, and `combined` = stdout + stderr with
 *   a note appended for a kill signal and/or a spawn error.
 */
function exec(cmd: string, args: string[], opts: { cwd?: string; timeoutMs?: number } = {}): ProcResult {
  const { cwd, timeoutMs } = opts
  const child = spawnSync(cmd, args, {
    cwd,
    input: '', // close stdin (codex would otherwise read from the terminal)
    timeout: timeoutMs,
    encoding: 'utf8',
    maxBuffer: 64 * 1024 * 1024,
  })
  const stdout = child.stdout ?? ''
  // A timeout shows up as a signal and a spawn failure as `error`; appending both to the combined
  // output lets the failure path report the real cause.
  const pieces = [stdout + (child.stderr ?? '')]
  if (child.signal) {
    const timeoutNote = timeoutMs ? ` (timeout ${timeoutMs}ms)` : ''
    pieces.push(`${cmd}: process killed by ${child.signal}${timeoutNote}`)
  }
  if (child.error) pieces.push(`${cmd}: ${child.error.message}`)
  const ok = child.status === 0 && child.error === undefined
  return { ok, stdout, combined: pieces.join('\n') }
}
|
|
141
|
+
|
|
142
|
+
/** Path of the sweep log file; '' until loadConfig sets it, in which case log() only prints. */
let LOG = ''

/** Print a message prefixed with a second-precision UTC timestamp, and append it to LOG when set. */
function log(message: string): void {
  const stamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z')
  const entry = `${stamp} ${message}`
  if (LOG) appendFileSync(LOG, `${entry}\n`)
  console.log(entry)
}
|
|
148
|
+
|
|
149
|
+
/**
 * Bring the dedicated checkout to `origin/<cfg.defaultBranch>`: clone it first when `<repoDir>/.git` is
 * missing, then fetch, checkout and hard-reset. Stops at the first failing git step.
 *
 * @returns true when every step succeeded; false (after logging) otherwise.
 */
function refreshRepo(cfg: Config): boolean {
  const { repoDir, remote, defaultBranch: branch } = cfg
  mkdirSync(dirname(repoDir), { recursive: true })

  const alreadyCloned = existsSync(join(repoDir, '.git'))
  if (!alreadyCloned) {
    log(`cloning ${remote} -> ${repoDir}`)
    const cloned = exec('git', ['clone', '-q', remote, repoDir])
    if (!cloned.ok) return logFail('clone failed')
  }

  const steps: string[][] = [
    ['fetch', '-q', 'origin', branch],
    ['checkout', '-q', branch],
    ['reset', '-q', '--hard', `origin/${branch}`],
  ]
  // `every` short-circuits, so a failed step prevents the later ones from running.
  const refreshed = steps.every((gitArgs) => exec('git', gitArgs, { cwd: repoDir }).ok)
  if (refreshed) return true
  return logFail(`refresh failed (is the default branch '${branch}'? set DEFAULT_BRANCH if not)`)
}
|
|
163
|
+
|
|
164
|
+
/** Log `message` and yield `false`, so a failure can be reported and returned in one expression. */
function logFail(message: string): false {
  const failed = false as const
  log(message)
  return failed
}
|
|
168
|
+
|
|
169
|
+
/** The subset of `gh pr list --json` fields the sweep relies on (validated by `isPr`). */
interface Pr {
  number: number
  /** Head commit SHA; keys the hidden reviewed/failed markers. */
  headRefOid: string
  isDraft: boolean
  /** May be null; a missing author is treated as a non-bot login. */
  author: { login: string } | null
  labels: { name: string }[]
}
|
|
176
|
+
|
|
177
|
+
/**
 * List the repo's open PRs via `gh pr list`, keeping only entries that pass `isPr`.
 *
 * @returns the validated PRs, or null (after logging) when gh failed or its output was not a JSON array.
 */
function listPrs(cfg: Config): Pr[] | null {
  const abort = (message: string): null => {
    log(message)
    return null
  }

  // Filter the PR list directly rather than `gh pr list --label` — that search index lags behind labelling.
  const jsonFields = ['number', 'headRefOid', 'isDraft', 'author', 'labels'].join(',')
  const listing = exec('gh', ['pr', 'list', '--repo', cfg.slug, '--state', 'open', '--json', jsonFields])
  if (!listing.ok) return abort('gh pr list failed (auth/network down?) — aborting sweep')

  let parsed: unknown
  try {
    parsed = JSON.parse(listing.stdout)
  } catch {
    return abort('gh pr list returned unparseable JSON — aborting sweep')
  }
  if (!Array.isArray(parsed)) return abort('gh pr list returned a non-array — aborting sweep')

  const valid = parsed.filter(isPr)
  const rejected = parsed.length - valid.length
  if (rejected > 0) log(`gh pr list: ${rejected} entries failed shape check — skipped`)
  return valid
}
|
|
200
|
+
|
|
201
|
+
/**
 * Type guard for one `gh pr list` entry. Checks every field `Pr` declares — number, headRefOid, isDraft,
 * labels (each via `isLabel`) and author (via `isAuthor`) — using `in`-narrowing only, no assertions, so a
 * malformed entry is rejected here instead of throwing or mis-scoping later.
 */
function isPr(raw: unknown): raw is Pr {
  return (
    typeof raw === 'object' &&
    raw !== null &&
    'number' in raw &&
    typeof raw.number === 'number' &&
    'headRefOid' in raw &&
    typeof raw.headRefOid === 'string' &&
    'isDraft' in raw &&
    typeof raw.isDraft === 'boolean' &&
    'labels' in raw &&
    Array.isArray(raw.labels) &&
    raw.labels.every(isLabel) &&
    'author' in raw &&
    isAuthor(raw.author)
  )
}
|
|
212
|
+
|
|
213
|
+
/** Type guard: a non-null object whose `name` property is a string. */
function isLabel(raw: unknown): raw is { name: string } {
  if (typeof raw !== 'object' || raw === null) return false
  return 'name' in raw && typeof raw.name === 'string'
}
|
|
216
|
+
|
|
217
|
+
/** Type guard: either null, or an object whose `login` property is a string. */
function isAuthor(raw: unknown): raw is { login: string } | null {
  if (raw === null) return true
  return typeof raw === 'object' && 'login' in raw && typeof raw.login === 'string'
}
|
|
220
|
+
|
|
221
|
+
/** True when one of the PR's labels is named exactly `cfg.reviewLabel`. */
function hasReviewLabel(pr: Pr, cfg: Config): boolean {
  const wanted = cfg.reviewLabel
  for (const { name } of pr.labels) {
    if (name === wanted) return true
  }
  return false
}
|
|
224
|
+
|
|
225
|
+
/**
 * Decide whether a PR is eligible for this sweep. Drafts and PRs whose author login ends in `[bot]` are
 * never eligible, in either scope; in `label` scope the PR must also carry the review label.
 */
function inScope(pr: Pr, cfg: Config): boolean {
  const authorLogin = pr.author?.login ?? ''
  const fromBot = authorLogin.endsWith('[bot]')
  if (pr.isDraft || fromBot) return false
  // auto: any non-draft, non-bot PR qualifies
  return cfg.scope === 'label' ? hasReviewLabel(pr, cfg) : true
}
|
|
231
|
+
|
|
232
|
+
/** One PR comment reduced to what the sweep needs (built by `toComment`). */
interface Comment {
  /** Author login; '' when the source entry had none. */
  login: string
  /** Comment text; '' when the source entry had none. */
  body: string
}
|
|
236
|
+
|
|
237
|
+
/**
 * Fetch a PR's comments via `gh pr view --json comments`.
 *
 * @returns the comments, or null when gh failed or returned something that is not `{ comments: [...] }`.
 *   Null means "unreadable", not "no comments": the caller skips such a PR, because treating it as
 *   unreviewed would let a GitHub blip cause a duplicate review post.
 */
function prComments(cfg: Config, number: number): Comment[] | null {
  const view = exec('gh', ['pr', 'view', String(number), '--repo', cfg.slug, '--json', 'comments'])
  if (!view.ok) return null

  let parsed: unknown
  try {
    parsed = JSON.parse(view.stdout)
  } catch {
    return null
  }

  if (typeof parsed === 'object' && parsed !== null && 'comments' in parsed && Array.isArray(parsed.comments)) {
    return parsed.comments.map(toComment)
  }
  return null
}
|
|
251
|
+
|
|
252
|
+
/**
 * Coerce one raw comment entry into a `Comment`. Never throws: a missing or non-string `body`, or a
 * missing or malformed `author.login`, becomes ''.
 */
function toComment(c: unknown): Comment {
  if (typeof c !== 'object' || c === null) return { login: '', body: '' }

  let body = ''
  if ('body' in c && typeof c.body === 'string') body = c.body

  let login = ''
  if ('author' in c) {
    const who = c.author
    if (typeof who === 'object' && who !== null && 'login' in who && typeof who.login === 'string') {
      login = who.login
    }
  }
  return { login, body }
}
|
|
262
|
+
|
|
263
|
+
// The per-PR MEMORY: the existing review conversation — the reviewer's past reviews + the author's replies —
// fed back into the prompt so it stops re-litigating settled points and knows when to converge. The GitHub
// thread IS the durable store (survives restarts, already holds the replies); we just read it back.
const MEMORY_COMMENTS = 20 // recent thread context, bounded so the prompt can't balloon on a chatty PR

/**
 * Render the PR's recent conversation as prompt text.
 *
 * Comments whose author login ends in `[bot]` are dropped, hidden `<!-- ... -->` markers are stripped, and
 * comments that are empty once stripped are discarded BEFORE the last MEMORY_COMMENTS are taken — so a
 * marker-only comment neither shows up as a bare `@login:` entry nor uses up a memory slot.
 *
 * @param comments the PR's comments, oldest first.
 * @returns `@login:\n<body>` entries joined by a `---` rule; '' when nothing remains.
 */
function priorReviewThread(comments: Comment[]): string {
  return comments
    .filter((c) => !c.login.endsWith('[bot]')) // drop CI bots; keep prior reviews + human/agent replies
    .map((c) => ({ login: c.login, text: c.body.replace(/<!--[\s\S]*?-->/g, '').trim() })) // strip hidden markers
    .filter((c) => c.text.length > 0)
    .slice(-MEMORY_COMMENTS)
    .map((c) => `@${c.login}:\n${c.text}`)
    .join('\n\n---\n\n')
}
|
|
276
|
+
|
|
277
|
+
/**
 * Count the lines of a PR's diff as printed by `gh pr diff`; a single trailing newline does not add a line.
 *
 * @returns the line count (0 for empty output), or null when gh failed. Null is deliberately distinct
 *   from 0 so an unreadable diff is never treated as "under the cap".
 */
function diffLineCount(cfg: Config, number: number): number | null {
  const { ok, stdout } = exec('gh', ['pr', 'diff', String(number), '--repo', cfg.slug])
  if (!ok) return null
  if (stdout === '') return 0
  const segments = stdout.split('\n').length
  return stdout.endsWith('\n') ? segments - 1 : segments
}
|
|
285
|
+
|
|
286
|
+
/** Hidden HTML-comment markers keyed to the PR's head SHA: `mark` for a posted review, `failMark` for a reported failure. */
function markersFor(pr: Pr): { mark: string; failMark: string } {
  const sha = pr.headRefOid
  const mark = `<!-- stupify:${sha} -->`
  const failMark = `<!-- stupify-failed:${sha} -->`
  return { mark, failMark }
}
|
|
292
|
+
|
|
293
|
+
/**
 * Build the instruction text handed to codex for one PR: read the review spec files under
 * `cfg.reviewDir`, fetch the diff, write the review to `/tmp/review-<number>.md` ending with the
 * head-SHA marker, and post it. When `priorThread` is non-empty it is appended as a "memory" section.
 */
function reviewPrompt(cfg: Config, pr: Pr, priorThread: string): string {
  const reviewDir = cfg.reviewDir
  const stamp = markersFor(pr).mark
  const reviewFile = `/tmp/review-${pr.number}.md`

  let memory = ''
  if (priorThread) {
    memory = `\n\n## Prior reviews on this PR (your memory)
This is the existing review conversation — your past reviews and the author's replies. You are CONTINUING it,
not starting fresh. Apply ${reviewDir}/REVIEW-PROMPT.md's "Prior reviews on this PR" rules: don't re-raise resolved or
reasoned-declined items, report only what's genuinely new, and converge (post the one-line "no new issues"
and stop) if nothing new remains.

${priorThread}`
  }

  return `You are a code reviewer running in an automated sweep (you have gh + git; no token needed). DO NOT modify any code.
Read ${reviewDir}/REVIEW-PROMPT.md and ${reviewDir}/RUBRIC.md (the spec + rubric) and ${reviewDir}/CORPUS.md (the curated good-code reference; open the live files it points at as needed). Then:
1. Get the diff: gh pr diff ${pr.number} --repo ${cfg.slug}
2. Review it per the spec — catch bugs / type-lies / dead-code / footguns AND reinvents-primitive / slop, each citing the corpus primitive it should reuse; sort worst-first.
3. Write the review to ${reviewFile}, formatted EXACTLY per ${reviewDir}/REVIEW-PROMPT.md's 'Comment format' section (it owns the format — opener, finding blocks, attribution). END the file with exactly this line: ${stamp}
4. Post it: gh pr comment ${pr.number} --repo ${cfg.slug} --body-file ${reviewFile}
Keep it terse; no preamble.${memory}`
}
|
|
314
|
+
|
|
315
|
+
/**
 * Run one codex review for `pr` and append codex's full output to the sweep log.
 *
 * On a non-zero codex exit (or a spawn failure or timeout) a short failure comment carrying the
 * failure marker is posted on the PR, so later sweeps skip this head SHA.
 *
 * @returns tokens used on success (0 when the count could not be parsed), or null when codex failed.
 */
function reviewPr(cfg: Config, pr: Pr, priorThread: string): number | null {
  const { failMark } = markersFor(pr)
  const commentFile = `/tmp/review-${pr.number}.md`
  log(`reviewing PR #${pr.number} @ ${pr.headRefOid.slice(0, 8)}`)

  const codexArgs = [
    'exec',
    '--cd',
    cfg.repoDir,
    '--sandbox',
    'workspace-write',
    '-c',
    `model_reasoning_effort=${cfg.codexEffort}`,
    '-c',
    'sandbox_workspace_write.network_access=true',
    '-c',
    'sandbox_workspace_write.writable_roots=["/tmp"]',
    // provider/model overrides are only passed when configured
    ...(cfg.codexProvider ? ['-c', `model_provider=${cfg.codexProvider}`] : []),
    ...(cfg.codexModel ? ['-c', `model=${cfg.codexModel}`] : []),
    reviewPrompt(cfg, pr, priorThread),
  ]

  const run = exec('codex', codexArgs, { cwd: cfg.repoDir, timeoutMs: 1_200_000 })
  appendFileSync(LOG, `${run.combined}\n`)

  if (!run.ok) {
    // Codex couldn't run (provider down, out of credits, timeout, bad diff). Don't fail silently — post a
    // short error on the PR with the failure marker so the next sweep skips this head instead of retrying.
    const reason = failureReason(run.combined)
    log(`  review FAILED for #${pr.number} — ${reason}`)
    const failureLines = [
      "uhh — i couldn't review this one. codex didn't run:",
      '',
      `> ${reason}`,
      '',
      "_— stupify (auto-reviewer). i'll retry on your next push._",
      failMark,
    ]
    writeFileSync(commentFile, `${failureLines.join('\n')}\n`)
    const posted = exec('gh', ['pr', 'comment', String(pr.number), '--repo', cfg.slug, '--body-file', commentFile])
    if (!posted.ok) log(`  (couldn't post failure comment for #${pr.number} either — gh down?)`)
    return null
  }

  const tokens = parseTokens(run.combined)
  log(`  #${pr.number} done (${tokens ?? '?'} tokens)`)
  return tokens ?? 0
}
|
|
364
|
+
|
|
365
|
+
/**
 * Extract the token count from codex output. Finds the LAST line containing "tokens used"
 * (case-insensitive) and reads the digits of the line after it, ignoring separators such as commas.
 *
 * @returns the count, or null when there is no such line or the following line has no digits.
 */
function parseTokens(out: string): number | null {
  const lines = out.split('\n')
  let idx = lines.length
  while (idx-- > 0) {
    if (!/tokens used/i.test(lines[idx] ?? '')) continue
    const digits = (lines[idx + 1] ?? '').replace(/\D/g, '')
    return digits === '' ? null : Number(digits)
  }
  return null
}
|
|
377
|
+
|
|
378
|
+
/**
 * Pick a one-line, human-readable failure cause out of codex's output: the LAST trimmed line that matches
 * a failure keyword and is not a "no error"/"0 error" line, with backticks replaced by spaces and the
 * result cut to 220 characters. Falls back to a generic message when nothing matches.
 */
function failureReason(out: string): string {
  const signal = /payment required|credits|quota|rate.?limit|429|5\d\d |timeout|killed|enoent|spawn|error/i
  const noise = /no error|0 error/i

  let lastHit = ''
  for (const rawLine of out.split('\n')) {
    const line = rawLine.trim()
    if (signal.test(line) && !noise.test(line)) lastHit = line
  }

  const cleaned = lastHit.replace(/`/g, ' ').slice(0, 220).trim()
  if (cleaned) return cleaned
  return 'codex run failed (no output captured — check the sweep log)'
}
|
|
389
|
+
|
|
390
|
+
/**
 * Single-flight without flock: an exclusive (`wx`) create wins atomically.
 *
 * When the lock already exists it is stolen only if it is older than `staleMs` AND the PID recorded in it
 * is no longer running. Age alone is not proof of a crash: one codex run may take 20 minutes and a sweep
 * handles up to MAX_PRS of them, so a healthy sweep can easily outlive 30 minutes. As a last resort
 * (a hung holder, or the PID reused by an unrelated process) a lock older than `maxHoldMs` is stolen
 * regardless.
 *
 * @param path lockfile path; holds the owner's PID as text.
 * @param staleMs minimum age before a lock whose holder is dead may be stolen (default 30 min).
 * @param maxHoldMs age after which the lock is stolen even if its PID looks alive (default 24 h).
 * @returns true when this process now holds the lock.
 */
function acquireLock(path: string, staleMs: number = 30 * 60_000, maxHoldMs: number = 24 * 60 * 60_000): boolean {
  try {
    writeFileSync(path, String(process.pid), { flag: 'wx' })
    return true
  } catch {
    try {
      const age = Date.now() - statSync(path).mtimeMs
      if (age > maxHoldMs || (age > staleMs && !lockHolderAlive(path))) {
        writeFileSync(path, String(process.pid))
        return true
      }
    } catch {
      /* lock vanished between calls — let the next sweep retry */
    }
    return false
  }
}

/** True when the lockfile names a PID other than our own that currently maps to a running process. */
function lockHolderAlive(path: string): boolean {
  const pid = Number(readFileSync(path, 'utf8').trim())
  // Our own PID cannot be a live holder: we are only now trying to acquire, so it is a leftover.
  if (!Number.isInteger(pid) || pid <= 0 || pid === process.pid) return false
  try {
    process.kill(pid, 0) // signal 0 = existence check only, nothing is delivered
    return true
  } catch (err) {
    // EPERM: the process exists but belongs to another user — still alive.
    return typeof err === 'object' && err !== null && 'code' in err && err.code === 'EPERM'
  }
}
|
|
408
|
+
|
|
409
|
+
/**
 * One sweep: load config, take the single-flight lock, refresh the checkout, verify the target repo has
 * its review files, then walk the in-scope open PRs and review each one that has no reviewed/failed
 * marker for its current head SHA. At most `cfg.maxPrs` candidates are handled per sweep.
 */
function main(): void {
  const cfg = loadConfig() // also mkdirs stateDir and sets LOG, so config warnings are already captured

  const lockPath = join(cfg.stateDir, 'sweep.lock')
  if (!acquireLock(lockPath)) {
    log('another sweep already running — skip')
    return
  }
  // Release the lock on any exit path, including the process.exit(1) calls below.
  process.on('exit', () => {
    try {
      rmSync(lockPath, { force: true })
    } catch {
      /* best-effort */
    }
  })

  if (!refreshRepo(cfg)) process.exit(1)

  const requiredFiles = ['CORPUS.md', 'REVIEW-PROMPT.md', 'RUBRIC.md']
  const haveMachinery = requiredFiles.every((name) => existsSync(join(cfg.repoDir, cfg.reviewDir, name)))
  if (!haveMachinery) {
    log(`no review machinery in ${cfg.slug}:${cfg.reviewDir}/ (need REVIEW-PROMPT.md + RUBRIC.md + CORPUS.md) — no-op. Copy the templates from the stupify repo.`)
    return
  }

  const prs = listPrs(cfg)
  if (prs === null) process.exit(1)
  const queue = prs.filter((pr) => inScope(pr, cfg))

  let reviewed = 0
  let tokens = 0
  // MAX_PRS caps PRs we do real (costly) work on, not PRs iterated — so already-reviewed PRs at the front
  // of the list can't consume the budget and starve later ones.
  let handled = 0

  for (const pr of queue) {
    const { mark, failMark } = markersFor(pr)

    const comments = prComments(cfg, pr.number)
    if (comments === null) {
      log(`skip #${pr.number} — couldn't read it from gh (failed/malformed); will retry next sweep`)
      continue
    }

    // Cheap dedup: this head SHA already has a review or a failure report.
    const allBodies = comments.map((c) => c.body).join('\n')
    const alreadyStamped = allBodies.includes(mark) || allBodies.includes(failMark)
    if (alreadyStamped) continue

    // A real candidate from here on; enforce the budget and defer the rest.
    if (handled >= cfg.maxPrs) {
      log(`reached MAX_PRS=${cfg.maxPrs} this sweep — deferring remaining candidates to the next sweep`)
      break
    }
    handled += 1

    // The diff size is only needed for the auto-scope cap and for the dry-run report.
    const needsCount = cfg.scope === 'auto' || cfg.dryRun
    const counted = needsCount ? diffLineCount(cfg, pr.number) : 0
    if (counted === null) {
      log(`skip #${pr.number} — couldn't read its diff from gh; will retry next sweep`)
      continue
    }
    const lines = counted

    // auto-scope only: skip oversized diffs UNLESS the PR carries the review label (the force-include).
    // Label scope is an explicit opt-in, so size never gates there.
    const oversized = cfg.scope === 'auto' && lines > cfg.diffLineCap
    if (oversized && !hasReviewLabel(pr, cfg)) {
      log(`skip #${pr.number} — diff ${lines} lines > cap ${cfg.diffLineCap} (add '${cfg.reviewLabel}' to force)`)
      continue
    }

    if (cfg.dryRun) {
      log(`DRY_RUN would review #${pr.number} @ ${pr.headRefOid.slice(0, 8)} (diff ${lines} lines)`)
      continue
    }

    const used = reviewPr(cfg, pr, priorReviewThread(comments))
    if (used === null) continue
    reviewed += 1
    tokens += used
  }

  log(`sweep done — scope=${cfg.scope} reviewed=${reviewed} tokens~${tokens}`)
}
|
|
491
|
+
|
|
492
|
+
main()
|
package/dist/analysis.d.ts
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import type { LocalModel } from "./model.ts";
|
|
2
|
-
import type { SearchMatch, SemChangeSet, SemContext, SemContextPack, StupifyCheck } from "./types.ts";
|
|
3
|
-
export declare function runSearch(model: LocalModel, request: SearchRequest): Promise<readonly SearchMatch[]>;
|
|
4
|
-
export type SearchRequest = Readonly<{
|
|
5
|
-
prompt: string;
|
|
6
|
-
schema: unknown;
|
|
7
|
-
contexts: readonly SemContext[];
|
|
8
|
-
}>;
|
|
9
|
-
export declare function searchRequest(input: Readonly<{
|
|
10
|
-
changeSet: SemChangeSet;
|
|
11
|
-
contexts: readonly SemContext[];
|
|
12
|
-
pack: SemContextPack;
|
|
13
|
-
patterns: readonly StupifyCheck[];
|
|
14
|
-
includeCounterReasonInPrompt?: boolean;
|
|
15
|
-
}>): SearchRequest;
|
|
16
|
-
export declare function countPromptTokens(model: LocalModel, prompt: string): Promise<number>;
|
package/dist/analysis.js
DELETED
|
@@ -1,168 +0,0 @@
|
|
|
1
|
-
import { cachedJson, fingerprint } from "./cache.js";
|
|
2
|
-
import { searchPrompt } from "./prompts.js";
|
|
3
|
-
import { diagnostic, diagnosticError } from "./ui.js";
|
|
4
|
-
/** Send the request's prompt and schema to the model (last argument 0) and map the raw JSON result onto the request's contexts. */
export async function runSearch(model, request) {
    const { prompt, schema, contexts } = request;
    const rawResult = await runJsonPrompt(model, prompt, schema, 0);
    return uncheckedSearchMatches(rawResult, contexts);
}
|
|
8
|
-
/**
 * Assemble a search request: the rendered prompt, a JSON schema restricted to the given contexts, and the
 * contexts themselves. `includeCounterReasonInPrompt` defaults to false.
 */
export function searchRequest(input) {
    const { contexts } = input;
    const includeCounterReason = input.includeCounterReasonInPrompt ?? false;
    const prompt = searchPrompt({ ...input, includeCounterReason });
    const schema = searchSchema(contexts);
    return { prompt, schema, contexts };
}
|
|
18
|
-
/**
 * Count the tokens in `prompt` by POSTing it to `${model.baseUrl}/tokenize`, going through `cachedJson`
 * under a key derived from the model id, profile and prompt.
 *
 * @throws Error when the tokenize endpoint answers non-OK or its body has no `tokens` array.
 */
export async function countPromptTokens(model, prompt) {
    const cacheKey = fingerprint({
        version: 1,
        modelId: model.id,
        profile: model.profile,
        prompt,
    });
    const tokenize = async () => {
        const response = await fetch(`${model.baseUrl}/tokenize`, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify({ content: prompt }),
        });
        if (!response.ok) {
            throw new Error(`llama-server tokenize failed: HTTP ${response.status} ${await response.text()}`);
        }
        const body = await response.json();
        if (!Array.isArray(body.tokens)) {
            throw new Error("llama-server tokenize returned no tokens.");
        }
        return { count: body.tokens.length };
    };
    const { count } = await cachedJson("prompt-tokens", cacheKey, tokenize);
    return count;
}
|
|
40
|
-
/**
 * JSON schema for the model's answer: an object with up to 5 `matches`, each requiring `targetId`
 * (restricted to the target ids of the given contexts), `reason` and `proof`, with no extra properties.
 */
function searchSchema(contexts) {
    const allowedTargetIds = contexts.map((context) => context.targetId);
    const matchSchema = {
        type: "object",
        properties: {
            targetId: { type: "string", enum: allowedTargetIds },
            reason: { type: "string" },
            proof: { type: "string" },
        },
        required: ["targetId", "reason", "proof"],
        additionalProperties: false,
    };
    return {
        type: "object",
        properties: {
            matches: { type: "array", maxItems: 5, items: matchSchema },
        },
        required: ["matches"],
        additionalProperties: false,
    };
}
|
|
63
|
-
/**
 * Turn the model's raw JSON answer into search matches, one per entry whose `targetId` names a known
 * context. Proof, snapshot, file and entity fields come from the context, not from the model.
 *
 * Tolerates malformed model output instead of throwing: a null or non-object `value`, a missing or
 * non-array `matches`, and null entries all yield no matches.
 *
 * @param value parsed model output, expected shape `{ matches: [{ targetId, reason }] }`.
 * @param contexts the contexts offered to the model.
 * @returns the matches, in model order.
 */
function uncheckedSearchMatches(value, contexts) {
    const matches = Array.isArray(value?.matches) ? value.matches : [];
    const contextsByTargetId = new Map(contexts.map((context) => [context.targetId, context]));
    return matches.flatMap((match) => {
        const targetId = match?.targetId ?? "";
        const context = contextsByTargetId.get(targetId);
        // Unknown (or missing) target id: the model referred to something it was not offered.
        if (!context)
            return [];
        return [{
                targetId,
                patternId: context.checkId,
                reason: match?.reason ?? "",
                proof: sourcePointer(context),
                snapshot: sourceSnapshot(context),
                filePath: context.filePath,
                entityName: context.entityName,
                entityKind: context.entityKind,
            }];
    });
}
|
|
83
|
-
/**
 * Formats a `file::kind::name` pointer string for a context.
 *
 * - file: `context.filePath`, or "(unknown)" when it is null/undefined.
 * - kind: `context.entityKind`, or "entity" when it is falsy.
 * - name: `context.entityName`, or `context.entityId` when the name is falsy.
 *
 * @param context Context whose location fields are read.
 * @returns The pointer string.
 */
function sourcePointer(context) {
    const file = context.filePath ?? "(unknown)";
    const kind = context.entityKind || "entity";
    const name = context.entityName || context.entityId;
    return `${file}::${kind}::${name}`;
}
|
|
87
|
-
/**
 * Extracts a short source snapshot from a context's JSON text.
 *
 * `context.text` is parsed as JSON; the `after` field is preferred and
 * `before` is used when `after` yields no usable string. The chosen text is
 * passed through `limitSnapshot`.
 *
 * @param context Context whose `text` field is read.
 * @returns The snapshot text, or `undefined` when the text cannot be
 *   parsed or neither field provides a usable string.
 */
function sourceSnapshot(context) {
    try {
        const entity = JSON.parse(context.text);
        const body = stringSnapshot(entity.after) ?? stringSnapshot(entity.before);
        if (!body) {
            return undefined;
        }
        return limitSnapshot(body);
    }
    catch {
        // Any failure while parsing or reading the fields means "no snapshot".
        return undefined;
    }
}
|
|
97
|
-
/**
 * Normalizes a candidate snapshot value.
 *
 * @param value Any value.
 * @returns The trimmed string, or `undefined` when `value` is not a string,
 *   is blank after trimming, or trims to the placeholder "(none)".
 */
function stringSnapshot(value) {
    const text = typeof value === "string" ? value.trim() : "";
    const unusable = text === "" || text === "(none)";
    return unusable ? undefined : text;
}
|
|
105
|
-
/**
 * Caps a snapshot at 12 lines.
 *
 * Text with 12 lines or fewer is returned unchanged. Longer text is cut to
 * its first 12 lines (re-joined with "\n") followed by a marker line noting
 * the truncation.
 *
 * @param value Snapshot text; lines are split on "\n" or "\r\n".
 * @returns The original or shortened text.
 */
function limitSnapshot(value) {
    const limit = 12;
    const lines = value.split(/\r?\n/);
    if (lines.length > limit) {
        const kept = lines.slice(0, limit).join("\n");
        return `${kept}\n[stupify: snapshot shortened after ${limit} lines]`;
    }
    return value;
}
|
|
113
|
-
/**
 * Runs a JSON-producing prompt through `cachedJson`.
 *
 * The cache namespace is "model-json" and the key is a fingerprint of the
 * model id, model profile, prompt, schema and temperature. The producer
 * callback delegates to `runJsonPromptUncached`.
 *
 * @param model Model descriptor; `id` and `profile` contribute to the key.
 * @param prompt Prompt text.
 * @param schema JSON schema for the expected response.
 * @param temperature Sampling temperature.
 * @returns Whatever `cachedJson` resolves to for this key.
 */
async function runJsonPrompt(model, prompt, schema, temperature) {
    const cacheKey = fingerprint({
        version: 1,
        modelId: model.id,
        profile: model.profile,
        prompt,
        schema,
        temperature,
    });
    const produce = () => runJsonPromptUncached(model, prompt, schema, temperature);
    return cachedJson("model-json", cacheKey, produce);
}
|
|
123
|
-
/**
 * Asks the model for JSON, retrying once if the first reply does not parse.
 *
 * The first attempt sends `prompt` as-is. If its output is not valid JSON,
 * a second attempt sends the prompt with a reminder appended. If that also
 * fails, the raw output of the second attempt is written via the
 * diagnostic helpers and an error is thrown.
 *
 * @param model Model descriptor forwarded to `complete`.
 * @param prompt Prompt text.
 * @param schema JSON schema forwarded to `complete`.
 * @param temperature Sampling temperature forwarded to `complete`.
 * @returns The parsed JSON value from the first attempt that parses.
 * @throws Error "Model returned invalid JSON." when both attempts fail.
 */
async function runJsonPromptUncached(model, prompt, schema, temperature) {
    const retryPrompt = `${prompt}\n\nYour previous response was not valid JSON. Return the requested JSON object only.`;
    let raw = "";
    for (const attemptPrompt of [prompt, retryPrompt]) {
        raw = await complete(model, attemptPrompt, schema, temperature);
        const outcome = parseJson(raw);
        if (outcome.ok) {
            return outcome.value;
        }
    }
    // `raw` now holds the output of the retry attempt.
    diagnosticError("Raw model output:");
    diagnostic(raw);
    throw new Error("Model returned invalid JSON.");
}
|
|
138
|
-
/**
 * Sends a single-message chat completion request and returns the reply text.
 *
 * Posts to `${model.baseUrl}/v1/chat/completions` with one user message and
 * a `response_format` of type "json_object" carrying `schema`.
 *
 * @param model Model descriptor; `baseUrl` and `id` are read.
 * @param prompt Content of the user message.
 * @param schema JSON schema sent inside `response_format`.
 * @param temperature Sampling temperature.
 * @returns The `content` string of the first choice's message.
 * @throws Error when the HTTP response is not OK, or when the body has no
 *   string message content.
 */
async function complete(model, prompt, schema, temperature) {
    const response = await fetch(`${model.baseUrl}/v1/chat/completions`, {
        method: "POST",
        headers: { "content-type": "application/json" },
        body: JSON.stringify({
            model: model.id,
            messages: [{ role: "user", content: prompt }],
            temperature,
            response_format: {
                type: "json_object",
                schema,
            },
        }),
    });
    if (!response.ok)
        throw new Error(`llama-server request failed: HTTP ${response.status} ${await response.text()}`);
    const body = await response.json();
    // `body?.` so a JSON `null` body reaches the descriptive error below
    // instead of raising a TypeError on property access.
    const content = body?.choices?.[0]?.message?.content;
    if (typeof content !== "string")
        throw new Error("llama-server returned no message content.");
    return content;
}
|
|
160
|
-
/**
 * Parses JSON text without throwing.
 *
 * @param raw Text to parse.
 * @returns `{ ok: true, value }` with the parsed value on success, or
 *   `{ ok: false }` when `JSON.parse` throws.
 */
function parseJson(raw) {
    let value;
    try {
        value = JSON.parse(raw);
    }
    catch {
        return { ok: false };
    }
    return { ok: true, value };
}
|
package/dist/cache.d.ts
DELETED