@letta-ai/letta-code 0.25.10 → 0.25.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -7
- package/letta.js +247149 -244616
- package/package.json +8 -3
- package/scripts/check-exported-functions.js +58 -0
- package/scripts/check-filename-casing.js +70 -0
- package/scripts/check-layer-boundaries.js +93 -0
- package/scripts/check-test-mock-isolation.js +19 -19
- package/scripts/check.js +64 -44
- package/scripts/codex-watch/check-release.ts +426 -0
- package/scripts/codex-watch/diff-models-json.test.ts +151 -0
- package/scripts/codex-watch/diff-models-json.ts +207 -0
- package/scripts/codex-watch/render-issue.ts +273 -0
- package/scripts/rename-to-kebab.js +59 -0
- package/scripts/run-unit-tests.cjs +79 -0
- package/scripts/update-kebab-imports.js +93 -0
- package/skills/working-in-parallel/SKILL.md +0 -90
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Watches stable openai/codex releases for tool/schema changes that may affect
|
|
4
|
+
* the letta-code harness.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* bun scripts/codex-watch/check-release.ts --dry-run
|
|
8
|
+
* bun scripts/codex-watch/check-release.ts --dry-run --since rust-v0.129.0
|
|
9
|
+
* bun scripts/codex-watch/check-release.ts --repo letta-ai/letta-code
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { spawnSync } from "node:child_process";
|
|
13
|
+
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
14
|
+
import { tmpdir } from "node:os";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
import {
|
|
17
|
+
decideVerdict,
|
|
18
|
+
diffModelsJson,
|
|
19
|
+
type ModelsDiff,
|
|
20
|
+
type ModelsJson,
|
|
21
|
+
} from "./diff-models-json.ts";
|
|
22
|
+
import {
|
|
23
|
+
type PathChangeSummary,
|
|
24
|
+
renderBody,
|
|
25
|
+
renderTitle,
|
|
26
|
+
} from "./render-issue.ts";
|
|
27
|
+
|
|
28
|
+
/** GitHub `owner/name` of the upstream repository whose releases are inspected. */
const CODEX_REPO = "openai/codex";

/**
 * Repository used for issue lookup/creation when `--repo` is not given.
 * Taken from GITHUB_REPOSITORY when that is set to a non-empty value.
 */
const DEFAULT_TARGET_REPO =
  process.env.GITHUB_REPOSITORY || "letta-ai/letta-code";

/** Upstream paths whose commits between the two tags are summarised per path. */
const WATCHED_PATHS = [
  "codex-rs/models-manager/models.json",
  "codex-rs/models-manager/prompt.md",
  "codex-rs/core/src/tools",
  "codex-rs/apply-patch",
];

/** Number of commit lines listed per watched path before the list is truncated. */
const MAX_COMMITS_PER_PATH = 8;
|
|
38
|
+
|
|
39
|
+
/**
 * The fields of a GitHub release object that this script reads.
 * Values come straight from the releases API response.
 */
interface Release {
  /** Git tag the release points at, e.g. `rust-v0.129.0`. */
  tag_name: string;
  /** Draft releases are never treated as stable. */
  draft: boolean;
  /** Pre-releases are never treated as stable. */
  prerelease: boolean;
  /** Link to the release page; used as the release URL in the rendered issue. */
  html_url: string;
  /** Release notes markdown; null is rendered as an empty string. */
  body: string | null;
  /** Publication timestamp; used to order releases. */
  published_at: string | null;
}
|
|
47
|
+
|
|
48
|
+
/** Parsed command-line options for the release check. */
interface Args {
  /** `--dry-run`: print the rendered issue instead of creating it. */
  dryRun: boolean;
  /** `--since TAG`: tag to diff from; null selects the stable release before the current one. */
  sinceTag: string | null;
  /** `--current TAG`: tag to report on; null selects the last stable release in the list. */
  currentTag: string | null;
  /** `--repo OWNER/REPO`: repository where issues are searched and created. */
  repo: string;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Parses the script's command-line flags.
 *
 * Recognised flags: `--dry-run`, `--since TAG`, `--current TAG`,
 * `--repo OWNER/REPO` and `--help` / `-h` (prints usage and exits with 0).
 *
 * @param argv Arguments after the script name.
 * @returns The parsed options; unspecified options keep their defaults.
 * @throws Error on an unknown argument, or when a flag that takes a value is
 *   last on the command line or is followed by another flag.
 */
function parseArgs(argv: string[]): Args {
  const args: Args = {
    dryRun: false,
    sinceTag: null,
    currentTag: null,
    repo: DEFAULT_TARGET_REPO,
  };

  // Reads the value that follows a flag. Failing here keeps a typo such as
  // `--since --dry-run` from silently using "--dry-run" as a tag name.
  const valueAt = (flag: string, index: number): string => {
    const value = argv[index];
    if (value === undefined || value.startsWith("-")) {
      throw new Error(`Missing value for ${flag}`);
    }
    return value;
  };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--dry-run") args.dryRun = true;
    else if (a === "--since") args.sinceTag = valueAt(a, ++i);
    else if (a === "--current") args.currentTag = valueAt(a, ++i);
    else if (a === "--repo") args.repo = valueAt(a, ++i);
    else if (a === "--help" || a === "-h") {
      console.log(
        `Usage: bun scripts/codex-watch/check-release.ts [--dry-run] [--since TAG] [--current TAG] [--repo OWNER/REPO]`,
      );
      process.exit(0);
    } else {
      throw new Error(`Unknown argument: ${a}`);
    }
  }
  return args;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Runs the GitHub CLI synchronously and parses its stdout as JSON.
 *
 * @param args Arguments passed to `gh`.
 * @param input Optional text piped to stdin; stdin is ignored when it is
 *   omitted or empty.
 * @returns The parsed JSON output, cast to `T` without validation.
 * @throws Error when `gh` cannot be started or exits with a non-zero status;
 *   SyntaxError when stdout is not valid JSON.
 */
function gh<T>(args: string[], input?: string): T {
  const res = spawnSync("gh", args, {
    encoding: "utf8",
    input,
    maxBuffer: 50 * 1024 * 1024,
    stdio: input ? ["pipe", "pipe", "pipe"] : ["ignore", "pipe", "pipe"],
  });
  if (res.error || res.status !== 0) {
    // When the process could not be spawned (e.g. gh is not installed) stderr
    // is null, so the spawn error is the only useful diagnostic.
    const detail = res.error?.message ?? res.stderr;
    throw new Error(`gh ${args.join(" ")} failed:\n${detail}`);
  }
  return JSON.parse(res.stdout) as T;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Runs git synchronously and returns its stdout.
 *
 * @param args Arguments passed to `git`.
 * @param cwd Working directory for the command; the current directory when omitted.
 * @returns The command's stdout as text.
 * @throws Error when git cannot be started or exits with a non-zero status.
 */
function git(args: string[], cwd?: string): string {
  const res = spawnSync("git", args, {
    cwd,
    encoding: "utf8",
    // Same ceiling as the gh helper; the 1 MiB default would abort the
    // command on a large diff or log.
    maxBuffer: 50 * 1024 * 1024,
    stdio: ["ignore", "pipe", "pipe"],
  });
  if (res.error || res.status !== 0) {
    // stderr is null when the process could not be spawned at all.
    const detail = res.error?.message ?? res.stderr;
    throw new Error(`git ${args.join(" ")} failed:\n${detail}`);
  }
  return res.stdout;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Tells whether a release counts as stable: it is neither a draft nor a
 * pre-release, and its tag is a plain `X.Y.Z` version with an optional
 * `rust-v` or `v` prefix.
 */
function isStableRelease(release: Release): boolean {
  const flaggedUnstable = release.draft || release.prerelease;
  return (
    !flaggedUnstable && /^(rust-v|v)?\d+\.\d+\.\d+$/.test(release.tag_name)
  );
}
|
|
109
|
+
|
|
110
|
+
/**
 * Downloads the upstream repository's releases from the GitHub REST API and
 * returns the stable ones ordered by ascending `published_at`.
 *
 * Reads at most 10 pages of 100 releases and stops at the first short page.
 * A token from GH_TOKEN or GITHUB_TOKEN is sent as a bearer token when present.
 *
 * @throws Error when any page request returns a non-OK status.
 */
async function listStableReleases(): Promise<Release[]> {
  const token = process.env.GH_TOKEN || process.env.GITHUB_TOKEN;
  const headers: Record<string, string> = {
    Accept: "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
    ...(token ? { Authorization: `Bearer ${token}` } : {}),
  };

  const collected: Release[] = [];
  let page = 1;
  while (page <= 10) {
    const url = `https://api.github.com/repos/${CODEX_REPO}/releases?per_page=100&page=${page}`;
    const response = await fetch(url, { headers });
    if (!response.ok) {
      throw new Error(
        `GitHub releases API failed (${response.status}): ${await response.text()}`,
      );
    }
    const batch = (await response.json()) as Release[];
    for (const release of batch) collected.push(release);
    // A page that is not full is the last one.
    if (batch.length < 100) break;
    page += 1;
  }

  const stable = collected.filter(isStableRelease);
  // A missing timestamp compares as the empty string.
  stable.sort((left, right) => {
    const leftStamp = left.published_at ?? "";
    const rightStamp = right.published_at ?? "";
    return leftStamp.localeCompare(rightStamp);
  });
  return stable;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Returns the release immediately before `currentTag` in `stables`.
 *
 * @param stables Releases in the order produced by the caller.
 * @param currentTag Tag name to locate.
 * @returns The preceding entry, or null when the tag is not in the list or is
 *   its first element.
 */
function findPreviousStable(
  stables: Release[],
  currentTag: string,
): Release | null {
  const position = stables.findIndex(
    ({ tag_name }) => tag_name === currentTag,
  );
  // -1 means the tag is unknown; 0 means there is nothing before it.
  return position > 0 ? (stables[position - 1] ?? null) : null;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Checks whether the target repository already has a codex-watch issue for `tag`.
 *
 * Searches issue titles in any state (up to 20 results) through the GitHub
 * CLI, then requires the title to start with the exact prefix followed by a space.
 *
 * @param targetRepo Repository to search, as `OWNER/REPO`.
 * @param tag Upstream release tag.
 */
function hasReportedTag(targetRepo: string, tag: string): boolean {
  const titlePrefix = `[codex-watch] openai/codex ${tag}`;
  const candidates = gh<Array<{ title: string }>>([
    "issue",
    "list",
    "--repo",
    targetRepo,
    "--state",
    "all",
    "--search",
    `${titlePrefix} in:title`,
    "--limit",
    "20",
    "--json",
    "title",
  ]);
  for (const issue of candidates) {
    if (issue.title.startsWith(`${titlePrefix} `)) return true;
  }
  return false;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Clones the upstream repository into `<tmp>/codex` with
 * `--filter=blob:none --no-checkout`.
 *
 * @param tmp Existing directory that will contain the clone.
 * @returns Path of the cloned repository.
 */
function cloneCodex(tmp: string): string {
  const checkoutDir = join(tmp, "codex");
  const remoteUrl = `https://github.com/${CODEX_REPO}.git`;
  git(["clone", "--filter=blob:none", "--no-checkout", remoteUrl, checkoutDir]);
  return checkoutDir;
}
|
|
176
|
+
|
|
177
|
+
/**
 * Reads a file's content at a given tag via `git show <tag>:<path>`.
 *
 * @param repoDir Repository to run git in.
 * @param tag Tag (or other revision) to read from.
 * @param path Repository-relative file path.
 * @returns The file content, or null when the command fails for any reason
 *   (for example the path does not exist at that tag).
 */
function showFile(repoDir: string, tag: string, path: string): string | null {
  const res = spawnSync("git", ["show", `${tag}:${path}`], {
    cwd: repoDir,
    encoding: "utf8",
    // Without this the 1 MiB default terminates git on a large file, which
    // would be indistinguishable from the file being absent.
    maxBuffer: 50 * 1024 * 1024,
    stdio: ["ignore", "pipe", "pipe"],
  });
  if (res.status !== 0) return null;
  return res.stdout;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Lists the paths reported by `git diff --name-only <prevTag>..<currTag>`.
 *
 * @returns One entry per non-empty output line.
 */
function changedFiles(
  repoDir: string,
  prevTag: string,
  currTag: string,
): string[] {
  const range = `${prevTag}..${currTag}`;
  const listing = git(["diff", "--name-only", range], repoDir);
  return listing.split("\n").filter((name) => name.length > 0);
}
|
|
196
|
+
|
|
197
|
+
/**
 * Collects `<short hash> <subject>` lines for commits in
 * `<prevTag>..<currTag>` that touch `path`.
 *
 * @returns All commit lines when there are at most MAX_COMMITS_PER_PATH of
 *   them; otherwise the first MAX_COMMITS_PER_PATH lines followed by one line
 *   stating how many were left out.
 */
function commitsForPath(
  repoDir: string,
  prevTag: string,
  currTag: string,
  path: string,
): string[] {
  const logArgs = [
    "log",
    "--format=%h %s",
    `${prevTag}..${currTag}`,
    "--",
    path,
  ];
  const lines = git(logArgs, repoDir)
    .split("\n")
    .filter((line) => line !== "");

  const omitted = lines.length - MAX_COMMITS_PER_PATH;
  if (omitted <= 0) return lines;

  const shown = lines.slice(0, MAX_COMMITS_PER_PATH);
  shown.push(`…and ${omitted} more commits`);
  return shown;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Produces a shortened unified diff (2 context lines) of `path` between two tags.
 *
 * @returns The first 120 lines of the diff, or null when the diff output is
 *   empty or whitespace only.
 */
function diffPreview(
  repoDir: string,
  prevTag: string,
  currTag: string,
  path: string,
): string | null {
  const range = `${prevTag}..${currTag}`;
  const patch = git(["diff", "--unified=2", range, "--", path], repoDir);
  if (patch.trim().length === 0) return null;

  const previewLines = patch.split("\n").slice(0, 120);
  return previewLines.join("\n");
}
|
|
228
|
+
|
|
229
|
+
/**
 * Creates an issue in `repo` through the GitHub CLI and prints gh's output
 * (trimmed) to stdout.
 *
 * Labels: always `codex-watch` and `automation`; `priority/review` for the
 * "tool-schema update needed" and "tool-surface review needed" verdicts;
 * `informational` for "no-op". The labels are created first if missing.
 *
 * @param repo Target repository as `OWNER/REPO`.
 * @param title Issue title.
 * @param body Issue body (markdown), handed to gh through a temporary file.
 * @param verdict Verdict string used to pick the extra label.
 * @throws Error when a label cannot be ensured, or when `gh issue create`
 *   cannot be started or exits with a non-zero status.
 */
function createIssue(
  repo: string,
  title: string,
  body: string,
  verdict: string,
): void {
  const labels = ["codex-watch", "automation"];
  if (
    verdict === "tool-schema update needed" ||
    verdict === "tool-surface review needed"
  ) {
    labels.push("priority/review");
  }
  if (verdict === "no-op") labels.push("informational");

  ensureLabels(repo, labels);

  // A freshly created private directory avoids the predictable, collidable
  // file name a timestamp in the shared temp dir would give.
  const scratchDir = mkdtempSync(join(tmpdir(), "codex-watch-body-"));
  try {
    const bodyFile = join(scratchDir, "body.md");
    writeFileSync(bodyFile, body);
    const args = [
      "issue",
      "create",
      "--repo",
      repo,
      "--title",
      title,
      "--body-file",
      bodyFile,
    ];
    for (const label of labels) args.push("--label", label);
    const res = spawnSync("gh", args, { encoding: "utf8" });
    if (res.error || res.status !== 0) {
      // stderr is null when gh could not be spawned; report the spawn error.
      const detail = res.error?.message ?? res.stderr;
      throw new Error(`gh ${args.join(" ")} failed:\n${detail}`);
    }
    console.log(res.stdout.trim());
  } finally {
    rmSync(scratchDir, { recursive: true, force: true });
  }
}
|
|
269
|
+
|
|
270
|
+
/**
 * Makes sure every label exists in `repo` by running `gh label create` for
 * each one; a failure whose stderr mentions "already exists" is accepted.
 *
 * @param repo Target repository as `OWNER/REPO`.
 * @param labels Label names to create.
 * @throws Error when gh cannot be started, or fails for any reason other than
 *   the label already existing.
 */
function ensureLabels(repo: string, labels: string[]): void {
  for (const label of labels) {
    const res = spawnSync("gh", ["label", "create", label, "--repo", repo], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
    });
    if (res.error) {
      // Spawn failure: stderr is null here, so it must not be inspected.
      throw new Error(
        `gh label create ${label} failed:\n${res.error.message}`,
      );
    }
    if (res.status !== 0 && !(res.stderr ?? "").includes("already exists")) {
      throw new Error(`gh label create ${label} failed:\n${res.stderr}`);
    }
  }
}
|
|
281
|
+
|
|
282
|
+
/**
 * Entry point.
 *
 * 1. Parses flags and loads the stable upstream releases.
 * 2. Picks the current release (`--current`, else the last stable one) and
 *    the previous release (`--since`, else the stable one just before it).
 * 3. Unless in dry-run mode, stops when an issue for the current tag exists.
 * 4. Clones upstream into a temp directory, fetches both tags, diffs
 *    models.json, inspects changed paths and derives a verdict.
 * 5. Renders the issue and either prints it (dry run) or creates it.
 *
 * The temp directory is removed even when a step fails.
 */
async function main(): Promise<void> {
  const { dryRun, sinceTag, currentTag, repo } = parseArgs(
    process.argv.slice(2),
  );

  const stables = await listStableReleases();
  if (stables.length === 0) throw new Error("No stable Codex releases found");

  let current: Release | undefined;
  if (currentTag) {
    current = stables.find((release) => release.tag_name === currentTag);
  } else {
    current = stables.at(-1);
  }
  if (!current) {
    throw new Error(`Could not find current release ${currentTag}`);
  }

  let previous: Release | null;
  if (sinceTag) {
    // A --since tag that is not a listed stable release is still accepted;
    // only its tag name is used from here on.
    const known = stables.find((release) => release.tag_name === sinceTag);
    previous = known ?? ({ tag_name: sinceTag } as Release);
  } else {
    previous = findPreviousStable(stables, current.tag_name);
  }
  if (!previous) {
    throw new Error(
      `Could not find previous stable before ${current.tag_name}`,
    );
  }

  // Dry runs skip the duplicate check and never query the issue tracker.
  if (!dryRun && hasReportedTag(repo, current.tag_name)) {
    console.log(`Already reported ${current.tag_name}; nothing to do.`);
    return;
  }

  const prevTag = previous.tag_name;
  const currTag = current.tag_name;
  const modelsJsonPath = "codex-rs/models-manager/models.json";
  const promptMdPath = "codex-rs/models-manager/prompt.md";

  const tmp = mkdtempSync(join(tmpdir(), "codex-watch-"));
  try {
    const repoDir = cloneCodex(tmp);
    for (const tag of [prevTag, currTag]) {
      git(
        [
          "fetch",
          "--filter=blob:none",
          "origin",
          `refs/tags/${tag}:refs/tags/${tag}`,
        ],
        repoDir,
      );
    }

    // A missing or unparsable models.json is logged and recorded as a parse
    // error rather than aborting the run.
    let modelsDiff: ModelsDiff | null = null;
    let parseError = false;
    try {
      const before = showFile(repoDir, prevTag, modelsJsonPath);
      const after = showFile(repoDir, currTag, modelsJsonPath);
      if (!before || !after) {
        throw new Error("missing models.json at one tag");
      }
      modelsDiff = diffModelsJson(
        JSON.parse(before) as ModelsJson,
        JSON.parse(after) as ModelsJson,
      );
    } catch (err) {
      parseError = true;
      console.error(`Failed to parse/diff models.json: ${String(err)}`);
    }

    const changed = changedFiles(repoDir, prevTag, currTag);
    const touchesDir = (prefix: string): boolean =>
      changed.some((file) => file.startsWith(prefix));
    const promptMdChanged = changed.includes(promptMdPath);
    const toolsDirChanged = touchesDir("codex-rs/core/src/tools/");
    const applyPatchDirChanged = touchesDir("codex-rs/apply-patch/");

    const verdict = decideVerdict({
      models_diff: modelsDiff,
      prompt_md_changed: promptMdChanged,
      tools_dir_changed: toolsDirChanged,
      apply_patch_dir_changed: applyPatchDirChanged,
      parse_error: parseError,
    });

    // Keep only watched paths that have at least one commit in the range.
    const pathChanges: PathChangeSummary[] = [];
    for (const path of WATCHED_PATHS) {
      const commits = commitsForPath(repoDir, prevTag, currTag, path);
      if (commits.length > 0) pathChanges.push({ path, commits });
    }

    const {
      GITHUB_SERVER_URL: serverUrl,
      GITHUB_REPOSITORY: repository,
      GITHUB_RUN_ID: runId,
    } = process.env;
    const workflowUrl =
      serverUrl && repository && runId
        ? `${serverUrl}/${repository}/actions/runs/${runId}`
        : "local dry-run";

    const promptPreview = promptMdChanged
      ? diffPreview(repoDir, prevTag, currTag, promptMdPath)
      : null;

    const input = {
      previous_tag: prevTag,
      current_tag: currTag,
      release_url: current.html_url,
      release_notes_md: current.body ?? "",
      verdict,
      models_diff: modelsDiff,
      prompt_md_changed: promptMdChanged,
      prompt_md_diff_preview: promptPreview,
      path_changes: pathChanges,
      workflow_run_url: workflowUrl,
    };

    const title = renderTitle(input);
    const body = renderBody(input);

    if (dryRun) {
      console.log(`# ${title}\n\n${body}`);
    } else {
      createIssue(repo, title, body, verdict);
    }
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
}

// Any failure is printed and turned into a non-zero exit code.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
decideVerdict,
|
|
4
|
+
diffModelsJson,
|
|
5
|
+
type ModelsJson,
|
|
6
|
+
} from "./diff-models-json.ts";
|
|
7
|
+
|
|
8
|
+
/**
 * Builds a model entry fixture with default tool-related fields.
 *
 * @param slug Model identifier.
 * @param extra Properties that override or extend the defaults.
 */
function model(slug: string, extra: Record<string, unknown> = {}) {
  const defaults = {
    slug,
    apply_patch_tool_type: "freeform",
    shell_type: "shell_command",
    supports_parallel_tool_calls: true,
    supports_search_tool: true,
    experimental_supported_tools: [],
    base_instructions: "Use apply_patch and multi_tool_use.parallel.",
    model_messages: {
      instructions_template: "You may call view_image when needed.",
    },
  };
  return { ...defaults, ...extra };
}
|
|
23
|
+
|
|
24
|
+
/** Wraps the given model entries in a models.json-shaped fixture object. */
function models(...entries: Array<Record<string, unknown>>): ModelsJson {
  const fixture: ModelsJson = { models: entries };
  return fixture;
}
|
|
27
|
+
|
|
28
|
+
// Covers the three kinds of change diffModelsJson reports: schema field
// deltas, prompt tool-mention deltas, and added/removed models.
describe("diffModelsJson", () => {
  test("detects tool schema field changes", () => {
    const before = models(model("gpt-5.5"));
    const after = models(model("gpt-5.5", { shell_type: "unified_exec" }));

    const result = diffModelsJson(before, after);

    expect(result.has_tool_schema_change).toBe(true);
    expect(result.field_deltas).toEqual([
      {
        slug: "gpt-5.5",
        field: "shell_type",
        previous: "shell_command",
        current: "unified_exec",
      },
    ]);
  });

  test("detects prompt tool mention changes separately", () => {
    // Only the prompt texts differ; every schema field keeps its default.
    const rewordedPrompts = {
      base_instructions: "Use apply_patch and web_search.",
      model_messages: { instructions_template: "No image tool mention." },
    };
    const before = models(model("gpt-5.5"));
    const after = models(model("gpt-5.5", rewordedPrompts));

    const result = diffModelsJson(before, after);
    const changedFields = result.field_deltas.map(({ field }) => field);

    expect(result.has_tool_schema_change).toBe(false);
    expect(result.has_prompt_tool_change).toBe(true);
    expect(changedFields).toEqual(["prompt_tool_mentions"]);
  });

  test("reports added and removed models", () => {
    const before = models(model("gpt-5.4"), model("gpt-5.5"));
    const after = models(model("gpt-5.5"), model("gpt-5.6"));

    const result = diffModelsJson(before, after);

    expect(result.added_models).toEqual(["gpt-5.6"]);
    expect(result.removed_models).toEqual(["gpt-5.4"]);
  });
});
|
|
74
|
+
|
|
75
|
+
// Each case flips one input away from the "nothing changed" baseline and
// checks the verdict string that results.
describe("decideVerdict", () => {
  // Diff of a models.json against an identical copy.
  const unchangedDiff = () =>
    diffModelsJson(models(model("gpt-5.5")), models(model("gpt-5.5")));

  // Flag values for a release in which no watched area changed.
  const nothingChanged = {
    prompt_md_changed: false,
    tools_dir_changed: false,
    apply_patch_dir_changed: false,
    parse_error: false,
  };

  test("returns no-op for empty diff", () => {
    const verdict = decideVerdict({
      models_diff: unchangedDiff(),
      ...nothingChanged,
    });
    expect(verdict).toBe("no-op");
  });

  test("returns tool-surface review needed when tools dir changed", () => {
    const verdict = decideVerdict({
      models_diff: unchangedDiff(),
      ...nothingChanged,
      tools_dir_changed: true,
    });
    expect(verdict).toBe("tool-surface review needed");
  });

  test("returns tool-schema update needed for models.json schema fields", () => {
    const schemaDiff = diffModelsJson(
      models(model("gpt-5.5")),
      models(model("gpt-5.5", { shell_type: "unified_exec" })),
    );
    const verdict = decideVerdict({
      models_diff: schemaDiff,
      ...nothingChanged,
    });
    expect(verdict).toBe("tool-schema update needed");
  });

  test("returns prompt-only update for prompt changes", () => {
    const verdict = decideVerdict({
      models_diff: unchangedDiff(),
      ...nothingChanged,
      prompt_md_changed: true,
    });
    expect(verdict).toBe("prompt-only update");
  });

  test("returns manual review on parse error", () => {
    const verdict = decideVerdict({
      models_diff: null,
      ...nothingChanged,
      parse_error: true,
    });
    expect(verdict).toBe("manual review required");
  });
});
|