pi-codex-search 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +198 -50
- package/index.ts +922 -93
- package/package.json +10 -4
- package/scripts/codex-e2e.ts +797 -0
- package/src/codex.ts +90 -352
- package/src/command.ts +564 -0
- package/src/config.ts +287 -0
- package/src/cookies.ts +131 -0
- package/src/errors.ts +56 -0
- package/src/modes/responses.ts +310 -0
- package/src/modes/standalone.ts +378 -0
- package/src/modes/types.ts +41 -0
- package/src/ref-store.ts +74 -0
- package/src/transport.ts +110 -0
- package/src/ua.ts +67 -0
|
@@ -0,0 +1,797 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import {
|
|
6
|
+
CodexError,
|
|
7
|
+
createTransport,
|
|
8
|
+
extractAccountIdFromToken,
|
|
9
|
+
fetchCodexModels,
|
|
10
|
+
isUnsupportedStandaloneCombination,
|
|
11
|
+
runResponsesSearch,
|
|
12
|
+
runStandaloneCommands,
|
|
13
|
+
selectDefaultModel,
|
|
14
|
+
type CodexTransport,
|
|
15
|
+
type CodexWebSearchResult,
|
|
16
|
+
type Freshness,
|
|
17
|
+
type SearchContextSize,
|
|
18
|
+
type StandaloneCommandsOptions,
|
|
19
|
+
} from "../src/codex.ts";
|
|
20
|
+
|
|
21
|
+
/** Key of the credential entry inside the auth file; also used as a label prefix in error messages. */
const PROVIDER = "openai-codex";

/** Auth file location used when `--auth` is not given. */
const DEFAULT_AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json");

/** Search query used when `--query` is not given. */
const DEFAULT_QUERY = "OpenAI Codex release notes";

/** Values accepted by `--freshness`; also the default selection. */
const FRESHNESS_VALUES: readonly Freshness[] = ["live", "indexed", "cached"];

/** Values accepted by `--context`; also the default selection. */
const CONTEXT_VALUES: readonly SearchContextSize[] = ["low", "medium", "high"];

/** Values accepted by `--api`; also the default selection. */
const API_VALUES = ["responses", "standalone"] as const;

/** Values accepted by `--suite`; also the default selection. */
const SUITE_VALUES = ["matrix", "actions", "session", "concurrency"] as const;

/** One of the entries of {@link API_VALUES}. */
type SearchApi = (typeof API_VALUES)[number];

/** One of the entries of {@link SUITE_VALUES}. */
type E2eSuite = (typeof SUITE_VALUES)[number];
|
|
31
|
+
|
|
32
|
+
/** Fully resolved command-line options, as produced by `parseArgs`. */
interface CliOptions {
  /** Path of the auth JSON file to read credentials from (`--auth`). */
  authPath: string;
  /** APIs to exercise (`--api`). */
  apis: SearchApi[];
  /** Search context sizes to exercise (`--context`). */
  contexts: SearchContextSize[];
  /** Freshness modes to exercise (`--freshness`). */
  freshnesses: Freshness[];
  /** Suites to run (`--suite`). */
  suites: E2eSuite[];
  /** Query text sent by the search cases (`--query`). */
  query: string;
  /** Explicit model id (`--model`); when absent the model is resolved from the model list. */
  model?: string;
  /** Optional base URL override (`--base-url`). */
  baseUrl?: string;
  /** Per-case timeout in milliseconds (`--timeout-ms`). */
  timeoutMs: number;
  /** Numbers of parallel requests to issue in the concurrency suite (`--concurrency`). */
  concurrencyValues: number[];
}
|
|
44
|
+
|
|
45
|
+
/**
 * Credential entry as found in the auth file. Every field is `unknown`
 * because the file is untrusted JSON; callers validate before use.
 */
interface AuthCredential {
  /** Access token; must be a non-empty string to be usable. */
  access?: unknown;
  /** Account id; when missing it is decoded from the access token instead. */
  accountId?: unknown;
  /** Expiry, compared against `Date.now()` when it is a number. */
  expires?: unknown;
}
|
|
50
|
+
|
|
51
|
+
/** Shape of the auth file as far as this script is concerned: only the provider entry is read. */
interface AuthFile {
  [PROVIDER]?: AuthCredential;
}
|
|
54
|
+
|
|
55
|
+
/** Outcome of a single e2e case, as printed by `printResult` and tallied in the summary. */
interface E2eResult {
  /** Suite the case belongs to. */
  suite: E2eSuite;
  /** Case label within the suite. */
  name: string;
  /** API the case targeted, when applicable. */
  api?: SearchApi;
  /** Search context size the case used, when applicable. */
  context?: SearchContextSize;
  /** Freshness mode the case used, when applicable. */
  freshness?: Freshness;
  /** True when the case succeeded; skipped cases are also reported with `ok: true`. */
  ok: boolean;
  /** True when the case was not executed at all. */
  skipped?: boolean;
  /** HTTP status taken from a `CodexError`, when the failure carried one. */
  status?: number;
  /** Failure classification: `CodexError.kind`, the error's `name`, or "unknown". */
  kind?: string;
  /** Elapsed wall-clock milliseconds; 0 for synthetic (skipped/derived) results. */
  ms: number;
  /** Number of citations in the response. */
  citationCount?: number;
  /** Number of ref ids in the response. */
  refCount?: number;
  /** Length of the response text. */
  textLength?: number;
  /** Free-form detail: `ref=<id>` on success, an error summary on failure, or a skip reason. */
  message?: string;
}
|
|
71
|
+
|
|
72
|
+
/** Everything needed to build a transport and issue requests, resolved once in `buildRuntime`. */
interface Runtime {
  /** Access token read from the auth file. */
  token: string;
  /** Account id from the auth file or decoded from the token. */
  accountId: string;
  /** Model id, either given on the command line or resolved from the model list. */
  model: string;
  /** Optional base URL override passed through to the transport. */
  baseUrl?: string;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Script entry point.
 *
 * Parses the CLI flags, resolves credentials and model, runs every selected
 * suite in a fixed order (matrix, actions, session, concurrency), prints each
 * result, then prints a summary line. Sets `process.exitCode` to 1 when at
 * least one non-skipped case failed.
 */
async function main(): Promise<void> {
  const options = parseArgs(process.argv.slice(2));
  const runtime = await buildRuntime(options);
  const results: E2eResult[] = [];

  // Store and echo a result in one step.
  const record = (result: E2eResult): void => {
    results.push(result);
    printResult(result);
  };
  const wants = (suite: E2eSuite): boolean => options.suites.includes(suite);

  if (wants("matrix")) {
    for (const api of options.apis) {
      for (const context of options.contexts) {
        for (const freshness of options.freshnesses) {
          const skip = unsupportedResult("matrix", "search", api, context, freshness);
          if (skip) {
            record(skip);
            continue;
          }
          record(
            await runSearchCase({
              suite: "matrix",
              name: "search",
              api,
              context,
              freshness,
              query: options.query,
              runtime,
              timeoutMs: options.timeoutMs,
            }),
          );
        }
      }
    }
  }

  // The action and session suites only exist for the standalone API.
  if (wants("actions") && options.apis.includes("standalone")) {
    const actionResults = await runStandaloneActionSuite(runtime, options);
    for (const result of actionResults) record(result);
  }

  if (wants("session") && options.apis.includes("standalone")) {
    const sessionResults = await runStandaloneSessionSuite(runtime, options);
    for (const result of sessionResults) record(result);
  }

  if (wants("concurrency")) {
    for (const api of options.apis) {
      for (const context of options.contexts) {
        for (const freshness of options.freshnesses) {
          for (const concurrency of options.concurrencyValues) {
            const skip = unsupportedResult(
              "concurrency",
              `search/${concurrency}x`,
              api,
              context,
              freshness,
            );
            if (skip) {
              record(skip);
              continue;
            }
            const batch = await runConcurrencyCase({
              api,
              context,
              freshness,
              query: options.query,
              runtime,
              timeoutMs: options.timeoutMs,
              concurrency,
            });
            for (const result of batch) record(result);
          }
        }
      }
    }
  }

  // Tally in a single pass; a skipped result never counts as ok or failed.
  let okCount = 0;
  let failedCount = 0;
  let skippedCount = 0;
  for (const result of results) {
    if (result.skipped) skippedCount += 1;
    else if (result.ok) okCount += 1;
    else failedCount += 1;
  }
  console.log(`summary: ${okCount}/${results.length} ok, ${skippedCount} skipped`);
  if (failedCount > 0) process.exitCode = 1;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Resolves the token, account id and model needed to talk to the service.
 *
 * The account id stored in the auth file wins; otherwise it is decoded from
 * the access token. The model is only looked up remotely when `--model` was
 * not supplied. Emits a warning (but continues) if the token looks expired.
 *
 * @throws Error when the access token is missing or no account id can be determined.
 */
async function buildRuntime(options: CliOptions): Promise<Runtime> {
  const { authPath, baseUrl } = options;
  const credential = await loadAuth(authPath);
  const token = readString(credential.access, `${PROVIDER}.access`);

  const storedAccountId = readOptionalString(credential.accountId);
  const accountId = storedAccountId ?? extractAccountIdFromToken(token);
  if (!accountId) {
    throw new Error(`${PROVIDER}.accountId is missing and could not be decoded from access token`);
  }

  warnIfExpired(credential.expires);

  const model = options.model ?? (await resolveModel(token, accountId, baseUrl));
  return { token, accountId, model, baseUrl };
}
|
|
176
|
+
|
|
177
|
+
/** Builds a plain transport from the runtime's token, account id and optional base URL. */
function createRuntimeTransport(runtime: Runtime): CodexTransport {
  const { token, accountId, baseUrl } = runtime;
  return createTransport({ token, accountId, baseUrl });
}
|
|
184
|
+
|
|
185
|
+
/**
 * Builds a transport whose fetch implementation appends the `id` field of each
 * outgoing JSON string body to `requestIds` before delegating to the global `fetch`.
 *
 * @param runtime Credentials and base URL for the transport.
 * @param requestIds Array that receives the recorded ids (mutated in place).
 */
function createRecordingTransport(runtime: Runtime, requestIds: string[]): CodexTransport {
  const recordingFetch = (async (input: string | URL | Request, init?: RequestInit) => {
    const bodyId = readBodyId(init?.body);
    if (bodyId) requestIds.push(bodyId);
    return await fetch(input, init);
  }) as typeof fetch;

  const { token, accountId, baseUrl } = runtime;
  return createTransport({ token, accountId, baseUrl, fetchImpl: recordingFetch });
}
|
|
197
|
+
|
|
198
|
+
/**
 * Runs one search request against the selected API and reports it as a timed result.
 *
 * For "standalone" the query is sent as a `searchQuery` command; otherwise a
 * responses search is issued, with external web access enabled unless the
 * freshness is "cached". Each call uses a fresh transport and fresh session ids.
 */
async function runSearchCase(input: {
  suite: E2eSuite;
  name: string;
  api: SearchApi;
  context: SearchContextSize;
  freshness: Freshness;
  query: string;
  runtime: Runtime;
  timeoutMs: number;
}): Promise<E2eResult> {
  return withTimedResult(input, async (signal) => {
    const { runtime, query, context, freshness } = input;
    const transport = createRuntimeTransport(runtime);

    if (input.api === "standalone") {
      return await runStandaloneCommands({
        model: runtime.model,
        transport,
        sessionId: makeSessionId("standalone"),
        searchQuery: [{ q: query }],
        freshness,
        searchContextSize: context,
        signal,
      });
    }

    return await runResponsesSearch({
      query,
      model: runtime.model,
      transport,
      externalWebAccess: freshness !== "cached",
      searchContextSize: context,
      sessionId: makeSessionId("responses"),
      threadId: makeSessionId("thread"),
      signal,
    });
  });
}
|
|
232
|
+
|
|
233
|
+
/**
 * Runs the standalone action cases sequentially and returns their results.
 *
 * Order: finance, weather, sports, time, then open, then find/click/screenshot.
 * The last three target the first ref id returned by a successful "open",
 * falling back to the literal URL when none is available. Returns a single
 * skipped result when no supported context size was selected.
 */
async function runStandaloneActionSuite(
  runtime: Runtime,
  options: CliOptions,
): Promise<E2eResult[]> {
  const context = pickSupportedStandaloneContext(options.contexts);
  if (!context) {
    return [skippedStandaloneSuite("actions", options.freshnesses)];
  }
  const freshness = pickSupportedStandaloneFreshness(options.freshnesses, context);
  const collected: E2eResult[] = [];

  // Local shorthands binding the arguments shared by every case.
  const build = (commands: Partial<StandaloneCommandsOptions>): StandaloneCommandsOptions =>
    standaloneOptions(runtime, freshness, context, commands);
  const execute = (name: string, commandOptions: StandaloneCommandsOptions): Promise<E2eResult> =>
    runStandaloneActionCase(name, commandOptions, context, freshness, options.timeoutMs);

  const independentCases: Array<{ name: string; options: StandaloneCommandsOptions }> = [
    { name: "finance", options: build({ finance: [{ ticker: "AMD", type: "equity", market: "USA" }] }) },
    { name: "weather", options: build({ weather: [{ location: "San Francisco, CA" }] }) },
    { name: "sports", options: build({ sports: [{ fn: "schedule", league: "epl", num_games: 1 }] }) },
    { name: "time", options: build({ time: [{ utc_offset: "+03:00" }] }) },
  ];
  for (const { name, options: commandOptions } of independentCases) {
    collected.push(await execute(name, commandOptions));
  }

  const openResult = await execute("open", build({ open: [{ refId: "https://openai.com" }] }));
  collected.push(openResult);

  // Only trust a ref id from a successful open; otherwise reuse the URL itself.
  const openedRef = openResult.ok ? firstRefFromResult(openResult) : undefined;
  const pageRef = openedRef ?? "https://openai.com";

  const pageCases: Array<{ name: string; options: StandaloneCommandsOptions }> = [
    { name: "find", options: build({ find: [{ refId: pageRef, pattern: "OpenAI" }] }) },
    { name: "click", options: build({ click: [{ refId: pageRef, id: 0 }] }) },
    { name: "screenshot", options: build({ screenshot: [{ refId: pageRef, pageno: 0 }] }) },
  ];
  for (const { name, options: commandOptions } of pageCases) {
    collected.push(await execute(name, commandOptions));
  }

  return collected;
}
|
|
325
|
+
|
|
326
|
+
/**
 * Checks how session ids flow through standalone requests.
 *
 * 1. Runs "open" then "find" with the same session id over a recording
 *    transport, then asserts every recorded request body id equals that
 *    session id (and that at least two requests were recorded).
 * 2. Fires two requests in parallel with different session ids and asserts
 *    both ids were recorded.
 *
 * The parallel requests are bounded by `options.timeoutMs` so a stalled
 * connection cannot hang the script; like every other case, they are aborted
 * when the timeout elapses.
 *
 * @returns The two timed action results followed by the two derived checks, or
 *   a single skipped result when no supported context size was selected.
 */
async function runStandaloneSessionSuite(
  runtime: Runtime,
  options: CliOptions,
): Promise<E2eResult[]> {
  const context = pickSupportedStandaloneContext(options.contexts);
  if (!context) {
    return [skippedStandaloneSuite("session", options.freshnesses)];
  }
  const freshness = pickSupportedStandaloneFreshness(options.freshnesses, context);
  const sessionId = makeSessionId("conversation");
  const requestIds: string[] = [];
  const transport = createRecordingTransport(runtime, requestIds);
  const results: E2eResult[] = [];

  const open = await runStandaloneActionCase(
    "open-same-session",
    standaloneOptions(runtime, freshness, context, {
      transport,
      sessionId,
      open: [{ refId: "https://openai.com" }],
    }),
    context,
    freshness,
    options.timeoutMs,
    "session",
  );
  results.push(open);
  const refId = firstRefFromResult(open) ?? "https://openai.com";

  results.push(
    await runStandaloneActionCase(
      "find-same-session",
      standaloneOptions(runtime, freshness, context, {
        transport,
        sessionId,
        find: [{ refId, pattern: "OpenAI" }],
      }),
      context,
      freshness,
      options.timeoutMs,
      "session",
    ),
  );

  const reused = requestIds.length >= 2 && requestIds.every((id) => id === sessionId);
  results.push({
    suite: "session",
    name: "request-id-reused-across-turns",
    api: "standalone",
    context,
    freshness,
    ok: reused,
    ms: 0,
    message: reused
      ? `sessionId=${sessionId}`
      : `expected all request ids to equal ${sessionId}; got ${requestIds.join(",")}`,
  });

  const isolatedIds: string[] = [];
  const isolatedTransport = createRecordingTransport(runtime, isolatedIds);
  const firstSession = makeSessionId("isolated-a");
  const secondSession = makeSessionId("isolated-b");

  // Same abort-on-timeout pattern as withTimedResult, shared by both requests.
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), options.timeoutMs);
  try {
    // Only the recorded ids matter here, so request failures are deliberately
    // ignored: the id is captured before the network call is made.
    await Promise.all([
      runStandaloneCommands(
        standaloneOptions(runtime, freshness, context, {
          transport: isolatedTransport,
          sessionId: firstSession,
          time: [{ utc_offset: "+00:00" }],
          signal: controller.signal,
        }),
      ).catch(() => undefined),
      runStandaloneCommands(
        standaloneOptions(runtime, freshness, context, {
          transport: isolatedTransport,
          sessionId: secondSession,
          time: [{ utc_offset: "+01:00" }],
          signal: controller.signal,
        }),
      ).catch(() => undefined),
    ]);
  } finally {
    clearTimeout(timeout);
  }

  const isolated = isolatedIds.includes(firstSession) && isolatedIds.includes(secondSession);
  results.push({
    suite: "session",
    name: "parallel-sessions-use-distinct-ids",
    api: "standalone",
    context,
    freshness,
    ok: isolated,
    ms: 0,
    message: isolated
      ? `sessionIds=${firstSession},${secondSession}`
      : `missing expected ids; got ${isolatedIds.join(",")}`,
  });

  return results;
}
|
|
420
|
+
|
|
421
|
+
/**
 * Assembles the options for one standalone request.
 *
 * Defaults (`model`, `freshness`, `searchContextSize`, `maxOutputTokens: 8000`)
 * come first and may be overridden by `commands`. `transport` and `sessionId`
 * are resolved after the spread so that a key present in `commands` with an
 * `undefined` value falls back to a fresh transport / session id instead of
 * overwriting the fallback with `undefined`.
 *
 * @param runtime Source of the model and of the fallback transport.
 * @param freshness Freshness mode for the request.
 * @param context Search context size for the request.
 * @param commands Command payloads plus any option overrides.
 */
function standaloneOptions(
  runtime: Runtime,
  freshness: Freshness,
  context: SearchContextSize,
  commands: Partial<StandaloneCommandsOptions>,
): StandaloneCommandsOptions {
  return {
    model: runtime.model,
    freshness,
    searchContextSize: context,
    maxOutputTokens: 8000,
    ...commands,
    transport: commands.transport ?? createRuntimeTransport(runtime),
    sessionId: commands.sessionId ?? makeSessionId("action"),
  };
}
|
|
437
|
+
|
|
438
|
+
/**
 * Runs one standalone command set as a timed case.
 *
 * @param name Case label for the result.
 * @param options Fully built request options; the abort signal is injected here.
 * @param context Context size recorded on the result.
 * @param freshness Freshness recorded on the result.
 * @param timeoutMs Abort the request after this many milliseconds.
 * @param suite Suite recorded on the result (defaults to "actions").
 */
async function runStandaloneActionCase(
  name: string,
  options: StandaloneCommandsOptions,
  context: SearchContextSize,
  freshness: Freshness,
  timeoutMs: number,
  suite: E2eSuite = "actions",
): Promise<E2eResult> {
  const caseInfo = {
    suite,
    name,
    api: "standalone" as const,
    context,
    freshness,
    timeoutMs,
  };
  return withTimedResult(caseInfo, async (signal) => {
    const request = { ...options, signal };
    return await runStandaloneCommands(request);
  });
}
|
|
458
|
+
|
|
459
|
+
/**
 * Starts `input.concurrency` search cases at once and waits for all of them.
 *
 * Each request gets a 1-based slot number appended to the query and encoded in
 * its case name (`search#<slot>/<total>`). Results are returned in slot order.
 */
async function runConcurrencyCase(input: {
  api: SearchApi;
  context: SearchContextSize;
  freshness: Freshness;
  query: string;
  runtime: Runtime;
  timeoutMs: number;
  concurrency: number;
}): Promise<E2eResult[]> {
  const { concurrency, query, ...shared } = input;
  const pending: Array<Promise<E2eResult>> = [];
  for (let slot = 1; slot <= concurrency; slot += 1) {
    pending.push(
      runSearchCase({
        suite: "concurrency",
        name: `search#${slot}/${concurrency}`,
        ...shared,
        query: `${query} ${slot}`,
      }),
    );
  }
  return await Promise.all(pending);
}
|
|
482
|
+
|
|
483
|
+
/**
 * Executes `run` with an abort signal that fires after `input.timeoutMs` and
 * converts the outcome into an `E2eResult`.
 *
 * On success the result carries citation/ref/text counts and, when available,
 * a `ref=<id>` message. Any thrown value is turned into a failed result (this
 * function does not rethrow); `CodexError` contributes its status and kind,
 * other `Error`s contribute their name. The timer is always cleared.
 */
async function withTimedResult(
  input: {
    suite: E2eSuite;
    name: string;
    api?: SearchApi;
    context?: SearchContextSize;
    freshness?: Freshness;
    timeoutMs: number;
  },
  run: (signal: AbortSignal) => Promise<CodexWebSearchResult>,
): Promise<E2eResult> {
  const startedAt = Date.now();
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), input.timeoutMs);

  // Identification fields shared by the success and failure shapes.
  const base = {
    suite: input.suite,
    name: input.name,
    api: input.api,
    context: input.context,
    freshness: input.freshness,
  };

  try {
    const outcome = await run(abort.signal);
    return {
      ...base,
      ok: true,
      ms: Date.now() - startedAt,
      citationCount: outcome.citations.length,
      refCount: Object.keys(outcome.refIds ?? {}).length,
      textLength: outcome.text.length,
      message: firstRefFromWebResult(outcome),
    };
  } catch (error) {
    let status: number | undefined;
    let kind: string = "unknown";
    if (error instanceof CodexError) {
      status = error.status;
      kind = error.kind;
    } else if (error instanceof Error) {
      kind = error.name;
    }
    return {
      ...base,
      ok: false,
      status,
      kind,
      ms: Date.now() - startedAt,
      message: summarizeError(error),
    };
  } finally {
    clearTimeout(timer);
  }
}
|
|
530
|
+
|
|
531
|
+
/**
 * Fetches the available models and returns the default one.
 *
 * @throws Error when no default model can be selected from the list.
 */
async function resolveModel(
  token: string,
  accountId: string,
  baseUrl: string | undefined,
): Promise<string> {
  const available = await fetchCodexModels({ token, accountId, baseUrl });
  const chosen = selectDefaultModel(available);
  if (chosen) return chosen;
  throw new Error("Codex model list is empty");
}
|
|
541
|
+
|
|
542
|
+
/**
 * Reads the auth file and returns the provider's credential entry.
 *
 * The file content is untrusted: malformed JSON is reported with the file
 * path, and a top-level value that is not an object (for example `null`) is
 * treated the same as a missing credential instead of crashing on property access.
 *
 * @param path Location of the auth JSON file.
 * @throws Error when the file is not valid JSON or holds no credential object
 *   for the provider. File-system errors from `readFile` propagate unchanged.
 */
async function loadAuth(path: string): Promise<AuthCredential> {
  const raw = await readFile(path, "utf-8");

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to parse ${path} as JSON: ${reason}`);
  }

  const credential =
    parsed !== null && typeof parsed === "object" ? (parsed as AuthFile)[PROVIDER] : undefined;
  if (!credential || typeof credential !== "object") {
    throw new Error(`${PROVIDER} credential not found in ${path}`);
  }
  return credential;
}
|
|
551
|
+
|
|
552
|
+
/**
 * Turns the raw argument list into `CliOptions`, starting from the defaults
 * (all APIs/contexts/freshnesses/suites, 45 s timeout, concurrency 2 and 4).
 *
 * Every flag except `--help` consumes the following argument as its value.
 * `--help` prints the usage text and exits the process with code 0.
 *
 * @throws Error for an unknown flag, a flag without a value, or an invalid value.
 */
function parseArgs(args: string[]): CliOptions {
  const options: CliOptions = {
    authPath: DEFAULT_AUTH_PATH,
    apis: [...API_VALUES],
    contexts: [...CONTEXT_VALUES],
    freshnesses: [...FRESHNESS_VALUES],
    suites: [...SUITE_VALUES],
    query: DEFAULT_QUERY,
    timeoutMs: 45_000,
    concurrencyValues: [2, 4],
  };

  // Flag name -> how to store its (already extracted) value.
  const setters = new Map<string, (value: string) => void>([
    ["--auth", (value) => void (options.authPath = value)],
    ["--api", (value) => void (options.apis = parseList(value, API_VALUES, "api"))],
    ["--context", (value) => void (options.contexts = parseList(value, CONTEXT_VALUES, "context"))],
    [
      "--freshness",
      (value) => void (options.freshnesses = parseList(value, FRESHNESS_VALUES, "freshness")),
    ],
    ["--suite", (value) => void (options.suites = parseList(value, SUITE_VALUES, "suite"))],
    ["--query", (value) => void (options.query = value)],
    ["--model", (value) => void (options.model = value)],
    ["--base-url", (value) => void (options.baseUrl = value)],
    ["--timeout-ms", (value) => void (options.timeoutMs = parsePositiveInteger(value, "--timeout-ms"))],
    [
      "--concurrency",
      (value) => void (options.concurrencyValues = parseIntegerList(value, "--concurrency")),
    ],
  ]);

  for (let index = 0; index < args.length; index += 1) {
    const flag = args[index];
    if (flag === "--help") {
      printHelp();
      process.exit(0);
    }

    const store = flag === undefined ? undefined : setters.get(flag);
    if (!store) {
      throw new Error(`Unknown argument: ${flag}`);
    }

    // Consume the next argument as this flag's value.
    index += 1;
    const value = args[index];
    if (!value) throw new Error(`${flag} requires a value`);
    store(value);
  }

  return options;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Returns a skipped result when the given combination must not be executed,
 * or `undefined` when the case should run.
 *
 * Only the standalone API is ever skipped: always for the matrix and
 * concurrency suites, and otherwise when the context/freshness pair is
 * reported as unsupported.
 */
function unsupportedResult(
  suite: E2eSuite,
  name: string,
  api: SearchApi,
  context: SearchContextSize,
  freshness: Freshness,
): E2eResult | undefined {
  if (api !== "standalone") return undefined;

  const skipWith = (message: string): E2eResult => ({
    suite,
    name,
    api,
    context,
    freshness,
    ok: true,
    skipped: true,
    ms: 0,
    message,
  });

  if (suite === "matrix" || suite === "concurrency") {
    return skipWith("standalone search_query is disabled; use responses/codex_search");
  }
  if (isUnsupportedStandaloneCombination(context, freshness)) {
    return skipWith("standalone/low is intentionally disabled");
  }
  return undefined;
}
|
|
648
|
+
|
|
649
|
+
/**
 * Builds the single skipped result reported when a standalone suite cannot run
 * because no supported context size was selected. The suite name doubles as the
 * case name, and the first requested freshness (if any) is recorded.
 */
function skippedStandaloneSuite(suite: E2eSuite, freshnesses: Freshness[]): E2eResult {
  const [firstFreshness] = freshnesses;
  const reason =
    'standalone actions require search_context_size "medium" or "high"; pass --context medium';
  const skipped: E2eResult = {
    suite,
    name: suite,
    api: "standalone",
    freshness: firstFreshness,
    ok: true,
    skipped: true,
    ms: 0,
    message: reason,
  };
  return skipped;
}
|
|
662
|
+
|
|
663
|
+
/**
 * Parses a comma-separated flag value into a list of allowed entries.
 *
 * Items are trimmed, empty items are ignored, and duplicates are dropped
 * (first occurrence wins) so that repeating a value does not run the same
 * cases twice; this matches how `parseIntegerList` treats duplicates.
 *
 * @param value Raw flag value, e.g. `"low, high"`.
 * @param allowed Accepted entries.
 * @param label Flag label used in error messages.
 * @returns The accepted entries in input order, without duplicates.
 * @throws Error when no item is given or an item is not in `allowed`.
 */
function parseList<T extends string>(value: string, allowed: readonly T[], label: string): T[] {
  const items = value
    .split(",")
    .map((item) => item.trim())
    .filter((item) => item.length > 0);
  if (items.length === 0) {
    throw new Error(`Invalid ${label}: expected at least one of ${allowed.join(", ")}`);
  }

  const accepted: T[] = [];
  for (const item of items) {
    // Look the entry up in `allowed` so the result is typed without a cast.
    const match = allowed.find((candidate) => candidate === item);
    if (match === undefined) {
      throw new Error(`Invalid ${label}: ${item}. Expected one of ${allowed.join(", ")}`);
    }
    if (!accepted.includes(match)) accepted.push(match);
  }
  return accepted;
}
|
|
678
|
+
|
|
679
|
+
/**
 * Parses a comma-separated list of positive integers, dropping duplicates
 * while keeping first-seen order.
 *
 * @throws Error (from `parsePositiveInteger`) when any item is not a positive integer.
 */
function parseIntegerList(value: string, label: string): number[] {
  const unique = new Set<number>();
  for (const piece of value.split(",")) {
    unique.add(parsePositiveInteger(piece.trim(), label));
  }
  return Array.from(unique);
}
|
|
683
|
+
|
|
684
|
+
/**
 * Converts `value` with `Number()` and returns it if it is an integer greater than zero.
 *
 * @param label Flag label used in the error message.
 * @throws Error when the converted value is not a positive integer.
 */
function parsePositiveInteger(value: string, label: string): number {
  const candidate = Number(value);
  const isPositiveInteger = Number.isInteger(candidate) && candidate > 0;
  if (isPositiveInteger) return candidate;
  throw new Error(`${label} must be a positive integer`);
}
|
|
691
|
+
|
|
692
|
+
/**
 * Returns `value` when it is a non-empty string.
 *
 * @param label Name used in the error message.
 * @throws Error when `value` is not a string or is empty.
 */
function readString(value: unknown, label: string): string {
  if (typeof value !== "string" || value.length === 0) {
    throw new Error(`${label} is missing`);
  }
  return value;
}
|
|
696
|
+
|
|
697
|
+
/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
function readOptionalString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  return value.length > 0 ? value : undefined;
}
|
|
700
|
+
|
|
701
|
+
/**
 * Extracts the string `id` field from a JSON request body.
 *
 * Returns `undefined` for non-string bodies, bodies that are not valid JSON,
 * and JSON whose `id` is absent or not a string.
 */
function readBodyId(body: BodyInit | null | undefined): string | undefined {
  if (typeof body !== "string") return undefined;

  let id: unknown;
  try {
    id = (JSON.parse(body) as { id?: unknown }).id;
  } catch {
    // Malformed JSON (or a JSON `null`) simply means there is no id to record.
    return undefined;
  }
  return typeof id === "string" ? id : undefined;
}
|
|
710
|
+
|
|
711
|
+
/**
 * Prints a warning when `value` is a number that is not later than `Date.now()`.
 * Any other value (including a missing one) is ignored.
 */
function warnIfExpired(value: unknown): void {
  const alreadyExpired = typeof value === "number" && value <= Date.now();
  if (!alreadyExpired) return;
  console.warn(
    "warning: stored openai-codex access token appears expired; run /login openai-codex in Pi if requests fail with auth errors",
  );
}
|
|
719
|
+
|
|
720
|
+
/**
 * Chooses the context size for the standalone suites: "medium" when it was
 * requested, otherwise the first requested value other than "low", otherwise
 * `undefined`.
 */
function pickSupportedStandaloneContext(
  values: SearchContextSize[],
): SearchContextSize | undefined {
  for (const candidate of values) {
    if (candidate === "medium") return "medium";
  }
  for (const candidate of values) {
    if (candidate !== "low") return candidate;
  }
  return undefined;
}
|
|
726
|
+
|
|
727
|
+
/**
 * Chooses the freshness for the standalone suites: "live" when it was requested
 * and the context is not "low"; otherwise the first requested value other than
 * "live"; otherwise "indexed".
 */
function pickSupportedStandaloneFreshness(
  values: Freshness[],
  context: SearchContextSize,
): Freshness {
  const liveAllowed = context !== "low";
  if (liveAllowed && values.includes("live")) {
    return "live";
  }
  for (const candidate of values) {
    if (candidate !== "live") return candidate;
  }
  return "indexed";
}
|
|
734
|
+
|
|
735
|
+
/**
 * Generates an id of the form `pi-codex-e2e-<prefix>-<epoch ms>-<random base36>`.
 * The random part comes from `Math.random()`.
 */
function makeSessionId(prefix: string): string {
  const stamp = Date.now();
  const nonce = Math.random().toString(36).slice(2);
  return ["pi-codex-e2e", prefix, stamp, nonce].join("-");
}
|
|
738
|
+
|
|
739
|
+
/**
 * Recovers the ref id from a result whose message has the `ref=<id>` form;
 * returns `undefined` for any other (or missing) message.
 */
function firstRefFromResult(result: E2eResult): string | undefined {
  const marker = "ref=";
  const { message } = result;
  if (!message?.startsWith(marker)) return undefined;
  return message.slice(marker.length);
}
|
|
742
|
+
|
|
743
|
+
/**
 * Formats the first key of `result.refIds` as `ref=<key>`; returns `undefined`
 * when there are no ref ids (or the first key is empty).
 */
function firstRefFromWebResult(result: CodexWebSearchResult): string | undefined {
  const [firstKey] = Object.keys(result.refIds ?? {});
  if (!firstKey) return undefined;
  return `ref=${firstKey}`;
}
|
|
747
|
+
|
|
748
|
+
/**
 * Produces a one-line description of a thrown value: the `Error` message (or
 * the stringified value), with each whitespace run collapsed to a single space
 * and the result cut to at most 240 characters.
 */
function summarizeError(error: unknown): string {
  let text: string;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  const singleLine = text.replace(/\s+/g, " ");
  return singleLine.slice(0, 240);
}
|
|
752
|
+
|
|
753
|
+
/**
 * Prints one result line to stdout.
 *
 * The line starts with `skip`, `ok` or `fail`, followed by the slash-joined
 * suite/name/api/context/freshness (missing parts omitted). Successful lines
 * add timing and counts; failed lines add timing, kind, optional HTTP status
 * and the message.
 */
function printResult(result: E2eResult): void {
  const label = [result.suite, result.name, result.api, result.context, result.freshness]
    .filter(Boolean)
    .join("/");

  if (result.skipped) {
    console.log(`skip ${label} ${result.message ?? "skipped"}`);
    return;
  }

  if (!result.ok) {
    const httpPart = result.status === undefined ? "" : ` http=${result.status}`;
    console.log(
      `fail ${label} ${result.ms}ms kind=${result.kind ?? "unknown"}${httpPart} ${result.message ?? ""}`,
    );
    return;
  }

  const textLength = result.textLength ?? 0;
  const citations = result.citationCount ?? 0;
  const refs = result.refCount ?? 0;
  const detail = result.message ? ` ${result.message}` : "";
  console.log(
    `ok ${label} ${result.ms}ms text=${textLength} citations=${citations} refs=${refs}${detail}`,
  );
}
|
|
773
|
+
|
|
774
|
+
/** Prints the usage text (including the current default query) to stdout. */
function printHelp(): void {
  const usage = `Usage: node scripts/codex-e2e.ts [options]

Default runs matrix, standalone action, standalone session, and concurrency suites.
standalone/low is intentionally skipped because low-context standalone requests trigger Cloudflare.

Options:
--auth PATH Pi auth file (default: ~/.pi/agent/auth.json)
--suite LIST matrix, actions, session, concurrency, or comma list (default: all)
--api LIST responses, standalone, or comma list (default: both)
--context LIST low, medium, high, or comma list (default: all)
--freshness LIST live, indexed, cached, or comma list (default: all)
--query TEXT Query to run (default: ${DEFAULT_QUERY})
--model MODEL Codex model id; otherwise resolves /codex/models default
--base-url URL Override Codex base URL
--timeout-ms N Per-case timeout (default: 45000)
--concurrency LIST Parallel requests per concurrency combo, e.g. 2,4,8 (default: 2,4)
`;
  console.log(usage);
}
|
|
793
|
+
|
|
794
|
+
// Run the script; anything that escapes main() is reported as a one-line
// summary on stderr and terminates the process with exit code 1.
main().then(undefined, (failure: unknown) => {
  console.error(summarizeError(failure));
  process.exit(1);
});
|