portable-agent-layer 0.40.0 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,114 @@
1
1
  /**
2
- * Lightweight Anthropic API wrapper used by session naming, failure capture, etc.
2
+ * Inference dispatcher.
3
+ *
4
+ * Public entry: `inference(opts)`. Internally routes to the best available path
5
+ * based on the active agent, claude-binary availability, and recursion depth.
6
+ *
7
+ * Routing order (first match wins):
8
+ * 1. depth >= MAX_DEPTH → refuse (prevents recursion if env leaks)
9
+ * 2. isClaude() + claude on PATH → inferenceViaClaudeSpawn (subscription-billed)
10
+ * 3. hasApiKey() → inferenceViaApi (current Anthropic API path)
11
+ * 4. otherwise → { success: false }
12
+ *
13
+ * The claude-spawn path mirrors PAI/TOOLS/Inference.ts in spawn args:
14
+ * --print --tools '' --setting-sources '' --output-format text --system-prompt …
15
+ * These flags prevent the spawned subprocess from loading PAL hooks or making
16
+ * tool calls — the primary recursion defense. PAL's spawn-guard env sentinel
17
+ * is the secondary belt-and-suspenders layer (see lib/spawn-guard.ts).
18
+ *
19
+ * The list above is abbreviated: codex exec, opencode run, copilot -p and
20
+ * cursor-agent -p each have their own spawn route (codex also has an OpenAI API fallback) ahead of the Anthropic API path; see `inference()`.
3
21
  */
4
22
 
23
+ import { accessSync, constants, existsSync, statSync } from "node:fs";
24
+ import { basename, delimiter, resolve as resolvePath } from "node:path";
25
+ import {
26
+ getActiveAgent,
27
+ isClaude,
28
+ isCodex,
29
+ isCopilot,
30
+ isCursor,
31
+ isOpencode,
32
+ } from "./agent";
33
+ import { logDebug } from "./log";
5
34
  import { HAIKU_MODEL } from "./models";
35
+ import { buildSpawnGuardEnv, getInferenceDepth, SPAWN_GUARD_ENV } from "./spawn-guard";
6
36
 
7
37
/** True when a non-empty `PAL_ANTHROPIC_API_KEY` is present in the environment. */
export function hasApiKey(): boolean {
  const key = process.env.PAL_ANTHROPIC_API_KEY;
  return Boolean(key);
}
10
40
 
41
/** True when a non-empty `PAL_OPENAI_API_KEY` is present in the environment. */
export function hasOpenAiKey(): boolean {
  const key = process.env.PAL_OPENAI_API_KEY;
  return Boolean(key);
}
44
+
45
/**
 * Report which route `inference()` would pick right now, given the current
 * environment variables and the binaries resolvable on PATH.
 *
 * Diagnostic only — it never spawns a process or issues a request. Used by
 * `pal cli doctor`.
 *
 * @returns The active agent, the chosen route, and a human-readable reason.
 */
export function previewInferenceRoute(): {
  agent: string;
  route:
    | "claude-spawn"
    | "codex-spawn"
    | "openai-api"
    | "opencode-spawn"
    | "copilot-spawn"
    | "cursor-spawn"
    | "anthropic-api"
    | "disabled"
    | "none";
  reason: string;
} {
  type Preview = ReturnType<typeof previewInferenceRoute>;
  const agent = getActiveAgent();
  const verdict = (route: Preview["route"], reason: string): Preview => ({
    agent,
    route,
    reason,
  });

  // The kill-switch outranks every other consideration.
  if (process.env.PAL_INFERENCE_DISABLED === "1") {
    return verdict("disabled", "PAL_INFERENCE_DISABLED=1 (test kill-switch)");
  }

  // Native CLI for the active agent (codex may also use an OpenAI key).
  if (isClaude() && hasClaudeBinary()) {
    return verdict("claude-spawn", "claude binary on PATH");
  }
  if (isCodex() && hasCodexBinary()) {
    return verdict("codex-spawn", "codex binary on PATH");
  }
  if (isCodex() && hasOpenAiKey()) {
    return verdict(
      "openai-api",
      "codex agent without codex binary; PAL_OPENAI_API_KEY set"
    );
  }
  if (isOpencode() && hasOpencodeBinary()) {
    return verdict("opencode-spawn", "opencode binary on PATH");
  }
  if (isCopilot() && hasCopilotBinary()) {
    return verdict("copilot-spawn", "copilot binary on PATH");
  }
  if (isCursor() && hasCursorBinary()) {
    return verdict("cursor-spawn", "cursor-agent binary on PATH");
  }

  // Last resort: the Anthropic API, whatever the active agent is.
  if (hasApiKey()) {
    return verdict("anthropic-api", "fallback — PAL_ANTHROPIC_API_KEY set");
  }
  return verdict(
    "none",
    "no native CLI binary for active agent and no PAL_ANTHROPIC_API_KEY/PAL_OPENAI_API_KEY"
  );
}
100
+
101
/**
 * Whether some inference path is usable at the moment: the active agent's own
 * CLI on PATH, an OpenAI key for codex, or — for any agent — an Anthropic key.
 */
export function canInfer(): boolean {
  return (
    (isClaude() && hasClaudeBinary()) ||
    (isCodex() && (hasCodexBinary() || hasOpenAiKey())) ||
    (isOpencode() && hasOpencodeBinary()) ||
    (isCopilot() && hasCopilotBinary()) ||
    (isCursor() && hasCursorBinary()) ||
    hasApiKey()
  );
}
111
+
11
112
  interface InferenceOptions {
12
113
  system?: string;
13
114
  user: string;
@@ -16,6 +117,10 @@ interface InferenceOptions {
16
117
  timeout?: number;
17
118
  /** JSON schema for structured output — guarantees valid JSON matching the schema */
18
119
  jsonSchema?: Record<string, unknown>;
120
+ /** Opaque label identifying the calling handler — appears in debug logs as caller=X */
121
+ caller?: string;
122
+ /** Session ID the call is associated with — appears in debug logs as sessionId=X */
123
+ sessionId?: string;
19
124
  }
20
125
 
21
126
  interface InferenceResult {
@@ -25,6 +130,596 @@ interface InferenceResult {
25
130
  }
26
131
 
27
132
/**
 * Public inference entry point. Picks the first usable route for the active
 * agent and delegates to it; resolves to `{ success: false }` when inference
 * is disabled, the recursion depth limit is reached, or no route is available.
 *
 * @param opts - Prompt, model and logging options for this call.
 * @returns The result produced by the selected route.
 */
export async function inference(opts: InferenceOptions): Promise<InferenceResult> {
  // Hard kill-switch — the test suite sets this so that no real inference
  // (spawn or API call) can ever fire from tests. Production never sets it.
  if (process.env.PAL_INFERENCE_DISABLED === "1") return { success: false };

  // Recursion guard: refuse once the inherited depth reaches the maximum.
  const depth = getInferenceDepth();
  if (depth >= SPAWN_GUARD_ENV.MAX_DEPTH) {
    logDebug("inference", `refuse: depth=${depth} >= max=${SPAWN_GUARD_ENV.MAX_DEPTH}`);
    return { success: false };
  }

  const agent = getActiveAgent();
  const tag = `caller=${opts.caller ?? "anonymous"} sessionId=${opts.sessionId ?? "-"}`;
  const note = (detail: string): void => {
    logDebug("inference", `${tag} ${detail}`);
  };

  // claude: the user prompt goes over stdin, everything else via argv.
  const claudeBin = isClaude() ? getClaudeBinary() : null;
  if (claudeBin) {
    note(`route=claude-spawn agent=${agent} model=${opts.model ?? HAIKU_MODEL}`);
    return inferenceViaCliSpawn(claudeBin, buildClaudeArgs(opts), opts.user, opts);
  }

  // codex: prefer the CLI, otherwise fall back to the OpenAI API when keyed.
  if (isCodex()) {
    const codexBin = getCodexBinary();
    if (codexBin) {
      note(`route=codex-spawn agent=${agent}`);
      return inferenceViaCliSpawn(codexBin, buildCodexArgs(opts), "", opts);
    }
    if (hasOpenAiKey()) {
      note(`route=openai-api agent=${agent}`);
      return inferenceViaOpenAiApi(opts);
    }
  }

  // opencode: NDJSON on stdout, so a text extractor is supplied.
  const opencodeBin = isOpencode() ? getOpencodeBinary() : null;
  if (opencodeBin) {
    note(`route=opencode-spawn agent=${agent}`);
    return inferenceViaCliSpawn(
      opencodeBin,
      buildOpencodeArgs(opts),
      "",
      opts,
      extractOpencodeText
    );
  }

  const copilotBin = isCopilot() ? getCopilotBinary() : null;
  if (copilotBin) {
    note(`route=copilot-spawn agent=${agent}`);
    return inferenceViaCliSpawn(copilotBin, buildCopilotArgs(opts), "", opts);
  }

  const cursorBin = isCursor() ? getCursorBinary() : null;
  if (cursorBin) {
    note(`route=cursor-spawn agent=${agent}`);
    return inferenceViaCliSpawn(cursorBin, buildCursorArgs(opts), "", opts);
  }

  // Agent-independent fallback.
  if (hasApiKey()) {
    note(`route=anthropic-api agent=${agent}`);
    return inferenceViaApi(opts);
  }

  note(
    `route=none agent=${agent} hasApiKey=false hasOpenAiKey=${hasOpenAiKey()} hasClaude=${hasClaudeBinary()} hasCodex=${hasCodexBinary()} hasOpencode=${hasOpencodeBinary()} hasCopilot=${hasCopilotBinary()} hasCursor=${hasCursorBinary()}`
  );
  return { success: false };
}
205
+
206
+ // ─────────────────────────────────────────────────────────────────────────────
207
+ // Per-agent CLI metadata — binary presence + argv builders
208
+ // ─────────────────────────────────────────────────────────────────────────────
209
+
210
// Memoized PATH lookups, one per supported CLI.
//   undefined → not probed yet
//   null      → probed, binary not found
//   string    → resolved absolute path
let claudeBinaryCache: string | null | undefined;
let codexBinaryCache: string | null | undefined;
let opencodeBinaryCache: string | null | undefined;
let copilotBinaryCache: string | null | undefined;
let cursorBinaryCache: string | null | undefined;
215
+
216
/**
 * Resolve a binary on PATH to its full absolute path.
 *
 * Manual PATH walk (instead of Bun.which / `which` subprocess) because:
 * 1. Ubuntu 24.04 dropped the `which` binary entirely.
 * 2. Windows has no `which` at all.
 * 3. Bun.which snapshots PATH at startup and ignores mid-test mutations.
 * 4. Bun.spawn on Windows is inconsistent at resolving PATHEXT for bare
 *    names — passing the full `.cmd`/`.exe` path bypasses that fragility.
 *
 * Only regular files qualify: a directory that happens to share the binary's
 * name is skipped (directories pass an X_OK check on POSIX and an existence
 * check on Windows, so they must be filtered explicitly).
 *
 * @param name - Bare command name, without extension (e.g. "claude").
 * @returns The resolved absolute path, or null when nothing on PATH matches.
 */
function findBinaryOnPath(name: string): string | null {
  const PATH = process.env.PATH;
  if (!PATH) return null;
  const isWindows = process.platform === "win32";
  const exts = isWindows
    ? (process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD").split(";")
    : [""];
  for (const dir of PATH.split(delimiter)) {
    if (!dir) continue;
    for (const ext of exts) {
      const candidate = resolvePath(dir, name + ext);
      try {
        if (isWindows) {
          // Windows has no executable bit — a regular file with a PATHEXT
          // extension is enough.
          if (existsSync(candidate) && statSync(candidate).isFile()) return candidate;
        } else {
          // accessSync throws when the entry is missing or not executable.
          accessSync(candidate, constants.X_OK);
          if (statSync(candidate).isFile()) return candidate;
        }
      } catch {
        /* not here — try next */
      }
    }
  }
  return null;
}
254
+
255
/** Absolute path of `claude` on PATH, or null; looked up once and memoized. */
function getClaudeBinary(): string | null {
  if (claudeBinaryCache === undefined) {
    claudeBinaryCache = findBinaryOnPath("claude");
  }
  return claudeBinaryCache;
}
260
+
261
/** Absolute path of `codex` on PATH, or null; looked up once and memoized. */
function getCodexBinary(): string | null {
  if (codexBinaryCache === undefined) {
    codexBinaryCache = findBinaryOnPath("codex");
  }
  return codexBinaryCache;
}
266
+
267
/** Absolute path of `opencode` on PATH, or null; looked up once and memoized. */
function getOpencodeBinary(): string | null {
  if (opencodeBinaryCache === undefined) {
    opencodeBinaryCache = findBinaryOnPath("opencode");
  }
  return opencodeBinaryCache;
}
272
+
273
/** Absolute path of `copilot` on PATH, or null; looked up once and memoized. */
function getCopilotBinary(): string | null {
  if (copilotBinaryCache === undefined) {
    copilotBinaryCache = findBinaryOnPath("copilot");
  }
  return copilotBinaryCache;
}
278
+
279
/** Absolute path of `cursor-agent` on PATH, or null; looked up once and memoized. */
function getCursorBinary(): string | null {
  if (cursorBinaryCache === undefined) {
    cursorBinaryCache = findBinaryOnPath("cursor-agent");
  }
  return cursorBinaryCache;
}
284
+
285
/** Whether the `claude` CLI resolves on PATH. */
function hasClaudeBinary(): boolean {
  const resolved = getClaudeBinary();
  return resolved !== null;
}
288
/** Whether the `codex` CLI resolves on PATH. */
function hasCodexBinary(): boolean {
  const resolved = getCodexBinary();
  return resolved !== null;
}
291
/** Whether the `opencode` CLI resolves on PATH. */
function hasOpencodeBinary(): boolean {
  const resolved = getOpencodeBinary();
  return resolved !== null;
}
294
/** Whether the `copilot` CLI resolves on PATH. */
function hasCopilotBinary(): boolean {
  const resolved = getCopilotBinary();
  return resolved !== null;
}
297
/** Whether the `cursor-agent` CLI resolves on PATH. */
function hasCursorBinary(): boolean {
  const resolved = getCursorBinary();
  return resolved !== null;
}
300
+
301
/**
 * Test-only: forget the memoized `claude` lookup so the next call walks PATH
 * again.
 */
export function _resetClaudeBinaryCache(): void {
  claudeBinaryCache = undefined;
}
305
+
306
/**
 * Test-only: forget the memoized `codex` lookup so the next call walks PATH
 * again.
 */
export function _resetCodexBinaryCache(): void {
  codexBinaryCache = undefined;
}
310
+
311
/**
 * Test-only: forget the memoized `opencode` lookup so the next call walks PATH
 * again.
 */
export function _resetOpencodeBinaryCache(): void {
  opencodeBinaryCache = undefined;
}
315
+
316
/**
 * Test-only: forget the memoized `copilot` lookup so the next call walks PATH
 * again.
 */
export function _resetCopilotBinaryCache(): void {
  copilotBinaryCache = undefined;
}
320
+
321
/**
 * Test-only: forget the memoized `cursor-agent` lookup so the next call walks
 * PATH again.
 */
export function _resetCursorBinaryCache(): void {
  cursorBinaryCache = undefined;
}
325
+
326
/**
 * Assemble the argv for a one-shot `claude --print …` call. Pure.
 *
 * Tools and setting sources are passed as empty strings. When a JSON schema is
 * requested, the schema instruction is appended to the system prompt.
 * `--system-prompt` is only emitted when the resulting prompt is non-empty.
 *
 * @param opts - Inference options; `model` defaults to `HAIKU_MODEL`.
 * @returns Arguments to pass after the claude binary.
 */
export function buildClaudeArgs(opts: InferenceOptions): string[] {
  let systemPrompt = opts.system;
  if (opts.jsonSchema) {
    systemPrompt = injectJsonSchemaInstruction(opts.system ?? "", opts.jsonSchema);
  }
  const argv = [
    "--print",
    "--model",
    opts.model ?? HAIKU_MODEL,
    "--tools",
    "",
    "--output-format",
    "text",
    "--setting-sources",
    "",
  ];
  return systemPrompt ? [...argv, "--system-prompt", systemPrompt] : argv;
}
348
+
349
/**
 * Assemble the argv for a one-shot `codex exec …` call. Pure.
 *
 * Recursion + tool-use defense (the counterpart of claude's
 * `--setting-sources '' --tools ''`):
 *   --ignore-user-config → no ~/.codex/config.toml → no hooks load in the child
 *   --ignore-rules       → no execpolicy .rules files load
 *   --sandbox read-only  → child cannot execute shell commands even if it tries
 *   --ephemeral          → no session persistence; one-shot only
 *
 * Codex has no --system-prompt equivalent, so system text, user text and the
 * optional JSON-schema instruction are joined with blank lines into a single
 * positional prompt. ARG_MAX is ~256KB on macOS; typical PAL prompts are 1-2KB.
 *
 * @param opts - Inference options (`system`, `user`, `jsonSchema` are read).
 * @returns Arguments to pass after the codex binary.
 */
export function buildCodexArgs(opts: InferenceOptions): string[] {
  const preamble = opts.system ? [opts.system] : [];
  const schemaNote = opts.jsonSchema
    ? [
        `Respond with ONLY a JSON value matching this schema (no prose, no markdown): ${JSON.stringify(opts.jsonSchema)}`,
      ]
    : [];
  const prompt = [...preamble, opts.user, ...schemaNote].join("\n\n");
  const flags = [
    "exec",
    "--color",
    "never",
    "--skip-git-repo-check",
    "--ignore-user-config",
    "--ignore-rules",
    "--sandbox",
    "read-only",
    "--ephemeral",
  ];
  return [...flags, prompt];
}
385
+
386
/**
 * Assemble the argv for a one-shot `opencode run …` call. Pure.
 *
 * Recursion defense:
 *   --pure        → run WITHOUT external plugins, so PAL's own opencode plugin
 *                   does not load in the spawned child → no hook recursion.
 *   --format json → NDJSON events on stdout; the reply is recovered with
 *                   extractOpencodeText() instead of scraping decorated output
 *                   ("> build · provider/model" banner etc).
 *
 * Like codex, opencode has no --system-prompt equivalent: system text, user
 * text and the optional JSON-schema instruction are joined into the positional
 * message. Provider/model is left unset so opencode uses the user's default.
 *
 * @param opts - Inference options (`system`, `user`, `jsonSchema` are read).
 * @returns Arguments to pass after the opencode binary.
 */
export function buildOpencodeArgs(opts: InferenceOptions): string[] {
  const preamble = opts.system ? [opts.system] : [];
  const schemaNote = opts.jsonSchema
    ? [
        `Respond with ONLY a JSON value matching this schema (no prose, no markdown): ${JSON.stringify(opts.jsonSchema)}`,
      ]
    : [];
  const prompt = [...preamble, opts.user, ...schemaNote].join("\n\n");
  return ["run", "--pure", "--format", "json", prompt];
}
412
+
413
/**
 * Assemble the argv for a one-shot `cursor-agent -p …` call. Pure.
 *
 * Recursion + tool-use defense:
 *   --mode ask           → read-only Q&A; the agent cannot edit files or run
 *                          shell commands, which removes any path back into
 *                          our hooks. Cursor's equivalent of claude's
 *                          `--tools ''` and codex's `--sandbox read-only`.
 *   --output-format text → clean stdout (default but explicit)
 *   --trust              → required for headless mode; without it cursor-agent
 *                          exits 0 with a "trust this directory" hint instead
 *                          of running inference. Safe alongside --mode ask
 *                          because that mode disallows tool calls anyway.
 *
 * cursor-agent has no --system-prompt flag, so system text, user text and the
 * optional JSON-schema instruction are joined into one positional prompt.
 *
 * Auth note: cursor-agent picks up either `cursor-agent login` credentials or
 * the `CURSOR_API_KEY` env var. PAL doesn't manage these — that's user setup.
 *
 * @param opts - Inference options (`system`, `user`, `jsonSchema` are read).
 * @returns Arguments to pass after the cursor-agent binary.
 */
export function buildCursorArgs(opts: InferenceOptions): string[] {
  const preamble = opts.system ? [opts.system] : [];
  const schemaNote = opts.jsonSchema
    ? [
        `Respond with ONLY a JSON value matching this schema (no prose, no markdown): ${JSON.stringify(opts.jsonSchema)}`,
      ]
    : [];
  const prompt = [...preamble, opts.user, ...schemaNote].join("\n\n");
  return ["-p", "--mode", "ask", "--output-format", "text", "--trust", prompt];
}
445
+
446
/**
 * Assemble the argv for a one-shot `copilot -p …` call. Pure.
 *
 * Recursion defense:
 *   --no-custom-instructions → don't load PAL's copilot custom instructions in
 *                              the spawned child (equivalent to claude's
 *                              `--setting-sources ''` and opencode's `--pure`)
 *   --disable-builtin-mcps   → no MCP servers in the child (extra safety)
 *   --no-auto-update         → prevent CLI self-update from delaying the spawn
 *   --no-color               → clean stdout for capture
 *   --allow-all-tools        → REQUIRED for non-interactive mode (without it,
 *                              copilot prompts for tool-use confirmation)
 *
 * Copilot has no --system-prompt flag, so system text, user text and the
 * optional JSON-schema instruction are joined into the prompt passed via -p.
 *
 * @param opts - Inference options (`system`, `user`, `jsonSchema` are read).
 * @returns Arguments to pass after the copilot binary.
 */
export function buildCopilotArgs(opts: InferenceOptions): string[] {
  const preamble = opts.system ? [opts.system] : [];
  const schemaNote = opts.jsonSchema
    ? [
        `Respond with ONLY a JSON value matching this schema (no prose, no markdown): ${JSON.stringify(opts.jsonSchema)}`,
      ]
    : [];
  const prompt = [...preamble, opts.user, ...schemaNote].join("\n\n");
  const trailingFlags = [
    "--no-custom-instructions",
    "--disable-builtin-mcps",
    "--no-auto-update",
    "--no-color",
    "--allow-all-tools",
  ];
  return ["-p", prompt, ...trailingFlags];
}
482
+
483
/**
 * Recover the agent's reply from `opencode --format json` NDJSON output.
 *
 * Every line that parses as an event with `type: "text"` and a
 * `part.type: "text"` carrying non-empty text contributes, in order. Lines
 * that are blank or not valid JSON are ignored.
 *
 * @param rawStdout - Raw stdout captured from opencode.
 * @returns The concatenated, trimmed text; empty when nothing usable is found.
 */
export function extractOpencodeText(rawStdout: string): string {
  let collected = "";
  for (const rawLine of rawStdout.split("\n")) {
    if (rawLine.trim() === "") continue;
    let fragment: string | undefined;
    try {
      const event = JSON.parse(rawLine) as {
        type?: string;
        part?: { type?: string; text?: string };
      };
      if (event.type === "text" && event.part?.type === "text") {
        fragment = event.part.text;
      }
    } catch {
      /* opencode also emits non-JSON lines — skip them */
      continue;
    }
    if (fragment) collected += fragment;
  }
  return collected.trim();
}
506
+
507
/**
 * Append a JSON-schema instruction to a system prompt (PAI pattern).
 *
 * @param systemPrompt - Existing system prompt; may be empty.
 * @param schema - JSON schema the reply must match.
 * @returns The prompt followed by a blank line and the instruction, or the
 *   instruction alone when the prompt is empty.
 */
export function injectJsonSchemaInstruction(
  systemPrompt: string,
  schema: Record<string, unknown>
): string {
  const instruction = `Respond with ONLY a JSON value matching this schema (no prose, no markdown): ${JSON.stringify(schema)}`;
  if (!systemPrompt) return instruction;
  return [systemPrompt, instruction].join("\n\n");
}
515
+
516
/**
 * Extract a JSON object or array from raw text output.
 *
 * Two greedy candidates are considered: the span from the first `{` to the
 * last `}`, and the span from the first `[` to the last `]`. The candidate
 * that starts earlier in the text is tried first, so an array of objects such
 * as `[{"a":1}]` is returned as the array rather than as its inner object.
 * If that candidate does not parse, the other one is tried.
 *
 * @param output - Raw model output, possibly wrapped in prose or code fences.
 * @returns The parsed value, or null when neither candidate is valid JSON.
 */
export function parseJsonFromOutput(output: string): unknown | null {
  const matches = [/\{[\s\S]*\}/.exec(output), /\[[\s\S]*\]/.exec(output)];
  const candidates: Array<{ start: number; text: string }> = [];
  for (const match of matches) {
    if (match) candidates.push({ start: match.index, text: match[0] });
  }
  // Outermost structure first: whichever opening bracket appears earlier.
  candidates.sort((a, b) => a.start - b.start);
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate.text);
    } catch {
      /* try next */
    }
  }
  return null;
}
530
+
531
/** Unparsed outcome of a single CLI invocation. */
interface RawSpawnResult {
  /** Process exit code; null when the process could not be spawned. */
  code: number | null;
  /** Everything the child wrote to stdout. */
  stdout: string;
  /** Everything the child wrote to stderr. */
  stderr: string;
  /** True when the timeout fired and the child was killed. */
  timedOut: boolean;
}
537
+
538
/**
 * Run one CLI invocation and return its raw streams and exit info, unparsed.
 * Used by every per-agent dispatcher (claude --print, codex exec, etc).
 *
 * Never rejects: a spawn failure is logged and reported as `code: null` with
 * empty streams; a timeout kills the child and is reported through `timedOut`.
 *
 * stdout and stderr are drained concurrently, so a child that writes heavily
 * to stderr is not left blocked on an unread pipe while stdout is awaited.
 *
 * @param binary - Executable to spawn.
 * @param args - Arguments passed after the binary.
 * @param stdinInput - Text written to the child's stdin; stdin is closed
 *   afterwards even when this is empty.
 * @param env - Environment for the child process.
 * @param timeout - Milliseconds before the child is killed.
 * @returns Exit code, captured stdout/stderr, and whether the timeout fired.
 */
async function singleCliAttempt(
  binary: string,
  args: string[],
  stdinInput: string,
  env: NodeJS.ProcessEnv,
  timeout: number
): Promise<RawSpawnResult> {
  let proc: ReturnType<typeof Bun.spawn>;
  try {
    proc = Bun.spawn([binary, ...args], {
      env,
      stdin: "pipe",
      stdout: "pipe",
      stderr: "pipe",
    });
  } catch (err) {
    void logError("inference:spawn", err);
    return { code: null, stdout: "", stderr: "", timedOut: false };
  }

  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    try {
      proc.kill();
    } catch {
      /* ignore — the process may already be gone */
    }
  }, timeout);

  const stdinWriter =
    proc.stdin && typeof proc.stdin !== "number"
      ? (proc.stdin as {
          write: (s: string) => void;
          end: () => void;
          close?: () => void;
        })
      : null;
  if (stdinWriter) {
    try {
      if (stdinInput) stdinWriter.write(stdinInput);
      // Always close stdin so the child does not wait for more input.
      stdinWriter.end();
    } catch (err) {
      void logError("inference:stdin", err);
    }
  }

  // Read one output stream to the end; a failed read yields an empty string.
  const drain = async (stream: unknown): Promise<string> => {
    if (!stream || typeof stream === "number") return "";
    try {
      return await new Response(stream as ReadableStream<Uint8Array>).text();
    } catch {
      return "";
    }
  };

  try {
    const [stdout, stderr] = await Promise.all([
      drain(proc.stdout),
      drain(proc.stderr),
    ]);
    try {
      await proc.exited;
    } catch {
      /* ignore — report whatever exit code is available */
    }
    return { code: proc.exitCode, stdout, stderr, timedOut };
  } finally {
    // Disarm the kill timer on every exit path.
    clearTimeout(timer);
  }
}
625
+
626
/**
 * Generic CLI dispatcher: run `binary args`, feed `stdinInput` to stdin (empty
 * for argv-only CLIs like codex), and turn the captured stdout into an
 * `InferenceResult`. Mirrors PAI's universal pattern across all supported
 * subscription CLIs.
 *
 * An attempt that exits non-zero without timing out and with both streams
 * empty is retried once after a 500-1500ms random delay.
 *
 * @param binary - Path of the CLI to spawn.
 * @param args - Arguments for the CLI.
 * @param stdinInput - Text piped to stdin; may be empty.
 * @param opts - Original inference options (timeout, caller, sessionId,
 *   jsonSchema are read here).
 * @param extractText - Optional post-processor applied to the trimmed stdout.
 * @returns Success with the reply text (re-serialized JSON when a schema was
 *   requested), or a failure result.
 */
async function inferenceViaCliSpawn(
  binary: string,
  args: string[],
  stdinInput: string,
  opts: InferenceOptions,
  extractText?: (rawStdout: string) => string
): Promise<InferenceResult> {
  const timeout = opts.timeout ?? 15000;
  const env = buildSpawnGuardEnv(process.env);
  const started = Date.now();
  const tag = `caller=${opts.caller ?? "anonymous"} sessionId=${opts.sessionId ?? "-"}`;
  // Friendly name for logs — strip path + extension so cross-platform diffs
  // (e.g. C:\…\claude.cmd vs /usr/local/bin/claude) read the same in debug.log.
  const binaryName = basename(binary).replace(/\.(cmd|bat|exe|com)$/i, "");
  const runOnce = (): Promise<RawSpawnResult> =>
    singleCliAttempt(binary, args, stdinInput, env, timeout);

  let attempt = await runOnce();

  // Universal retry on empty-output exit≠0 (correlates strongly with burst-
  // concurrency races — the binary silently aborts without writing to either
  // stream). One retry only, 500-1500ms jitter so the burst settles.
  const wroteNothing = attempt.stdout.length === 0 && attempt.stderr.length === 0;
  if (attempt.code !== 0 && !attempt.timedOut && wroteNothing) {
    const jitterMs = 500 + Math.floor(Math.random() * 1000);
    logDebug(
      "inference:spawn",
      `${tag} retry: empty-abort binary=${binaryName} exit=${attempt.code} after ${Date.now() - started}ms, jitter=${jitterMs}ms`
    );
    await new Promise((wake) => setTimeout(wake, jitterMs));
    attempt = await runOnce();
  }

  const elapsedMs = Date.now() - started;
  // Every exit path funnels through here so each call logs exactly one "done".
  const done = (result: InferenceResult): InferenceResult => {
    logDebug(
      "inference:spawn",
      `${tag} done binary=${binaryName} success=${result.success} bytes=${result.output?.length ?? 0} elapsedMs=${elapsedMs}`
    );
    return result;
  };

  if (attempt.timedOut) {
    void logError(
      "inference:spawn",
      `${tag} timeout binary=${binaryName} after ${timeout}ms`
    );
    return done({ success: false });
  }

  if (attempt.code !== 0) {
    void logError(
      "inference:spawn",
      `${tag} exited=${attempt.code} binary=${binaryName} argv=${JSON.stringify(args)} stderr(${attempt.stderr.length})=${attempt.stderr.slice(0, 300)} stdout(${attempt.stdout.length})=${attempt.stdout.slice(0, 300)}`
    );
    return done({ success: false });
  }

  const rawText = attempt.stdout.trim();
  if (!rawText) return done({ success: false });

  const text = extractText ? extractText(rawText) : rawText;
  if (!text) {
    // The binary succeeded but the extractor found nothing usable — log the
    // raw stdout so it is visible what was actually emitted.
    void logError(
      "inference:spawn",
      `${tag} extract-empty binary=${binaryName} rawStdout(${rawText.length})=${rawText.slice(0, 500)}`
    );
    return done({ success: false });
  }

  if (!opts.jsonSchema) return done({ success: true, output: text });

  // Schema mode: succeed only when JSON can be recovered; on failure the raw
  // text is still returned for the caller to inspect.
  const parsed = parseJsonFromOutput(text);
  return parsed === null
    ? done({ success: false, output: text })
    : done({ success: true, output: JSON.stringify(parsed) });
}
711
+
712
/**
 * Forward an error to the project logger, loading "./log" on demand.
 *
 * @param scope - Label identifying where the error happened.
 * @param err - Error object or message to record.
 */
async function logError(scope: string, err: unknown): Promise<void> {
  const log = (await import("./log")).logError;
  log(scope, err);
}
716
+
717
+ // ─────────────────────────────────────────────────────────────────────────────
718
+ // Anthropic API path — used when no claude binary is available, or when the
719
+ // active agent is not claude. Preserves the original PAL inference behavior.
720
+ // ─────────────────────────────────────────────────────────────────────────────
721
+
722
+ async function inferenceViaApi(opts: InferenceOptions): Promise<InferenceResult> {
28
723
  const apiKey = process.env.PAL_ANTHROPIC_API_KEY;
29
724
  if (!apiKey) return { success: false };
30
725
 
@@ -67,9 +762,8 @@ export async function inference(opts: InferenceOptions): Promise<InferenceResult
67
762
  clearTimeout(timer);
68
763
 
69
764
  if (!response.ok) {
70
- const { logError } = await import("./log");
71
765
  const errBody = await response.text().catch(() => "");
72
- logError("inference", `HTTP ${response.status}: ${errBody.slice(0, 200)}`);
766
+ await logError("inference", `HTTP ${response.status}: ${errBody.slice(0, 200)}`);
73
767
  return { success: false };
74
768
  }
75
769
 
@@ -88,8 +782,95 @@ export async function inference(opts: InferenceOptions): Promise<InferenceResult
88
782
 
89
783
  return { success: true, output: text, usage };
90
784
  } catch (err) {
91
- const { logError } = await import("./log");
92
- logError("inference", err);
785
+ await logError("inference", err);
786
+ return { success: false };
787
+ }
788
+ }
789
+
790
+ // ─────────────────────────────────────────────────────────────────────────────
791
+ // OpenAI API path — fallback for codex users without a codex binary on PATH.
792
+ // Codex users almost always have an OpenAI key already; falling back to
793
+ // Anthropic for them would be backwards. Uses chat/completions with the
794
+ // structured-output schema for JSON-mode callers.
795
+ // ─────────────────────────────────────────────────────────────────────────────
796
+
797
/** Model used for the OpenAI path when the caller does not pass `model`. */
const OPENAI_DEFAULT_MODEL = "gpt-5.4-mini";
798
+
799
/**
 * Run one inference through the OpenAI chat/completions endpoint.
 *
 * Requires `PAL_OPENAI_API_KEY`. Never throws: HTTP errors, network errors,
 * timeouts and empty replies all resolve to `{ success: false }` (errors are
 * logged under "inference:openai").
 *
 * The output cap is sent as `max_completion_tokens`, the replacement for the
 * deprecated `max_tokens` parameter, which newer model families reject.
 *
 * @param opts - Inference options; `model` defaults to OPENAI_DEFAULT_MODEL,
 *   `maxTokens` to 500 and `timeout` to 15000 ms. When `jsonSchema` is given,
 *   strict structured output is requested.
 * @returns The trimmed reply text plus token usage when the API reports it.
 */
async function inferenceViaOpenAiApi(opts: InferenceOptions): Promise<InferenceResult> {
  const apiKey = process.env.PAL_OPENAI_API_KEY;
  if (!apiKey) return { success: false };

  const {
    system,
    user,
    model = OPENAI_DEFAULT_MODEL,
    maxTokens = 500,
    timeout = 15000,
    jsonSchema,
  } = opts;

  const controller = new AbortController();
  // The abort timer covers the whole exchange (headers and body) and is
  // cleared in `finally`, so it cannot outlive a failed request.
  const timer = setTimeout(() => controller.abort(), timeout);

  try {
    const messages: Array<{ role: string; content: string }> = [];
    if (system) messages.push({ role: "system", content: system });
    messages.push({ role: "user", content: user });

    const body: Record<string, unknown> = {
      model,
      max_completion_tokens: maxTokens,
      messages,
    };
    if (jsonSchema) {
      body.response_format = {
        type: "json_schema",
        json_schema: { name: "structured_response", strict: true, schema: jsonSchema },
      };
    }

    const response = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "content-type": "application/json",
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });

    if (!response.ok) {
      const errBody = await response.text().catch(() => "");
      await logError(
        "inference:openai",
        `HTTP ${response.status}: ${errBody.slice(0, 200)}`
      );
      return { success: false };
    }

    const data = (await response.json()) as Record<string, unknown>;
    const rawUsage = data?.usage as
      | { prompt_tokens?: number; completion_tokens?: number }
      | undefined;
    // Usage is reported only when both counters are present.
    const usage =
      rawUsage?.prompt_tokens != null && rawUsage?.completion_tokens != null
        ? {
            inputTokens: rawUsage.prompt_tokens,
            outputTokens: rawUsage.completion_tokens,
          }
        : undefined;

    const choices = data?.choices as
      | Array<{ message?: { content?: string } }>
      | undefined;
    const text = choices?.[0]?.message?.content?.trim();
    if (!text) return { success: false, usage };

    return { success: true, output: text, usage };
  } catch (err) {
    await logError("inference:openai", err);
    return { success: false };
  } finally {
    clearTimeout(timer);
  }
}