@agwab/pi-workflow 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +14 -3
  2. package/agents/researcher.md +17 -7
  3. package/dist/artifact-graph-runtime.js +1 -0
  4. package/dist/compiler.js +2 -2
  5. package/dist/dynamic-generated-task-runtime.js +4 -3
  6. package/dist/dynamic-runtime-bundle.js +3 -2
  7. package/dist/extension.js +40 -1
  8. package/dist/subagent-backend.js +82 -27
  9. package/dist/tool-metadata.d.ts +1 -0
  10. package/dist/tool-metadata.js +13 -1
  11. package/dist/workflow-artifact-extension.js +3 -2
  12. package/dist/workflow-artifact-tool.js +84 -4
  13. package/dist/workflow-web-source-extension.d.ts +43 -0
  14. package/dist/workflow-web-source-extension.js +1194 -0
  15. package/dist/workflow-web-source.d.ts +171 -0
  16. package/dist/workflow-web-source.js +897 -0
  17. package/docs/usage.md +32 -18
  18. package/node_modules/@agwab/pi-subagent/package.json +1 -1
  19. package/node_modules/@agwab/pi-subagent/src/api.ts +245 -132
  20. package/node_modules/@agwab/pi-subagent/src/artifacts/result.ts +243 -163
  21. package/node_modules/@agwab/pi-subagent/src/core/constants.ts +117 -90
  22. package/node_modules/@agwab/pi-subagent/src/core/validation.ts +728 -475
  23. package/node_modules/@agwab/pi-subagent/src/orchestrate/run.ts +305 -209
  24. package/node_modules/@agwab/pi-subagent/src/runners/headless-model.ts +750 -439
  25. package/node_modules/@agwab/pi-subagent/src/runners/tmux.ts +422 -268
  26. package/package.json +2 -2
  27. package/skills/workflow-guide/scaffolds/object-tool-fallback/schemas/fetch-control.schema.json +1 -1
  28. package/skills/workflow-guide/scaffolds/object-tool-fallback/spec.json +4 -3
  29. package/src/artifact-graph-runtime.ts +1 -0
  30. package/src/compiler.ts +2 -1
  31. package/src/dynamic-generated-task-runtime.ts +4 -2
  32. package/src/dynamic-runtime-bundle.ts +3 -2
  33. package/src/extension.ts +46 -1
  34. package/src/subagent-backend.ts +121 -37
  35. package/src/tool-metadata.ts +22 -1
  36. package/src/workflow-artifact-extension.ts +3 -2
  37. package/src/workflow-artifact-tool.ts +96 -4
  38. package/src/workflow-web-source-extension.ts +1411 -0
  39. package/src/workflow-web-source.ts +1171 -0
  40. package/workflows/README.md +1 -1
  41. package/workflows/deep-research/helpers/claim-evidence-gate.mjs +474 -40
  42. package/workflows/deep-research/helpers/final-audit-packet.mjs +219 -0
  43. package/workflows/deep-research/helpers/normalize-input-packet.mjs +436 -0
  44. package/workflows/deep-research/helpers/render-executive.mjs +571 -198
  45. package/workflows/deep-research/schemas/deep-research-executive-render-control.schema.json +35 -8
  46. package/workflows/deep-research/schemas/deep-research-normalize-claims-control.schema.json +45 -4
  47. package/workflows/deep-research/schemas/deep-research-verify-claims-control.schema.json +0 -2
  48. package/workflows/deep-research/spec.json +36 -21
  49. package/workflows/deep-review/helpers/render-review-report.mjs +502 -0
  50. package/workflows/deep-review/schemas/deep-review-render-control.schema.json +50 -0
  51. package/workflows/deep-review/spec.json +22 -1
@@ -1,510 +1,821 @@
1
1
  import { once } from "node:events";
2
2
  import { createReadStream, createWriteStream } from "node:fs";
3
- import { stat, writeFile } from "node:fs/promises";
3
+ import { realpath, stat, writeFile } from "node:fs/promises";
4
4
  import { spawn } from "node:child_process";
5
5
  import { resolve } from "node:path";
6
6
  import { buildAgentSystemPrompt, type AgentDefinition } from "../agents.ts";
7
- import { createAttemptArtifactStore, type ArtifactRef, type ProcessMetadata, type ResultEnvelope, type ResultMetadata } from "../artifacts/index.ts";
7
+ import {
8
+ createAttemptArtifactStore,
9
+ type ArtifactRef,
10
+ type ProcessMetadata,
11
+ type ResultEnvelope,
12
+ type ResultMetadata,
13
+ } from "../artifacts/index.ts";
8
14
  import type { ResultWorkspace } from "../artifacts/result.ts";
9
- import type { AgentScope, FailureKind, SandboxInput, Status, ThinkingLevel } from "../core/constants.ts";
15
+ import type {
16
+ AgentScope,
17
+ FailureKind,
18
+ SandboxInput,
19
+ Status,
20
+ ThinkingLevel,
21
+ } from "../core/constants.ts";
10
22
  import { sandboxAllowedDomains } from "../core/constants.ts";
11
23
  import { SandboxUnavailableError, withSandboxedArgv } from "../sandbox/srt.ts";
12
- import { flushToolCallTelemetry, ToolCallTelemetryCollector } from "./tool-call-telemetry.ts";
24
+ import {
25
+ flushToolCallTelemetry,
26
+ ToolCallTelemetryCollector,
27
+ } from "./tool-call-telemetry.ts";
13
28
 
14
29
  export interface RunHeadlessModelOptions {
15
- agent: string;
16
- task: string;
17
- roleContext?: string;
18
- agentScope?: AgentScope;
19
- confirmProjectAgents?: boolean;
20
- cwd?: string;
21
- artifactCwd?: string;
22
- runId?: string;
23
- attemptId?: string;
24
- runsDir?: string;
25
- correlationId?: string;
26
- timeoutMs?: number;
27
- signal?: AbortSignal;
28
- piCommand?: string;
29
- sandbox?: SandboxInput | null;
30
- workspace?: Partial<ResultWorkspace>;
31
- model?: string;
32
- thinking?: ThinkingLevel;
33
- tools?: string[];
34
- systemPrompt?: string;
35
- skills?: string[];
36
- extensions?: string[];
37
- agentDefinition?: AgentDefinition;
38
- captureToolCalls?: boolean;
39
- onProcessStart?: (process: ProcessMetadata) => void | Promise<void>;
30
+ agent: string;
31
+ task: string;
32
+ roleContext?: string;
33
+ agentScope?: AgentScope;
34
+ confirmProjectAgents?: boolean;
35
+ cwd?: string;
36
+ artifactCwd?: string;
37
+ runId?: string;
38
+ attemptId?: string;
39
+ runsDir?: string;
40
+ correlationId?: string;
41
+ parentSessionId?: string;
42
+ sessionId?: string;
43
+ timeoutMs?: number;
44
+ signal?: AbortSignal;
45
+ piCommand?: string;
46
+ sandbox?: SandboxInput | null;
47
+ workspace?: Partial<ResultWorkspace>;
48
+ model?: string;
49
+ thinking?: ThinkingLevel;
50
+ tools?: string[];
51
+ systemPrompt?: string;
52
+ skills?: string[];
53
+ extensions?: string[];
54
+ agentDefinition?: AgentDefinition;
55
+ captureToolCalls?: boolean;
56
+ onProcessStart?: (process: ProcessMetadata) => void | Promise<void>;
40
57
  }
41
58
 
42
- interface ProcessOutcome {
43
- status: Status;
44
- failureKind: FailureKind | null;
45
- exitCode: number | null;
46
- signal: string | null;
59
+ export interface ProcessOutcome {
60
+ status: Status;
61
+ failureKind: FailureKind | null;
62
+ exitCode: number | null;
63
+ signal: string | null;
47
64
  }
48
65
 
49
66
  interface ProcessResult {
50
- outcome: ProcessOutcome;
51
- stderrRef: ArtifactRef;
52
- toolCallArtifactRefs: ArtifactRef[];
53
- parsed: PiJsonParseResult;
54
- stderrText: string;
55
- stderrContextLengthExceeded: boolean;
67
+ outcome: ProcessOutcome;
68
+ stderrRef: ArtifactRef;
69
+ toolCallArtifactRefs: ArtifactRef[];
70
+ parsed: PiJsonParseResult;
71
+ stderrText: string;
72
+ stderrContextLengthExceeded: boolean;
56
73
  }
57
74
 
58
75
  export interface PiJsonParseResult {
59
- finalAssistantText: string;
60
- errors: string[];
61
- parseErrors: string[];
62
- metadata: Partial<ResultMetadata>;
76
+ finalAssistantText: string;
77
+ errors: string[];
78
+ parseErrors: string[];
79
+ metadata: Partial<ResultMetadata>;
63
80
  }
64
81
 
65
82
  const CONTEXT_LENGTH_ERROR_PATTERN =
66
- /\bcontext[_ -]?length[_ -]?exceeded\b|\bcontext[_ -]?window[_ -]?(?:exceeded|overflow|exhausted)\b|\b(?:maximum|max)[_ -]?context[_ -]?length\b|\btoo many tokens\b|\b(?:prompt|input|request)[^\n]{0,80}\btoo large\b|\bcontext_length_exceeded\b/i;
67
-
68
- export function detectContextLengthExceeded(signals: { stderrText?: string; errors?: readonly string[] }): boolean {
69
- const text = [signals.stderrText, ...(signals.errors ?? [])].filter((entry): entry is string => typeof entry === "string" && entry.length > 0).join("\n");
70
- return CONTEXT_LENGTH_ERROR_PATTERN.test(text);
83
+ /\bcontext[_ -]?length[_ -]?exceeded\b|\bcontext[_ -]?window[_ -]?(?:exceeded|overflow|exhausted)\b|\b(?:maximum|max)[_ -]?context[_ -]?length\b|\btoo many tokens\b|\b(?:prompt|input|request)[^\n]{0,80}\btoo large\b|\bcontext_length_exceeded\b/i;
84
+
85
+ export function detectContextLengthExceeded(signals: {
86
+ stderrText?: string;
87
+ errors?: readonly string[];
88
+ }): boolean {
89
+ const text = [signals.stderrText, ...(signals.errors ?? [])]
90
+ .filter(
91
+ (entry): entry is string => typeof entry === "string" && entry.length > 0,
92
+ )
93
+ .join("\n");
94
+ return CONTEXT_LENGTH_ERROR_PATTERN.test(text);
71
95
  }
72
96
 
73
97
  function normalizeTimeoutMs(timeoutMs: number | undefined): number | undefined {
74
- if (timeoutMs === undefined) return undefined;
75
- if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
76
- throw new Error("timeoutMs must be a positive finite number when provided.");
77
- }
78
- return timeoutMs;
98
+ if (timeoutMs === undefined) return undefined;
99
+ if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
100
+ throw new Error(
101
+ "timeoutMs must be a positive finite number when provided.",
102
+ );
103
+ }
104
+ return timeoutMs;
105
+ }
106
+
107
+ type SessionManagerModule = {
108
+ SessionManager?: {
109
+ list?: (cwd: string) => Promise<Array<{ id: string }>>;
110
+ };
111
+ };
112
+
113
+ export async function resultSessionMetadata(
114
+ cwd: string,
115
+ sessionId: string | undefined,
116
+ ): Promise<Partial<ResultMetadata>> {
117
+ if (sessionId === undefined) {
118
+ return { session: { requested: false, disposition: "ephemeral" } };
119
+ }
120
+
121
+ try {
122
+ const sessionCwd = await realpath(cwd).catch(() => cwd);
123
+ const mod = (await import(
124
+ "@earendil-works/pi-coding-agent"
125
+ )) as SessionManagerModule;
126
+ if (typeof mod.SessionManager?.list !== "function") {
127
+ return {
128
+ sessionId,
129
+ session: {
130
+ id: sessionId,
131
+ requested: true,
132
+ disposition: "unavailable",
133
+ reason: "resume_unsupported",
134
+ },
135
+ };
136
+ }
137
+ const sessions = await mod.SessionManager.list(sessionCwd);
138
+ return {
139
+ sessionId,
140
+ session: {
141
+ id: sessionId,
142
+ requested: true,
143
+ disposition: sessions.some((session) => session.id === sessionId)
144
+ ? "resumed"
145
+ : "created",
146
+ },
147
+ };
148
+ } catch {
149
+ return {
150
+ sessionId,
151
+ session: {
152
+ id: sessionId,
153
+ requested: true,
154
+ disposition: "unavailable",
155
+ reason: "session_store_error",
156
+ },
157
+ };
158
+ }
79
159
  }
80
160
 
81
161
  function toBuffer(chunk: Buffer | string): Buffer {
82
- return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
162
+ return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
83
163
  }
84
164
 
85
165
  function textFromContent(content: unknown): string {
86
- if (!Array.isArray(content)) return "";
87
- return content
88
- .map((part) => {
89
- if (typeof part === "object" && part !== null && "type" in part && "text" in part) {
90
- const record = part as { type?: unknown; text?: unknown };
91
- if (record.type === "text" && typeof record.text === "string") return record.text;
92
- }
93
- return "";
94
- })
95
- .join("");
166
+ if (!Array.isArray(content)) return "";
167
+ return content
168
+ .map((part) => {
169
+ if (
170
+ typeof part === "object" &&
171
+ part !== null &&
172
+ "type" in part &&
173
+ "text" in part
174
+ ) {
175
+ const record = part as { type?: unknown; text?: unknown };
176
+ if (record.type === "text" && typeof record.text === "string")
177
+ return record.text;
178
+ }
179
+ return "";
180
+ })
181
+ .join("");
96
182
  }
97
183
 
98
184
  function errorText(value: unknown): string | undefined {
99
- if (typeof value === "string" && value.length > 0) return value;
100
- if (typeof value === "object" && value !== null) {
101
- const record = value as Record<string, unknown>;
102
- if (typeof record.message === "string" && record.message.length > 0) return record.message;
103
- if (typeof record.error === "string" && record.error.length > 0) return record.error;
104
- }
105
- return undefined;
185
+ if (typeof value === "string" && value.length > 0) return value;
186
+ if (typeof value === "object" && value !== null) {
187
+ const record = value as Record<string, unknown>;
188
+ if (typeof record.message === "string" && record.message.length > 0)
189
+ return record.message;
190
+ if (typeof record.error === "string" && record.error.length > 0)
191
+ return record.error;
192
+ }
193
+ return undefined;
106
194
  }
107
195
 
108
- const PARSED_EVENT_PATTERN = /"type"\s*:\s*"(?:message_end|turn_end|agent_end|error)"/;
109
- const TOOL_CALL_EVENT_PATTERN = /"type"\s*:\s*"(?:tool_execution_start|tool_execution_end)"/;
196
+ const PARSED_EVENT_PATTERN =
197
+ /"type"\s*:\s*"(?:message_end|turn_end|agent_end|error)"/;
198
+ const TOOL_CALL_EVENT_PATTERN =
199
+ /"type"\s*:\s*"(?:tool_execution_start|tool_execution_end)"/;
110
200
  const MAX_PARSE_ERRORS = 20;
201
+ const MAX_METADATA_ERRORS = 20;
111
202
  const MAX_JSON_LINE_CHARS = 64 * 1024 * 1024;
112
203
  const STDERR_TEXT_LIMIT = 256 * 1024;
113
204
 
114
205
  function emptyParseResult(): PiJsonParseResult {
115
- return { finalAssistantText: "", errors: [], parseErrors: [], metadata: {} };
206
+ return { finalAssistantText: "", errors: [], parseErrors: [], metadata: {} };
116
207
  }
117
208
 
118
209
  function pushParseError(parsed: PiJsonParseResult, message: string): void {
119
- if (parsed.parseErrors.length < MAX_PARSE_ERRORS) parsed.parseErrors.push(message);
210
+ if (parsed.parseErrors.length < MAX_PARSE_ERRORS)
211
+ parsed.parseErrors.push(message);
120
212
  }
121
213
 
122
- function parsePiJsonLine(line: string, lineNumber: number, parsed: PiJsonParseResult, onEvent?: (event: unknown) => void): void {
123
- if (line.trim().length === 0) return;
124
- if (!PARSED_EVENT_PATTERN.test(line) && (onEvent === undefined || !TOOL_CALL_EVENT_PATTERN.test(line))) return;
125
- if (line.length > MAX_JSON_LINE_CHARS) {
126
- pushParseError(parsed, `line ${lineNumber}: JSON event too large to parse (${line.length} chars)`);
127
- return;
128
- }
129
-
130
- let event: unknown;
131
- try {
132
- event = JSON.parse(line);
133
- } catch (error) {
134
- const message = error instanceof Error ? error.message : String(error);
135
- pushParseError(parsed, `line ${lineNumber}: ${message}`);
136
- return;
137
- }
138
-
139
- onEvent?.(event);
140
-
141
- if (typeof event !== "object" || event === null) return;
142
- const record = event as Record<string, unknown>;
143
- const type = record.type;
144
-
145
- if (type === "message_end" || type === "turn_end") {
146
- const message = record.message;
147
- if (typeof message === "object" && message !== null && (message as Record<string, unknown>).role === "assistant") {
148
- const assistant = message as Record<string, unknown>;
149
- parsed.finalAssistantText = textFromContent(assistant.content);
150
- if (typeof assistant.provider === "string") parsed.metadata.provider = assistant.provider;
151
- if (typeof assistant.model === "string") parsed.metadata.model = assistant.model;
152
- if (assistant.usage !== undefined) parsed.metadata.usage = assistant.usage;
153
- if (typeof assistant.stopReason === "string") parsed.metadata.stopReason = assistant.stopReason;
154
- if (assistant.stopReason === "error") {
155
- const text = errorText(assistant.errorMessage) ?? errorText(assistant.error) ?? "assistant stopped with an error";
156
- parsed.errors.push(text);
157
- }
158
- }
159
- } else if (type === "agent_end") {
160
- const messages = record.messages;
161
- if (Array.isArray(messages)) {
162
- for (const message of messages) {
163
- if (typeof message === "object" && message !== null && (message as Record<string, unknown>).role === "assistant") {
164
- const text = textFromContent((message as Record<string, unknown>).content);
165
- if (text.length > 0) parsed.finalAssistantText = text;
166
- }
167
- }
168
- }
169
- }
170
-
171
- if (type === "error") {
172
- const text = errorText(record.error) ?? errorText(record.message) ?? errorText(record);
173
- if (text) parsed.errors.push(text);
174
- }
214
+ function parsePiJsonLine(
215
+ line: string,
216
+ lineNumber: number,
217
+ parsed: PiJsonParseResult,
218
+ onEvent?: (event: unknown) => void,
219
+ ): void {
220
+ if (line.trim().length === 0) return;
221
+ if (
222
+ !PARSED_EVENT_PATTERN.test(line) &&
223
+ (onEvent === undefined || !TOOL_CALL_EVENT_PATTERN.test(line))
224
+ )
225
+ return;
226
+ if (line.length > MAX_JSON_LINE_CHARS) {
227
+ pushParseError(
228
+ parsed,
229
+ `line ${lineNumber}: JSON event too large to parse (${line.length} chars)`,
230
+ );
231
+ return;
232
+ }
233
+
234
+ let event: unknown;
235
+ try {
236
+ event = JSON.parse(line);
237
+ } catch (error) {
238
+ const message = error instanceof Error ? error.message : String(error);
239
+ pushParseError(parsed, `line ${lineNumber}: ${message}`);
240
+ return;
241
+ }
242
+
243
+ onEvent?.(event);
244
+
245
+ if (typeof event !== "object" || event === null) return;
246
+ const record = event as Record<string, unknown>;
247
+ const type = record.type;
248
+
249
+ if (type === "message_end" || type === "turn_end") {
250
+ const message = record.message;
251
+ if (
252
+ typeof message === "object" &&
253
+ message !== null &&
254
+ (message as Record<string, unknown>).role === "assistant"
255
+ ) {
256
+ const assistant = message as Record<string, unknown>;
257
+ parsed.finalAssistantText = textFromContent(assistant.content);
258
+ if (typeof assistant.provider === "string")
259
+ parsed.metadata.provider = assistant.provider;
260
+ if (typeof assistant.model === "string")
261
+ parsed.metadata.model = assistant.model;
262
+ if (assistant.usage !== undefined)
263
+ parsed.metadata.usage = assistant.usage;
264
+ if (typeof assistant.stopReason === "string")
265
+ parsed.metadata.stopReason = assistant.stopReason;
266
+ if (assistant.stopReason === "error") {
267
+ const text =
268
+ errorText(assistant.errorMessage) ??
269
+ errorText(assistant.error) ??
270
+ "assistant stopped with an error";
271
+ parsed.errors.push(text);
272
+ }
273
+ }
274
+ } else if (type === "agent_end") {
275
+ const messages = record.messages;
276
+ if (Array.isArray(messages)) {
277
+ for (const message of messages) {
278
+ if (
279
+ typeof message === "object" &&
280
+ message !== null &&
281
+ (message as Record<string, unknown>).role === "assistant"
282
+ ) {
283
+ const text = textFromContent(
284
+ (message as Record<string, unknown>).content,
285
+ );
286
+ if (text.length > 0) parsed.finalAssistantText = text;
287
+ }
288
+ }
289
+ }
290
+ }
291
+
292
+ if (type === "error") {
293
+ const text =
294
+ errorText(record.error) ?? errorText(record.message) ?? errorText(record);
295
+ if (text) parsed.errors.push(text);
296
+ }
175
297
  }
176
298
 
177
299
  class PiJsonStreamParser {
178
- readonly parsed = emptyParseResult();
179
- private buffered = "";
180
- private lineNumber = 0;
181
- private discardingOversizedLine = false;
182
- private readonly onEvent?: (event: unknown) => void;
183
-
184
- constructor(onEvent?: (event: unknown) => void) {
185
- this.onEvent = onEvent;
186
- }
187
-
188
- push(chunk: Buffer | string): void {
189
- let text = typeof chunk === "string" ? chunk : chunk.toString("utf8");
190
- while (text.length > 0) {
191
- if (this.discardingOversizedLine) {
192
- const newline = text.indexOf("\n");
193
- if (newline < 0) return;
194
- this.discardingOversizedLine = false;
195
- this.buffered = "";
196
- text = text.slice(newline + 1);
197
- continue;
198
- }
199
-
200
- const newline = text.indexOf("\n");
201
- const segment = newline < 0 ? text : text.slice(0, newline + 1);
202
- this.buffered += segment;
203
- text = newline < 0 ? "" : text.slice(newline + 1);
204
-
205
- if (this.buffered.length > MAX_JSON_LINE_CHARS) {
206
- this.lineNumber += 1;
207
- pushParseError(this.parsed, `line ${this.lineNumber}: JSON event too large to parse`);
208
- this.buffered = "";
209
- this.discardingOversizedLine = newline < 0;
210
- continue;
211
- }
212
-
213
- if (newline >= 0) this.flushLine();
214
- }
215
- }
216
-
217
- finish(): PiJsonParseResult {
218
- if (!this.discardingOversizedLine && this.buffered.length > 0) this.flushLine();
219
- return this.parsed;
220
- }
221
-
222
- private flushLine(): void {
223
- this.lineNumber += 1;
224
- const line = this.buffered.endsWith("\n") ? this.buffered.slice(0, -1).replace(/\r$/, "") : this.buffered;
225
- this.buffered = "";
226
- parsePiJsonLine(line, this.lineNumber, this.parsed, this.onEvent);
227
- }
300
+ readonly parsed = emptyParseResult();
301
+ private buffered = "";
302
+ private lineNumber = 0;
303
+ private discardingOversizedLine = false;
304
+ private readonly onEvent?: (event: unknown) => void;
305
+
306
+ constructor(onEvent?: (event: unknown) => void) {
307
+ this.onEvent = onEvent;
308
+ }
309
+
310
+ push(chunk: Buffer | string): void {
311
+ let text = typeof chunk === "string" ? chunk : chunk.toString("utf8");
312
+ while (text.length > 0) {
313
+ if (this.discardingOversizedLine) {
314
+ const newline = text.indexOf("\n");
315
+ if (newline < 0) return;
316
+ this.discardingOversizedLine = false;
317
+ this.buffered = "";
318
+ text = text.slice(newline + 1);
319
+ continue;
320
+ }
321
+
322
+ const newline = text.indexOf("\n");
323
+ const segment = newline < 0 ? text : text.slice(0, newline + 1);
324
+ this.buffered += segment;
325
+ text = newline < 0 ? "" : text.slice(newline + 1);
326
+
327
+ if (this.buffered.length > MAX_JSON_LINE_CHARS) {
328
+ this.lineNumber += 1;
329
+ pushParseError(
330
+ this.parsed,
331
+ `line ${this.lineNumber}: JSON event too large to parse`,
332
+ );
333
+ this.buffered = "";
334
+ this.discardingOversizedLine = newline < 0;
335
+ continue;
336
+ }
337
+
338
+ if (newline >= 0) this.flushLine();
339
+ }
340
+ }
341
+
342
+ finish(): PiJsonParseResult {
343
+ if (!this.discardingOversizedLine && this.buffered.length > 0)
344
+ this.flushLine();
345
+ return this.parsed;
346
+ }
347
+
348
+ private flushLine(): void {
349
+ this.lineNumber += 1;
350
+ const line = this.buffered.endsWith("\n")
351
+ ? this.buffered.slice(0, -1).replace(/\r$/, "")
352
+ : this.buffered;
353
+ this.buffered = "";
354
+ parsePiJsonLine(line, this.lineNumber, this.parsed, this.onEvent);
355
+ }
228
356
  }
229
357
 
230
358
  export function parsePiJsonLines(stdout: string): PiJsonParseResult {
231
- const parser = new PiJsonStreamParser();
232
- parser.push(stdout);
233
- return parser.finish();
359
+ const parser = new PiJsonStreamParser();
360
+ parser.push(stdout);
361
+ return parser.finish();
362
+ }
363
+
364
+ export async function parsePiJsonFile(
365
+ path: string,
366
+ ): Promise<PiJsonParseResult> {
367
+ const parser = new PiJsonStreamParser();
368
+ const stream = createReadStream(path, { encoding: "utf8" });
369
+ for await (const chunk of stream) parser.push(chunk);
370
+ return parser.finish();
371
+ }
372
+
373
+ export function resolvePiJsonOutcome(
374
+ processOutcome: ProcessOutcome,
375
+ parsed: PiJsonParseResult,
376
+ contextLengthExceeded: boolean,
377
+ ): ProcessOutcome {
378
+ if (processOutcome.status !== "completed") return processOutcome;
379
+ if (parsed.parseErrors.length > 0 && parsed.finalAssistantText.length === 0) {
380
+ return { ...processOutcome, status: "failed", failureKind: "parse" };
381
+ }
382
+ if (
383
+ parsed.errors.length > 0 &&
384
+ parsedErrorsAreFatal(parsed, contextLengthExceeded)
385
+ ) {
386
+ return { ...processOutcome, status: "failed", failureKind: "model" };
387
+ }
388
+ return processOutcome;
389
+ }
390
+
391
+ export function resultMetadataFromParse(
392
+ parsed: PiJsonParseResult,
393
+ contextLengthExceeded: boolean,
394
+ outcome: ProcessOutcome,
395
+ ): Partial<ResultMetadata> {
396
+ return {
397
+ ...parsed.metadata,
398
+ contextLengthExceeded,
399
+ ...(parsed.errors.length === 0
400
+ ? {}
401
+ : { streamErrors: parsed.errors.slice(0, MAX_METADATA_ERRORS) }),
402
+ ...(outcome.status === "completed" && parsed.errors.length > 0
403
+ ? { nonFatalStreamErrors: parsed.errors.slice(0, MAX_METADATA_ERRORS) }
404
+ : {}),
405
+ ...(parsed.parseErrors.length === 0
406
+ ? {}
407
+ : { parseErrors: parsed.parseErrors.slice(0, MAX_METADATA_ERRORS) }),
408
+ };
234
409
  }
235
410
 
236
- export async function parsePiJsonFile(path: string): Promise<PiJsonParseResult> {
237
- const parser = new PiJsonStreamParser();
238
- const stream = createReadStream(path, { encoding: "utf8" });
239
- for await (const chunk of stream) parser.push(chunk);
240
- return parser.finish();
411
+ function parsedErrorsAreFatal(
412
+ parsed: PiJsonParseResult,
413
+ contextLengthExceeded: boolean,
414
+ ): boolean {
415
+ return (
416
+ parsed.finalAssistantText.length === 0 ||
417
+ parsed.metadata.stopReason === "error" ||
418
+ contextLengthExceeded
419
+ );
241
420
  }
242
421
 
243
422
  function buildPrompt(options: RunHeadlessModelOptions): string {
244
- if (options.systemPrompt !== undefined) return options.task;
245
- const sections = [
246
- `You are the Pi subagent named ${JSON.stringify(options.agent)}.`,
247
- options.roleContext ? `Role context:\n${options.roleContext}` : undefined,
248
- options.agentScope ? `Agent scope: ${options.agentScope}` : undefined,
249
- options.confirmProjectAgents === undefined ? undefined : `confirmProjectAgents: ${String(options.confirmProjectAgents)}`,
250
- `Task:\n${options.task}`,
251
- ];
252
- return sections.filter((section): section is string => section !== undefined).join("\n\n");
423
+ if (options.systemPrompt !== undefined) return options.task;
424
+ const sections = [
425
+ `You are the Pi subagent named ${JSON.stringify(options.agent)}.`,
426
+ options.roleContext ? `Role context:\n${options.roleContext}` : undefined,
427
+ options.agentScope ? `Agent scope: ${options.agentScope}` : undefined,
428
+ options.confirmProjectAgents === undefined
429
+ ? undefined
430
+ : `confirmProjectAgents: ${String(options.confirmProjectAgents)}`,
431
+ `Task:\n${options.task}`,
432
+ ];
433
+ return sections
434
+ .filter((section): section is string => section !== undefined)
435
+ .join("\n\n");
253
436
  }
254
437
 
255
- export function buildPiArgv(options: RunHeadlessModelOptions): readonly [string, ...string[]] {
256
- const argv: string[] = [
257
- options.piCommand ?? "pi",
258
- "--mode",
259
- "json",
260
- "--print",
261
- "--no-session",
262
- "--no-context-files",
263
- "--exclude-tools",
264
- "subagent",
265
- ];
266
- const model = options.model ?? options.agentDefinition?.model;
267
- const thinking = options.thinking ?? options.agentDefinition?.thinking;
268
- const tools = options.tools ?? options.agentDefinition?.tools;
269
- const agentSystemPrompt = options.systemPrompt !== undefined ? undefined : options.agentDefinition === undefined ? undefined : buildAgentSystemPrompt(options.agentDefinition);
270
-
271
- if (options.systemPrompt !== undefined) {
272
- argv.push("--system-prompt", options.systemPrompt);
273
- } else if (agentSystemPrompt !== undefined) {
274
- argv.push(options.agentDefinition?.systemPromptMode === "replace" ? "--system-prompt" : "--append-system-prompt", agentSystemPrompt);
275
- }
276
- if (model !== undefined) argv.push("--model", model);
277
- if (thinking !== undefined) argv.push("--thinking", thinking);
278
- if (tools !== undefined && tools.length > 0) argv.push("--tools", tools.join(","));
279
- else if (tools !== undefined) argv.push("--no-tools");
280
- if (options.skills !== undefined && options.skills.length === 0) argv.push("--no-skills");
281
- else for (const skill of options.skills ?? []) argv.push("--skill", skill);
282
- if (options.extensions !== undefined && options.extensions.length === 0) argv.push("--no-extensions");
283
- else for (const extension of options.extensions ?? []) argv.push("--extension", extension);
284
- argv.push(buildPrompt(options));
285
- return argv as [string, ...string[]];
438
+ export function buildPiArgv(
439
+ options: RunHeadlessModelOptions,
440
+ ): readonly [string, ...string[]] {
441
+ const argv: string[] = [
442
+ options.piCommand ?? "pi",
443
+ "--mode",
444
+ "json",
445
+ "--print",
446
+ ];
447
+ if (options.sessionId !== undefined) {
448
+ argv.push("--session-id", options.sessionId);
449
+ } else {
450
+ argv.push("--no-session");
451
+ }
452
+ argv.push("--no-context-files", "--exclude-tools", "subagent");
453
+ const model = options.model ?? options.agentDefinition?.model;
454
+ const thinking = options.thinking ?? options.agentDefinition?.thinking;
455
+ const tools = options.tools ?? options.agentDefinition?.tools;
456
+ const agentSystemPrompt =
457
+ options.systemPrompt !== undefined
458
+ ? undefined
459
+ : options.agentDefinition === undefined
460
+ ? undefined
461
+ : buildAgentSystemPrompt(options.agentDefinition);
462
+
463
+ if (options.systemPrompt !== undefined) {
464
+ argv.push("--system-prompt", options.systemPrompt);
465
+ } else if (agentSystemPrompt !== undefined) {
466
+ argv.push(
467
+ options.agentDefinition?.systemPromptMode === "replace"
468
+ ? "--system-prompt"
469
+ : "--append-system-prompt",
470
+ agentSystemPrompt,
471
+ );
472
+ }
473
+ if (model !== undefined) argv.push("--model", model);
474
+ if (thinking !== undefined) argv.push("--thinking", thinking);
475
+ if (tools !== undefined && tools.length > 0)
476
+ argv.push("--tools", tools.join(","));
477
+ else if (tools !== undefined) argv.push("--no-tools");
478
+ if (options.skills !== undefined && options.skills.length === 0)
479
+ argv.push("--no-skills");
480
+ else for (const skill of options.skills ?? []) argv.push("--skill", skill);
481
+ if (options.extensions !== undefined && options.extensions.length === 0)
482
+ argv.push("--no-extensions");
483
+ else
484
+ for (const extension of options.extensions ?? [])
485
+ argv.push("--extension", extension);
486
+ argv.push(buildPrompt(options));
487
+ return argv as [string, ...string[]];
286
488
  }
287
489
 
288
490
  async function fileBytes(path: string): Promise<number> {
289
- try {
290
- return (await stat(path)).size;
291
- } catch {
292
- return 0;
293
- }
491
+ try {
492
+ return (await stat(path)).size;
493
+ } catch {
494
+ return 0;
495
+ }
294
496
  }
295
497
 
296
498
  function appendLimited(base: string, chunk: string, limit: number): string {
297
- if (base.length >= limit) return base;
298
- return base + chunk.slice(0, limit - base.length);
499
+ if (base.length >= limit) return base;
500
+ return base + chunk.slice(0, limit - base.length);
299
501
  }
300
502
 
301
503
  async function runProcess(
302
- argv: readonly [string, ...string[]],
303
- cwd: string,
304
- timeoutMs: number | undefined,
305
- store: Awaited<ReturnType<typeof createAttemptArtifactStore>>,
306
- captureToolCalls?: boolean,
307
- abortSignal?: AbortSignal,
308
- env?: NodeJS.ProcessEnv,
309
- onProcessStart?: (process: ProcessMetadata) => void | Promise<void>,
504
+ argv: readonly [string, ...string[]],
505
+ cwd: string,
506
+ timeoutMs: number | undefined,
507
+ store: Awaited<ReturnType<typeof createAttemptArtifactStore>>,
508
+ captureToolCalls?: boolean,
509
+ abortSignal?: AbortSignal,
510
+ env?: NodeJS.ProcessEnv,
511
+ onProcessStart?: (process: ProcessMetadata) => void | Promise<void>,
310
512
  ): Promise<ProcessResult> {
311
- const stderrPath = store.pathFor("stderr");
312
- await writeFile(stderrPath, "");
313
-
314
- const toolCallTelemetry = captureToolCalls === true ? new ToolCallTelemetryCollector() : undefined;
315
- const parser = new PiJsonStreamParser((event) => toolCallTelemetry?.processEvent(event));
316
- const stderrStream = createWriteStream(stderrPath, { flags: "w" });
317
- let stderrText = "";
318
- let stderrContextLengthExceeded = false;
319
-
320
- async function finishWith(outcome: ProcessOutcome): Promise<ProcessResult> {
321
- stderrStream.end();
322
- await once(stderrStream, "finish");
323
- const parsed = parser.finish();
324
- return {
325
- outcome,
326
- stderrRef: store.refFor("stderr", await fileBytes(stderrPath)),
327
- toolCallArtifactRefs: await flushToolCallTelemetry(toolCallTelemetry, store),
328
- parsed,
329
- stderrText,
330
- stderrContextLengthExceeded,
331
- };
332
- }
333
-
334
- if (abortSignal?.aborted) {
335
- return await finishWith({ status: "failed", failureKind: "abort", exitCode: null, signal: null });
336
- }
337
-
338
- return await new Promise<ProcessResult>((resolveProcess) => {
339
- const child = spawn(argv[0], argv.slice(1), {
340
- cwd,
341
- shell: false,
342
- detached: process.platform !== "win32",
343
- stdio: ["ignore", "pipe", "pipe"],
344
- ...(env === undefined ? {} : { env }),
345
- });
346
-
347
- if (child.pid !== undefined) {
348
- void Promise.resolve(onProcessStart?.({ pid: child.pid, processGroupId: process.platform === "win32" ? undefined : child.pid, command: argv[0] })).catch(() => undefined);
349
- }
350
-
351
- let settled = false;
352
- let stopKind: "timeout" | "abort" | null = null;
353
- let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
354
- let forceKillTimer: ReturnType<typeof setTimeout> | null = null;
355
-
356
- function clearTimers(): void {
357
- if (timeoutTimer) clearTimeout(timeoutTimer);
358
- if (forceKillTimer) clearTimeout(forceKillTimer);
359
- timeoutTimer = null;
360
- forceKillTimer = null;
361
- }
362
-
363
- function cleanup(): void {
364
- clearTimers();
365
- abortSignal?.removeEventListener("abort", onAbort);
366
- }
367
-
368
- function signalChild(signal: NodeJS.Signals): void {
369
- try {
370
- if (child.pid !== undefined && process.platform !== "win32") process.kill(-child.pid, signal);
371
- else child.kill(signal);
372
- } catch {
373
- try { child.kill(signal); } catch { /* already exited */ }
374
- }
375
- }
376
-
377
- function requestStop(kind: "timeout" | "abort"): void {
378
- if (settled) return;
379
- stopKind ??= kind;
380
- signalChild("SIGTERM");
381
- forceKillTimer ??= setTimeout(() => {
382
- signalChild("SIGKILL");
383
- }, 1_000);
384
- }
385
-
386
- function onAbort(): void {
387
- requestStop("abort");
388
- }
389
-
390
- function settle(outcome: ProcessOutcome): void {
391
- if (settled) return;
392
- settled = true;
393
- cleanup();
394
- void finishWith(outcome).then(resolveProcess, () => resolveProcess({
395
- outcome: { status: "failed", failureKind: "internal", exitCode: null, signal: null },
396
- stderrRef: store.refFor("stderr", 0),
397
- toolCallArtifactRefs: [],
398
- parsed: parser.finish(),
399
- stderrText,
400
- stderrContextLengthExceeded,
401
- }));
402
- }
403
-
404
- child.stdout?.on("data", (chunk: Buffer | string) => {
405
- parser.push(toBuffer(chunk));
406
- });
407
-
408
- child.stderr?.on("data", (chunk: Buffer | string) => {
409
- const buffer = toBuffer(chunk);
410
- const text = buffer.toString("utf8");
411
- stderrText = appendLimited(stderrText, text, STDERR_TEXT_LIMIT);
412
- stderrContextLengthExceeded ||= detectContextLengthExceeded({ stderrText: text });
413
- if (!stderrStream.write(buffer)) {
414
- child.stderr?.pause();
415
- stderrStream.once("drain", () => child.stderr?.resume());
416
- }
417
- });
418
-
419
- child.on("error", () => {
420
- settle({ status: "failed", failureKind: "spawn", exitCode: null, signal: null });
421
- });
422
-
423
- child.on("close", (exitCode, signal) => {
424
- if (stopKind === null && signal !== null) {
425
- settle({ status: "cancelled", failureKind: "cancelled", exitCode, signal });
426
- return;
427
- }
428
- const failureKind = stopKind ?? (exitCode === 0 ? null : "model");
429
- settle({ status: failureKind === null ? "completed" : "failed", failureKind, exitCode, signal });
430
- });
431
-
432
- if (timeoutMs !== undefined) {
433
- timeoutTimer = setTimeout(() => {
434
- requestStop("timeout");
435
- }, timeoutMs);
436
- }
437
-
438
- abortSignal?.addEventListener("abort", onAbort, { once: true });
439
- if (abortSignal?.aborted) requestStop("abort");
440
- });
513
+ const stderrPath = store.pathFor("stderr");
514
+ await writeFile(stderrPath, "");
515
+
516
+ const toolCallTelemetry =
517
+ captureToolCalls === true ? new ToolCallTelemetryCollector() : undefined;
518
+ const parser = new PiJsonStreamParser((event) =>
519
+ toolCallTelemetry?.processEvent(event),
520
+ );
521
+ const stderrStream = createWriteStream(stderrPath, { flags: "w" });
522
+ let stderrText = "";
523
+ let stderrContextLengthExceeded = false;
524
+
525
+ async function finishWith(outcome: ProcessOutcome): Promise<ProcessResult> {
526
+ stderrStream.end();
527
+ await once(stderrStream, "finish");
528
+ const parsed = parser.finish();
529
+ return {
530
+ outcome,
531
+ stderrRef: store.refFor("stderr", await fileBytes(stderrPath)),
532
+ toolCallArtifactRefs: await flushToolCallTelemetry(
533
+ toolCallTelemetry,
534
+ store,
535
+ ),
536
+ parsed,
537
+ stderrText,
538
+ stderrContextLengthExceeded,
539
+ };
540
+ }
541
+
542
+ if (abortSignal?.aborted) {
543
+ return await finishWith({
544
+ status: "failed",
545
+ failureKind: "abort",
546
+ exitCode: null,
547
+ signal: null,
548
+ });
549
+ }
550
+
551
+ return await new Promise<ProcessResult>((resolveProcess) => {
552
+ const child = spawn(argv[0], argv.slice(1), {
553
+ cwd,
554
+ shell: false,
555
+ detached: process.platform !== "win32",
556
+ stdio: ["ignore", "pipe", "pipe"],
557
+ ...(env === undefined ? {} : { env }),
558
+ });
559
+
560
+ if (child.pid !== undefined) {
561
+ void Promise.resolve(
562
+ onProcessStart?.({
563
+ pid: child.pid,
564
+ processGroupId: process.platform === "win32" ? undefined : child.pid,
565
+ command: argv[0],
566
+ }),
567
+ ).catch(() => undefined);
568
+ }
569
+
570
+ let settled = false;
571
+ let stopKind: "timeout" | "abort" | null = null;
572
+ let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
573
+ let forceKillTimer: ReturnType<typeof setTimeout> | null = null;
574
+
575
+ function clearTimers(): void {
576
+ if (timeoutTimer) clearTimeout(timeoutTimer);
577
+ if (forceKillTimer) clearTimeout(forceKillTimer);
578
+ timeoutTimer = null;
579
+ forceKillTimer = null;
580
+ }
581
+
582
+ function cleanup(): void {
583
+ clearTimers();
584
+ abortSignal?.removeEventListener("abort", onAbort);
585
+ }
586
+
587
+ function signalChild(signal: NodeJS.Signals): void {
588
+ try {
589
+ if (child.pid !== undefined && process.platform !== "win32")
590
+ process.kill(-child.pid, signal);
591
+ else child.kill(signal);
592
+ } catch {
593
+ try {
594
+ child.kill(signal);
595
+ } catch {
596
+ /* already exited */
597
+ }
598
+ }
599
+ }
600
+
601
+ function requestStop(kind: "timeout" | "abort"): void {
602
+ if (settled) return;
603
+ stopKind ??= kind;
604
+ signalChild("SIGTERM");
605
+ forceKillTimer ??= setTimeout(() => {
606
+ signalChild("SIGKILL");
607
+ }, 1_000);
608
+ }
609
+
610
+ function onAbort(): void {
611
+ requestStop("abort");
612
+ }
613
+
614
+ function settle(outcome: ProcessOutcome): void {
615
+ if (settled) return;
616
+ settled = true;
617
+ cleanup();
618
+ void finishWith(outcome).then(resolveProcess, () =>
619
+ resolveProcess({
620
+ outcome: {
621
+ status: "failed",
622
+ failureKind: "internal",
623
+ exitCode: null,
624
+ signal: null,
625
+ },
626
+ stderrRef: store.refFor("stderr", 0),
627
+ toolCallArtifactRefs: [],
628
+ parsed: parser.finish(),
629
+ stderrText,
630
+ stderrContextLengthExceeded,
631
+ }),
632
+ );
633
+ }
634
+
635
+ child.stdout?.on("data", (chunk: Buffer | string) => {
636
+ parser.push(toBuffer(chunk));
637
+ });
638
+
639
+ child.stderr?.on("data", (chunk: Buffer | string) => {
640
+ const buffer = toBuffer(chunk);
641
+ const text = buffer.toString("utf8");
642
+ stderrText = appendLimited(stderrText, text, STDERR_TEXT_LIMIT);
643
+ stderrContextLengthExceeded ||= detectContextLengthExceeded({
644
+ stderrText: text,
645
+ });
646
+ if (!stderrStream.write(buffer)) {
647
+ child.stderr?.pause();
648
+ stderrStream.once("drain", () => child.stderr?.resume());
649
+ }
650
+ });
651
+
652
+ child.on("error", () => {
653
+ settle({
654
+ status: "failed",
655
+ failureKind: "spawn",
656
+ exitCode: null,
657
+ signal: null,
658
+ });
659
+ });
660
+
661
+ child.on("close", (exitCode, signal) => {
662
+ if (stopKind === null && signal !== null) {
663
+ settle({
664
+ status: "cancelled",
665
+ failureKind: "cancelled",
666
+ exitCode,
667
+ signal,
668
+ });
669
+ return;
670
+ }
671
+ const failureKind = stopKind ?? (exitCode === 0 ? null : "model");
672
+ settle({
673
+ status: failureKind === null ? "completed" : "failed",
674
+ failureKind,
675
+ exitCode,
676
+ signal,
677
+ });
678
+ });
679
+
680
+ if (timeoutMs !== undefined) {
681
+ timeoutTimer = setTimeout(() => {
682
+ requestStop("timeout");
683
+ }, timeoutMs);
684
+ }
685
+
686
+ abortSignal?.addEventListener("abort", onAbort, { once: true });
687
+ if (abortSignal?.aborted) requestStop("abort");
688
+ });
441
689
  }
442
690
 
443
- export async function runHeadlessModel(options: RunHeadlessModelOptions): Promise<ResultEnvelope> {
444
- if (typeof options.agent !== "string" || options.agent.length === 0) {
445
- throw new Error("agent must be a non-empty string.");
446
- }
447
- if (typeof options.task !== "string" || options.task.length === 0) {
448
- throw new Error("task must be a non-empty string.");
449
- }
450
-
451
- const timeoutMs = normalizeTimeoutMs(options.timeoutMs);
452
- const cwd = resolve(options.cwd ?? process.cwd());
453
- const artifactCwd = resolve(options.artifactCwd ?? cwd);
454
- const startedAt = new Date();
455
- const store = await createAttemptArtifactStore({ cwd: artifactCwd, runId: options.runId, attemptId: options.attemptId, runsDir: options.runsDir });
456
- const argv = buildPiArgv(options);
457
- let processResult: ProcessResult;
458
- try {
459
- processResult = options.sandbox
460
- ? await withSandboxedArgv(argv, { sandbox: options.sandbox, cwd, writablePaths: [store.taskDir], signal: options.signal }, (launch) =>
461
- runProcess(launch.argv, cwd, timeoutMs, store, options.captureToolCalls, options.signal, launch.env, options.onProcessStart),
462
- )
463
- : await runProcess(argv, cwd, timeoutMs, store, options.captureToolCalls, options.signal, undefined, options.onProcessStart);
464
- } catch (error) {
465
- if (!(error instanceof SandboxUnavailableError)) throw error;
466
- const stderrRef = await store.writeTextArtifact("stderr", `${error.message}\n`);
467
- processResult = {
468
- outcome: { status: "failed", failureKind: "sandbox", exitCode: null, signal: null },
469
- stderrRef,
470
- toolCallArtifactRefs: [],
471
- parsed: emptyParseResult(),
472
- stderrText: `${error.message}\n`,
473
- stderrContextLengthExceeded: detectContextLengthExceeded({ stderrText: error.message }),
474
- };
475
- }
476
-
477
- const { outcome: processOutcome, stderrRef, toolCallArtifactRefs, parsed, stderrText, stderrContextLengthExceeded } = processResult;
478
- const contextLengthExceeded = stderrContextLengthExceeded || detectContextLengthExceeded({ stderrText, errors: parsed.errors });
479
-
480
- let outcome = processOutcome;
481
- if (processOutcome.status === "completed" && parsed.parseErrors.length > 0 && parsed.finalAssistantText.length === 0) {
482
- outcome = { ...processOutcome, status: "failed", failureKind: "parse" };
483
- } else if (processOutcome.status === "completed" && parsed.errors.length > 0) {
484
- outcome = { ...processOutcome, status: "failed", failureKind: "model" };
485
- }
486
-
487
- const completedAt = new Date();
488
- const outputText = parsed.finalAssistantText;
489
- const artifacts: ArtifactRef[] = [
490
- stderrRef,
491
- await store.writeTextArtifact("output", outputText),
492
- ...toolCallArtifactRefs,
493
- ];
494
-
495
- return await store.writeResult({
496
- backend: "headless",
497
- status: outcome.status,
498
- failureKind: outcome.failureKind,
499
- cwd: artifactCwd,
500
- startedAt,
501
- completedAt,
502
- workspace: options.workspace ?? { mode: "shared", cwd },
503
- sandbox: options.sandbox ? { enabled: true, allowedDomains: sandboxAllowedDomains(options.sandbox) } : { enabled: false },
504
- exitCode: outcome.exitCode,
505
- signal: outcome.signal,
506
- artifacts,
507
- correlationId: options.correlationId,
508
- metadata: { ...parsed.metadata, contextLengthExceeded },
509
- });
691
+ export async function runHeadlessModel(
692
+ options: RunHeadlessModelOptions,
693
+ ): Promise<ResultEnvelope> {
694
+ if (typeof options.agent !== "string" || options.agent.length === 0) {
695
+ throw new Error("agent must be a non-empty string.");
696
+ }
697
+ if (typeof options.task !== "string" || options.task.length === 0) {
698
+ throw new Error("task must be a non-empty string.");
699
+ }
700
+
701
+ const timeoutMs = normalizeTimeoutMs(options.timeoutMs);
702
+ const cwd = resolve(options.cwd ?? process.cwd());
703
+ const artifactCwd = resolve(options.artifactCwd ?? cwd);
704
+ const sessionMetadata = await resultSessionMetadata(cwd, options.sessionId);
705
+ const startedAt = new Date();
706
+ const store = await createAttemptArtifactStore({
707
+ cwd: artifactCwd,
708
+ runId: options.runId,
709
+ attemptId: options.attemptId,
710
+ runsDir: options.runsDir,
711
+ });
712
+ const argv = buildPiArgv(options);
713
+ let processResult: ProcessResult;
714
+ try {
715
+ processResult = options.sandbox
716
+ ? await withSandboxedArgv(
717
+ argv,
718
+ {
719
+ sandbox: options.sandbox,
720
+ cwd,
721
+ writablePaths: [store.taskDir],
722
+ signal: options.signal,
723
+ },
724
+ (launch) =>
725
+ runProcess(
726
+ launch.argv,
727
+ cwd,
728
+ timeoutMs,
729
+ store,
730
+ options.captureToolCalls,
731
+ options.signal,
732
+ launch.env,
733
+ options.onProcessStart,
734
+ ),
735
+ )
736
+ : await runProcess(
737
+ argv,
738
+ cwd,
739
+ timeoutMs,
740
+ store,
741
+ options.captureToolCalls,
742
+ options.signal,
743
+ undefined,
744
+ options.onProcessStart,
745
+ );
746
+ } catch (error) {
747
+ if (!(error instanceof SandboxUnavailableError)) throw error;
748
+ const stderrRef = await store.writeTextArtifact(
749
+ "stderr",
750
+ `${error.message}\n`,
751
+ );
752
+ processResult = {
753
+ outcome: {
754
+ status: "failed",
755
+ failureKind: "sandbox",
756
+ exitCode: null,
757
+ signal: null,
758
+ },
759
+ stderrRef,
760
+ toolCallArtifactRefs: [],
761
+ parsed: emptyParseResult(),
762
+ stderrText: `${error.message}\n`,
763
+ stderrContextLengthExceeded: detectContextLengthExceeded({
764
+ stderrText: error.message,
765
+ }),
766
+ };
767
+ }
768
+
769
+ const {
770
+ outcome: processOutcome,
771
+ stderrRef,
772
+ toolCallArtifactRefs,
773
+ parsed,
774
+ stderrText,
775
+ stderrContextLengthExceeded,
776
+ } = processResult;
777
+ const contextLengthExceeded =
778
+ stderrContextLengthExceeded ||
779
+ detectContextLengthExceeded({ stderrText, errors: parsed.errors });
780
+
781
+ const outcome = resolvePiJsonOutcome(
782
+ processOutcome,
783
+ parsed,
784
+ contextLengthExceeded,
785
+ );
786
+
787
+ const completedAt = new Date();
788
+ const outputText = parsed.finalAssistantText;
789
+ const artifacts: ArtifactRef[] = [
790
+ stderrRef,
791
+ await store.writeTextArtifact("output", outputText),
792
+ ...toolCallArtifactRefs,
793
+ ];
794
+
795
+ return await store.writeResult({
796
+ backend: "headless",
797
+ status: outcome.status,
798
+ failureKind: outcome.failureKind,
799
+ cwd: artifactCwd,
800
+ startedAt,
801
+ completedAt,
802
+ workspace: options.workspace ?? { mode: "shared", cwd },
803
+ sandbox: options.sandbox
804
+ ? {
805
+ enabled: true,
806
+ allowedDomains: sandboxAllowedDomains(options.sandbox),
807
+ }
808
+ : { enabled: false },
809
+ exitCode: outcome.exitCode,
810
+ signal: outcome.signal,
811
+ artifacts,
812
+ correlationId: options.correlationId,
813
+ metadata: {
814
+ ...resultMetadataFromParse(parsed, contextLengthExceeded, outcome),
815
+ ...sessionMetadata,
816
+ ...(options.parentSessionId === undefined
817
+ ? {}
818
+ : { parentSessionId: options.parentSessionId }),
819
+ },
820
+ });
510
821
  }