palmier 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,7 +22,7 @@ The serve daemon always runs a local HTTP server. Three access modes are availab
22
22
 
23
23
  **Local mode** is always available. The PWA is served at `http://localhost:<port>` and works without pairing or internet. The daemon binds to `127.0.0.1` by default.
24
24
 
25
- **LAN mode** is enabled during `palmier init`. The daemon binds to `0.0.0.0` instead, making the PWA and API endpoints accessible from the local network at `http://<host-ip>:<port>`. Devices must pair via OTP to access. Push notifications are not available.
25
+ **LAN mode** can be enabled during `palmier init`. The daemon binds to `0.0.0.0` instead, making the PWA and API endpoints accessible from the local network at `http://<host-ip>:<port>`. Devices must pair via OTP to access. Push notifications are not available.
26
26
 
27
27
  **Server mode** relays communication through the Palmier cloud server (via [NATS](https://nats.io), a lightweight messaging system). All features including push notifications are available. The PWA is served over HTTPS. Server mode and LAN mode can be active at the same time.
28
28
 
@@ -12,8 +12,7 @@ export class CodexAgent {
12
12
  getTaskRunCommandLine(task, followupPrompt, extraPermissions) {
13
13
  const yolo = extraPermissions === "yolo";
14
14
  const prompt = followupPrompt ?? (getAgentInstructions(task.frontmatter.id, yolo || !this.supportsPermissions) + "\n\n" + (task.body || task.frontmatter.user_prompt));
15
- // Using danger-full-access until workspace-write is fixed: https://github.com/openai/codex/issues/12572
16
- const args = ["exec", "--skip-git-repo-check", "--sandbox", "danger-full-access"];
15
+ const args = ["exec", "--skip-git-repo-check", "--sandbox", yolo ? "danger-full-access" : "workspace-write"];
17
16
  if (!yolo) {
18
17
  const allPerms = [...(task.frontmatter.permissions ?? []), ...(extraPermissions ?? [])];
19
18
  for (const p of allPerms) {
@@ -7,10 +7,6 @@ export declare function stripPalmierMarkers(output: string): string;
7
7
  * Execute a task by ID.
8
8
  */
9
9
  export declare function runCommand(taskId: string): Promise<void>;
10
- /**
11
- * Extract report file names from agent output.
12
- * Looks for lines matching: [PALMIER_REPORT] <filename>
13
- */
14
10
  export declare function parseReportFiles(output: string): string[];
15
11
  /**
16
12
  * Extract required permissions from agent output.
@@ -70,6 +70,14 @@ async function invokeAgentWithRetries(ctx, invokeTask) {
70
70
  }
71
71
  writer.end(reportFiles.length > 0 ? reportFiles : undefined);
72
72
  await publishHostEvent(ctx.nc, ctx.config.hostId, ctx.taskId, { event_type: "result-updated", run_id: ctx.runId });
73
+ if (reportFiles.length > 0) {
74
+ await publishHostEvent(ctx.nc, ctx.config.hostId, ctx.taskId, {
75
+ event_type: "report-generated",
76
+ run_id: ctx.runId,
77
+ name: ctx.task.frontmatter.name,
78
+ report_files: reportFiles,
79
+ });
80
+ }
73
81
  // Permission handling — agent requested permissions
74
82
  if (requiredPermissions.length > 0) {
75
83
  const response = await requestPermission(ctx.config, ctx.task, ctx.taskDir, requiredPermissions);
@@ -77,7 +85,7 @@ async function invokeAgentWithRetries(ctx, invokeTask) {
77
85
  await appendAndNotify(ctx, {
78
86
  role: "user",
79
87
  time: Date.now(),
80
- content: "Denied",
88
+ content: "Deny & Abort Task",
81
89
  type: "permission",
82
90
  });
83
91
  return { outcome: "failed" };
@@ -87,7 +95,7 @@ async function invokeAgentWithRetries(ctx, invokeTask) {
87
95
  await appendAndNotify(ctx, {
88
96
  role: "user",
89
97
  time: Date.now(),
90
- content: response === "granted_all" ? "Granted for all" : "Granted",
98
+ content: response === "granted_all" ? "Allow Always" : "Allow Once",
91
99
  type: "permission",
92
100
  });
93
101
  if (response === "granted_all") {
@@ -173,14 +181,19 @@ export async function runCommand(taskId) {
173
181
  // If requires_confirmation, notify clients and wait
174
182
  if (task.frontmatter.requires_confirmation) {
175
183
  const confirmed = await requestConfirmation(config, task, taskDir);
184
+ const confirmPrompt = `**Task Confirmation**\n\nRun task "${taskName || task.frontmatter.user_prompt}"?`;
185
+ appendRunMessage(taskDir, runId, { role: "assistant", time: Date.now(), content: confirmPrompt, type: "confirmation" });
186
+ await publishHostEvent(nc, config.hostId, taskId, { event_type: "result-updated", run_id: runId });
176
187
  if (!confirmed) {
177
188
  console.log("Task aborted by user.");
189
+ appendRunMessage(taskDir, runId, { role: "user", time: Date.now(), content: "Aborted", type: "confirmation" });
178
190
  appendRunMessage(taskDir, runId, { role: "status", time: Date.now(), content: "", type: "aborted" });
179
191
  await publishTaskEvent(nc, config, taskDir, taskId, "aborted", taskName, runId);
180
192
  await cleanup();
181
193
  return;
182
194
  }
183
195
  console.log("Task confirmed by user.");
196
+ appendRunMessage(taskDir, runId, { role: "user", time: Date.now(), content: "Confirmed", type: "confirmation" });
184
197
  appendRunMessage(taskDir, runId, { role: "status", time: Date.now(), content: "", type: "confirmation" });
185
198
  await publishHostEvent(nc, config.hostId, taskId, { event_type: "result-updated", run_id: runId });
186
199
  }
@@ -408,6 +421,7 @@ async function requestConfirmation(config, task, taskDir) {
408
421
  * Extract report file names from agent output.
409
422
  * Looks for lines matching: [PALMIER_REPORT] <filename>
410
423
  */
424
+ const ALLOWED_REPORT_EXT = [".md", ".txt", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"];
411
425
  export function parseReportFiles(output) {
412
426
  const regex = new RegExp(`^\\${TASK_REPORT_PREFIX}\\s+(.+)$`, "gm");
413
427
  const files = [];
@@ -415,8 +429,12 @@ export function parseReportFiles(output) {
415
429
  while ((match = regex.exec(output)) !== null) {
416
430
  const name = match[1].trim();
417
431
  // Skip placeholder examples echoed from the prompt (e.g. "<filename>")
418
- if (name && !name.startsWith("<"))
419
- files.push(name);
432
+ if (!name || name.startsWith("<"))
433
+ continue;
434
+ const ext = name.lastIndexOf(".") >= 0 ? name.slice(name.lastIndexOf(".")).toLowerCase() : "";
435
+ if (!ALLOWED_REPORT_EXT.includes(ext))
436
+ continue;
437
+ files.push(name);
420
438
  }
421
439
  return files;
422
440
  }
@@ -37,10 +37,12 @@ function parseResultFrontmatter(raw) {
37
37
  const startedMsg = statusMessages.find((m) => m.type === "started");
38
38
  const terminalStates = ["finished", "failed", "aborted"];
39
39
  const terminalMsg = [...statusMessages].reverse().find((m) => terminalStates.includes(m.type ?? ""));
40
- // If last status is "started", determine if it's a task run or follow-up
40
+ // If last status is "started" (or continuation like "confirmation"/"monitoring"),
41
+ // determine if it's a task run or follow-up
42
+ const activeStates = ["started", "monitoring", "confirmation"];
41
43
  let runningState;
42
- if (lastStatus?.type === "started" || lastStatus?.type === "monitoring") {
43
- runningState = terminalMsg ? "followup" : (lastStatus?.type ?? "started");
44
+ if (activeStates.includes(lastStatus?.type ?? "")) {
45
+ runningState = terminalMsg ? "followup" : "started";
44
46
  }
45
47
  else {
46
48
  runningState = lastStatus?.type;
@@ -154,6 +156,17 @@ export function createRpcHandler(config, nc) {
154
156
  host_platform: process.platform,
155
157
  };
156
158
  }
159
+ case "task.get": {
160
+ const params = request.params;
161
+ const taskDir = getTaskDir(config.projectRoot, params.id);
162
+ try {
163
+ const task = parseTaskFile(taskDir);
164
+ return flattenTask(task);
165
+ }
166
+ catch {
167
+ return { error: "Task not found" };
168
+ }
169
+ }
157
170
  case "task.create": {
158
171
  const params = request.params;
159
172
  // Only generate a plan for longer prompts that benefit from it
@@ -497,11 +510,14 @@ export function createRpcHandler(config, nc) {
497
510
  if (!params.run_id || !Array.isArray(params.report_files) || params.report_files.length === 0) {
498
511
  return { error: "run_id and report_files are required" };
499
512
  }
513
+ const ALLOWED_EXT = [".md", ".txt", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"];
514
+ const IMAGE_EXT = [".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"];
500
515
  const reports = [];
501
516
  const runDir = path.join(config.projectRoot, "tasks", params.id, params.run_id);
502
517
  for (const file of params.report_files) {
503
- if (!file.endsWith(".md") && !file.endsWith(".txt")) {
504
- reports.push({ file, error: "must end with .md or .txt" });
518
+ const ext = path.extname(file).toLowerCase();
519
+ if (!ALLOWED_EXT.includes(ext)) {
520
+ reports.push({ file, error: `unsupported file type: ${ext}` });
505
521
  continue;
506
522
  }
507
523
  const basename = path.basename(file);
@@ -511,8 +527,15 @@ export function createRpcHandler(config, nc) {
511
527
  }
512
528
  const reportPath = path.join(runDir, basename);
513
529
  try {
514
- const content = fs.readFileSync(reportPath, "utf-8");
515
- reports.push({ file, content });
530
+ if (IMAGE_EXT.includes(ext)) {
531
+ const buf = fs.readFileSync(reportPath);
532
+ const mime = ext === ".svg" ? "image/svg+xml" : `image/${ext.slice(1).replace("jpg", "jpeg")}`;
533
+ reports.push({ file, data_url: `data:${mime};base64,${buf.toString("base64")}` });
534
+ }
535
+ else {
536
+ const content = fs.readFileSync(reportPath, "utf-8");
537
+ reports.push({ file, content });
538
+ }
516
539
  }
517
540
  catch {
518
541
  reports.push({ file, error: "Report file not found" });
@@ -61,7 +61,6 @@ export function spawnCommand(command, args, opts) {
61
61
  opts.onData(d.toString("utf-8"));
62
62
  });
63
63
  child.stderr.on("data", (d) => {
64
- chunks.push(d);
65
64
  process.stderr.write(d);
66
65
  if (opts.onData)
67
66
  opts.onData(d.toString("utf-8"));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "palmier",
3
- "version": "0.5.5",
3
+ "version": "0.5.7",
4
4
  "description": "Palmier host CLI - provisions, executes tasks, and serves NATS RPC",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Hongxu Cai",
@@ -16,8 +16,7 @@ export class CodexAgent implements AgentTool {
16
16
  getTaskRunCommandLine(task: ParsedTask, followupPrompt?: string, extraPermissions?: RequiredPermission[] | "yolo"): CommandLine {
17
17
  const yolo = extraPermissions === "yolo";
18
18
  const prompt = followupPrompt ?? (getAgentInstructions(task.frontmatter.id, yolo || !this.supportsPermissions) + "\n\n" + (task.body || task.frontmatter.user_prompt));
19
- // Using danger-full-access until workspace-write is fixed: https://github.com/openai/codex/issues/12572
20
- const args = ["exec", "--skip-git-repo-check", "--sandbox", "danger-full-access"];
19
+ const args = ["exec", "--skip-git-repo-check", "--sandbox", yolo ? "danger-full-access" : "workspace-write"];
21
20
 
22
21
  if (!yolo) {
23
22
  const allPerms = [...(task.frontmatter.permissions ?? []), ...(extraPermissions ?? [])];
@@ -106,6 +106,15 @@ async function invokeAgentWithRetries(
106
106
  writer.end(reportFiles.length > 0 ? reportFiles : undefined);
107
107
  await publishHostEvent(ctx.nc, ctx.config.hostId, ctx.taskId, { event_type: "result-updated", run_id: ctx.runId });
108
108
 
109
+ if (reportFiles.length > 0) {
110
+ await publishHostEvent(ctx.nc, ctx.config.hostId, ctx.taskId, {
111
+ event_type: "report-generated",
112
+ run_id: ctx.runId,
113
+ name: ctx.task.frontmatter.name,
114
+ report_files: reportFiles,
115
+ });
116
+ }
117
+
109
118
  // Permission handling — agent requested permissions
110
119
  if (requiredPermissions.length > 0) {
111
120
  const response = await requestPermission(ctx.config, ctx.task, ctx.taskDir, requiredPermissions);
@@ -114,7 +123,7 @@ async function invokeAgentWithRetries(
114
123
  await appendAndNotify(ctx, {
115
124
  role: "user",
116
125
  time: Date.now(),
117
- content: "Denied",
126
+ content: "Deny & Abort Task",
118
127
  type: "permission",
119
128
  });
120
129
  return { outcome: "failed" };
@@ -128,7 +137,7 @@ async function invokeAgentWithRetries(
128
137
  await appendAndNotify(ctx, {
129
138
  role: "user",
130
139
  time: Date.now(),
131
- content: response === "granted_all" ? "Granted for all" : "Granted",
140
+ content: response === "granted_all" ? "Allow Always" : "Allow Once",
132
141
  type: "permission",
133
142
  });
134
143
 
@@ -228,14 +237,20 @@ export async function runCommand(taskId: string): Promise<void> {
228
237
  // If requires_confirmation, notify clients and wait
229
238
  if (task.frontmatter.requires_confirmation) {
230
239
  const confirmed = await requestConfirmation(config, task, taskDir);
240
+ const confirmPrompt = `**Task Confirmation**\n\nRun task "${taskName || task.frontmatter.user_prompt}"?`;
241
+ appendRunMessage(taskDir, runId, { role: "assistant", time: Date.now(), content: confirmPrompt, type: "confirmation" });
242
+ await publishHostEvent(nc, config.hostId, taskId, { event_type: "result-updated", run_id: runId });
243
+
231
244
  if (!confirmed) {
232
245
  console.log("Task aborted by user.");
246
+ appendRunMessage(taskDir, runId, { role: "user", time: Date.now(), content: "Aborted", type: "confirmation" });
233
247
  appendRunMessage(taskDir, runId, { role: "status", time: Date.now(), content: "", type: "aborted" });
234
248
  await publishTaskEvent(nc, config, taskDir, taskId, "aborted", taskName, runId);
235
249
  await cleanup();
236
250
  return;
237
251
  }
238
252
  console.log("Task confirmed by user.");
253
+ appendRunMessage(taskDir, runId, { role: "user", time: Date.now(), content: "Confirmed", type: "confirmation" });
239
254
  appendRunMessage(taskDir, runId, { role: "status", time: Date.now(), content: "", type: "confirmation" });
240
255
  await publishHostEvent(nc, config.hostId, taskId, { event_type: "result-updated", run_id: runId });
241
256
  }
@@ -499,6 +514,8 @@ async function requestConfirmation(
499
514
  * Extract report file names from agent output.
500
515
  * Looks for lines matching: [PALMIER_REPORT] <filename>
501
516
  */
517
+ const ALLOWED_REPORT_EXT = [".md", ".txt", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"];
518
+
502
519
  export function parseReportFiles(output: string): string[] {
503
520
  const regex = new RegExp(`^\\${TASK_REPORT_PREFIX}\\s+(.+)$`, "gm");
504
521
  const files: string[] = [];
@@ -506,7 +523,10 @@ export function parseReportFiles(output: string): string[] {
506
523
  while ((match = regex.exec(output)) !== null) {
507
524
  const name = match[1].trim();
508
525
  // Skip placeholder examples echoed from the prompt (e.g. "<filename>")
509
- if (name && !name.startsWith("<")) files.push(name);
526
+ if (!name || name.startsWith("<")) continue;
527
+ const ext = name.lastIndexOf(".") >= 0 ? name.slice(name.lastIndexOf(".")).toLowerCase() : "";
528
+ if (!ALLOWED_REPORT_EXT.includes(ext)) continue;
529
+ files.push(name);
510
530
  }
511
531
  return files;
512
532
  }
@@ -47,10 +47,12 @@ function parseResultFrontmatter(raw: string): Record<string, unknown> {
47
47
  const terminalStates = ["finished", "failed", "aborted"];
48
48
  const terminalMsg = [...statusMessages].reverse().find((m: ConversationMessage) => terminalStates.includes(m.type ?? ""));
49
49
 
50
- // If last status is "started", determine if it's a task run or follow-up
50
+ // If last status is "started" (or continuation like "confirmation"/"monitoring"),
51
+ // determine if it's a task run or follow-up
52
+ const activeStates = ["started", "monitoring", "confirmation"];
51
53
  let runningState: string | undefined;
52
- if (lastStatus?.type === "started" || lastStatus?.type === "monitoring") {
53
- runningState = terminalMsg ? "followup" : (lastStatus?.type ?? "started");
54
+ if (activeStates.includes(lastStatus?.type ?? "")) {
55
+ runningState = terminalMsg ? "followup" : "started";
54
56
  } else {
55
57
  runningState = lastStatus?.type;
56
58
  }
@@ -182,6 +184,17 @@ export function createRpcHandler(config: HostConfig, nc?: NatsConnection) {
182
184
  };
183
185
  }
184
186
 
187
+ case "task.get": {
188
+ const params = request.params as { id: string };
189
+ const taskDir = getTaskDir(config.projectRoot, params.id);
190
+ try {
191
+ const task = parseTaskFile(taskDir);
192
+ return flattenTask(task);
193
+ } catch {
194
+ return { error: "Task not found" };
195
+ }
196
+ }
197
+
185
198
  case "task.create": {
186
199
  const params = request.params as {
187
200
  user_prompt: string;
@@ -577,11 +590,14 @@ export function createRpcHandler(config: HostConfig, nc?: NatsConnection) {
577
590
  if (!params.run_id || !Array.isArray(params.report_files) || params.report_files.length === 0) {
578
591
  return { error: "run_id and report_files are required" };
579
592
  }
580
- const reports: Array<{ file: string; content?: string; error?: string }> = [];
593
+ const ALLOWED_EXT = [".md", ".txt", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"];
594
+ const IMAGE_EXT = [".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"];
595
+ const reports: Array<{ file: string; content?: string; data_url?: string; error?: string }> = [];
581
596
  const runDir = path.join(config.projectRoot, "tasks", params.id, params.run_id);
582
597
  for (const file of params.report_files) {
583
- if (!file.endsWith(".md") && !file.endsWith(".txt")) {
584
- reports.push({ file, error: "must end with .md or .txt" });
598
+ const ext = path.extname(file).toLowerCase();
599
+ if (!ALLOWED_EXT.includes(ext)) {
600
+ reports.push({ file, error: `unsupported file type: ${ext}` });
585
601
  continue;
586
602
  }
587
603
  const basename = path.basename(file);
@@ -591,8 +607,14 @@ export function createRpcHandler(config: HostConfig, nc?: NatsConnection) {
591
607
  }
592
608
  const reportPath = path.join(runDir, basename);
593
609
  try {
594
- const content = fs.readFileSync(reportPath, "utf-8");
595
- reports.push({ file, content });
610
+ if (IMAGE_EXT.includes(ext)) {
611
+ const buf = fs.readFileSync(reportPath);
612
+ const mime = ext === ".svg" ? "image/svg+xml" : `image/${ext.slice(1).replace("jpg", "jpeg")}`;
613
+ reports.push({ file, data_url: `data:${mime};base64,${buf.toString("base64")}` });
614
+ } else {
615
+ const content = fs.readFileSync(reportPath, "utf-8");
616
+ reports.push({ file, content });
617
+ }
596
618
  } catch {
597
619
  reports.push({ file, error: "Report file not found" });
598
620
  }
@@ -110,7 +110,6 @@ export function spawnCommand(
110
110
  if (opts.onData) opts.onData(d.toString("utf-8"));
111
111
  });
112
112
  child.stderr!.on("data", (d: Buffer) => {
113
- chunks.push(d);
114
113
  process.stderr.write(d);
115
114
  if (opts.onData) opts.onData(d.toString("utf-8"));
116
115
  });
@@ -0,0 +1,224 @@
1
+ import { describe, it, beforeEach } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import * as fs from "fs";
4
+ import * as os from "os";
5
+ import * as path from "path";
6
+ import {
7
+ createRunDir,
8
+ appendRunMessage,
9
+ readRunMessages,
10
+ beginStreamingMessage,
11
+ spliceUserMessage,
12
+ } from "../src/task.js";
13
+
14
+ let taskDir: string;
15
+ let runId: string;
16
+
17
+ function setup() {
18
+ taskDir = fs.mkdtempSync(path.join(os.tmpdir(), "palmier-test-"));
19
+ runId = createRunDir(taskDir, "Test Task", 1000, "claude");
20
+ }
21
+
22
+ describe("appendRunMessage + readRunMessages", () => {
23
+ beforeEach(setup);
24
+
25
+ it("writes and reads a user message", () => {
26
+ appendRunMessage(taskDir, runId, { role: "user", time: 1001, content: "Hello" });
27
+ const msgs = readRunMessages(taskDir, runId);
28
+ assert.equal(msgs.length, 1);
29
+ assert.equal(msgs[0].role, "user");
30
+ assert.equal(msgs[0].content, "Hello");
31
+ assert.equal(msgs[0].time, 1001);
32
+ });
33
+
34
+ it("writes and reads an assistant message", () => {
35
+ appendRunMessage(taskDir, runId, { role: "assistant", time: 1002, content: "Hi there" });
36
+ const msgs = readRunMessages(taskDir, runId);
37
+ assert.equal(msgs.length, 1);
38
+ assert.equal(msgs[0].role, "assistant");
39
+ assert.equal(msgs[0].content, "Hi there");
40
+ });
41
+
42
+ it("writes and reads a status message", () => {
43
+ appendRunMessage(taskDir, runId, { role: "status", time: 1003, content: "", type: "started" });
44
+ const msgs = readRunMessages(taskDir, runId);
45
+ assert.equal(msgs.length, 1);
46
+ assert.equal(msgs[0].role, "status");
47
+ assert.equal(msgs[0].type, "started");
48
+ });
49
+
50
+ it("preserves message type", () => {
51
+ appendRunMessage(taskDir, runId, { role: "user", time: 1004, content: "Confirmed", type: "confirmation" });
52
+ const msgs = readRunMessages(taskDir, runId);
53
+ assert.equal(msgs[0].type, "confirmation");
54
+ });
55
+
56
+ it("preserves attachments", () => {
57
+ appendRunMessage(taskDir, runId, { role: "assistant", time: 1005, content: "Done", attachments: ["report.md", "chart.png"] });
58
+ const msgs = readRunMessages(taskDir, runId);
59
+ assert.deepEqual(msgs[0].attachments, ["report.md", "chart.png"]);
60
+ });
61
+
62
+ it("reads multiple messages in order", () => {
63
+ appendRunMessage(taskDir, runId, { role: "status", time: 1000, content: "", type: "started" });
64
+ appendRunMessage(taskDir, runId, { role: "user", time: 1001, content: "Do something" });
65
+ appendRunMessage(taskDir, runId, { role: "assistant", time: 1002, content: "Done" });
66
+ appendRunMessage(taskDir, runId, { role: "status", time: 1003, content: "", type: "finished" });
67
+ const msgs = readRunMessages(taskDir, runId);
68
+ assert.equal(msgs.length, 4);
69
+ assert.equal(msgs[0].type, "started");
70
+ assert.equal(msgs[1].role, "user");
71
+ assert.equal(msgs[2].role, "assistant");
72
+ assert.equal(msgs[3].type, "finished");
73
+ });
74
+ });
75
+
76
+ describe("confirmation flow", () => {
77
+ beforeEach(setup);
78
+
79
+ it("records confirmation with assistant prompt, user response, and status", () => {
80
+ appendRunMessage(taskDir, runId, { role: "status", time: 1000, content: "", type: "started" });
81
+ appendRunMessage(taskDir, runId, { role: "assistant", time: 1001, content: '**Task Confirmation**\n\nRun task "My Task"?', type: "confirmation" });
82
+ appendRunMessage(taskDir, runId, { role: "user", time: 1002, content: "Confirmed", type: "confirmation" });
83
+ appendRunMessage(taskDir, runId, { role: "status", time: 1003, content: "", type: "confirmation" });
84
+
85
+ const msgs = readRunMessages(taskDir, runId);
86
+ assert.equal(msgs.length, 4);
87
+ assert.equal(msgs[1].role, "assistant");
88
+ assert.ok(msgs[1].content.includes("Task Confirmation"));
89
+ assert.equal(msgs[2].role, "user");
90
+ assert.equal(msgs[2].content, "Confirmed");
91
+ assert.equal(msgs[3].role, "status");
92
+ assert.equal(msgs[3].type, "confirmation");
93
+ });
94
+
95
+ it("records aborted confirmation", () => {
96
+ appendRunMessage(taskDir, runId, { role: "status", time: 1000, content: "", type: "started" });
97
+ appendRunMessage(taskDir, runId, { role: "assistant", time: 1001, content: '**Task Confirmation**\n\nRun task "My Task"?', type: "confirmation" });
98
+ appendRunMessage(taskDir, runId, { role: "user", time: 1002, content: "Aborted", type: "confirmation" });
99
+ appendRunMessage(taskDir, runId, { role: "status", time: 1003, content: "", type: "aborted" });
100
+
101
+ const msgs = readRunMessages(taskDir, runId);
102
+ assert.equal(msgs.length, 4);
103
+ assert.equal(msgs[2].content, "Aborted");
104
+ assert.equal(msgs[3].type, "aborted");
105
+ });
106
+ });
107
+
108
+ describe("beginStreamingMessage", () => {
109
+ beforeEach(setup);
110
+
111
+ it("streams chunks and finalizes", () => {
112
+ const writer = beginStreamingMessage(taskDir, runId, 2000);
113
+ writer.write("Hello ");
114
+ writer.write("world");
115
+ writer.end();
116
+
117
+ const msgs = readRunMessages(taskDir, runId);
118
+ assert.equal(msgs.length, 1);
119
+ assert.equal(msgs[0].role, "assistant");
120
+ assert.equal(msgs[0].content, "Hello world");
121
+ });
122
+
123
+ it("attaches report files to the last assistant message", () => {
124
+ const writer = beginStreamingMessage(taskDir, runId, 2000);
125
+ writer.write("Generated report.");
126
+ writer.end(["report.md", "chart.png"]);
127
+
128
+ const msgs = readRunMessages(taskDir, runId);
129
+ assert.equal(msgs.length, 1);
130
+ assert.deepEqual(msgs[0].attachments, ["report.md", "chart.png"]);
131
+ });
132
+ });
133
+
134
+ describe("spliceUserMessage", () => {
135
+ beforeEach(setup);
136
+
137
+ it("splits assistant stream for user input", () => {
138
+ const writer = beginStreamingMessage(taskDir, runId, 2000);
139
+ writer.write("Working on it...");
140
+
141
+ spliceUserMessage(taskDir, runId, { role: "user", time: 2001, content: "my-api-key", type: "input" });
142
+
143
+ writer.write("Continuing with key.");
144
+ writer.end();
145
+
146
+ const msgs = readRunMessages(taskDir, runId);
147
+ assert.equal(msgs.length, 3);
148
+ assert.equal(msgs[0].role, "assistant");
149
+ assert.equal(msgs[0].content, "Working on it...");
150
+ assert.equal(msgs[1].role, "user");
151
+ assert.equal(msgs[1].content, "my-api-key");
152
+ assert.equal(msgs[1].type, "input");
153
+ assert.equal(msgs[2].role, "assistant");
154
+ assert.equal(msgs[2].content, "Continuing with key.");
155
+ });
156
+
157
+ it("appends assistant text before splicing", () => {
158
+ const writer = beginStreamingMessage(taskDir, runId, 2000);
159
+ writer.write("Processing");
160
+
161
+ spliceUserMessage(
162
+ taskDir, runId,
163
+ { role: "user", time: 2001, content: "answer1", type: "input" },
164
+ "\n\n**What is your key?**",
165
+ );
166
+
167
+ writer.write("Done.");
168
+ writer.end();
169
+
170
+ const msgs = readRunMessages(taskDir, runId);
171
+ assert.equal(msgs.length, 3);
172
+ assert.ok(msgs[0].content.includes("What is your key?"));
173
+ assert.equal(msgs[1].content, "answer1");
174
+ assert.equal(msgs[2].content, "Done.");
175
+ });
176
+
177
+ it("attaches reports to last assistant message after splice", () => {
178
+ const writer = beginStreamingMessage(taskDir, runId, 2000);
179
+ writer.write("Part 1");
180
+
181
+ spliceUserMessage(taskDir, runId, { role: "user", time: 2001, content: "input", type: "input" });
182
+
183
+ writer.write("Part 2");
184
+ writer.end(["report.md"]);
185
+
186
+ const msgs = readRunMessages(taskDir, runId);
187
+ // Attachments should be on the last assistant message (after splice), not the first
188
+ assert.equal(msgs[0].attachments, undefined);
189
+ assert.deepEqual(msgs[2].attachments, ["report.md"]);
190
+ });
191
+ });
192
+
193
+ describe("permission flow", () => {
194
+ beforeEach(setup);
195
+
196
+ it("records permission grant as user message", () => {
197
+ appendRunMessage(taskDir, runId, { role: "status", time: 1000, content: "", type: "started" });
198
+ appendRunMessage(taskDir, runId, { role: "user", time: 1001, content: "Do something" });
199
+ // Simulate agent output with permission request (via streaming)
200
+ const writer = beginStreamingMessage(taskDir, runId, 1002);
201
+ writer.write("I need permission.\n\n**Permissions requested:**\n- **Read** Read files\n");
202
+ writer.end();
203
+ // Permission granted
204
+ appendRunMessage(taskDir, runId, { role: "user", time: 1003, content: "Granted", type: "permission" });
205
+
206
+ const msgs = readRunMessages(taskDir, runId);
207
+ assert.equal(msgs.length, 4);
208
+ assert.equal(msgs[3].role, "user");
209
+ assert.equal(msgs[3].content, "Granted");
210
+ assert.equal(msgs[3].type, "permission");
211
+ });
212
+
213
+ it("records permission denial", () => {
214
+ appendRunMessage(taskDir, runId, { role: "user", time: 1001, content: "Do something" });
215
+ const writer = beginStreamingMessage(taskDir, runId, 1002);
216
+ writer.write("Need permission.");
217
+ writer.end();
218
+ appendRunMessage(taskDir, runId, { role: "user", time: 1003, content: "Denied", type: "permission" });
219
+
220
+ const msgs = readRunMessages(taskDir, runId);
221
+ assert.equal(msgs[2].content, "Denied");
222
+ assert.equal(msgs[2].type, "permission");
223
+ });
224
+ });