castle-web-cli 0.4.12 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,10 +11,8 @@ const ROUTER_RULES = `You are Castle's create assistant: the fast conversational
11
11
  Hard rules:
12
12
  - You NEVER edit files or run state-changing commands. All building and fixing happens through background task agents -- always hand the longer work to them.
13
13
  - You are the fast lane: get to your final reply as quickly as possible. When the user reports something broken, do NOT dig into the code to diagnose it first -- spawn a task whose job is to investigate AND fix it. Only read deck files when your reply itself needs them (answering a question about the deck, grounding a claim -- never make things up); never read as pre-work before spawning a task.
14
- - Decompose with judgment -- parallel tasks are powerful but add overhead. Parallelize when the work is ambitious enough to be worth it, not for small asks.
15
- - For a new game (or a substantial new part of one): make sure the FIRST task delivers a small coherent playable demo on its own, so the user is not waiting on several tasks before playing anything. Then fan out in parallel to take it further.
16
- - Work on ART and MECHANICS simultaneously as separate tasks, with a later task (\`after:\` both) that combines them.
17
- - Parallel tasks must each be separately testable and must not step on each other's files.
14
+ - ALWAYS break the user's request into separately testable, parallelizable chunks: several small focused tasks over one big one, each something the user can try in the running deck on its own.
15
+ - Write task prompts for speed: ask each task for the quickest viable, testable change that still delivers a meaningful step up (one or a few features, pieces of art, etc.). Only tell a task to take its time and dive deep when the user specifically asked for that.
18
16
  - To spawn a background task, include a fenced block in your reply:
19
17
 
20
18
  \`\`\`castle-task
@@ -27,6 +25,11 @@ what to build or fix, which files matter, and what "done" looks like.
27
25
 
28
26
  - Use \`after:\` only when a task truly builds on or would conflict with another (it may reference tasks spawned in this same reply, by title). Independent tasks must NOT wait on each other.
29
27
  - Use \`supersedes:\` whenever the new task fixes, redoes, or makes obsolete an earlier task -- the old row is checked off the user's board automatically. Keep the board meaning "what to look at right now".
28
+ - When the user confirms something works (or tells you to clear items), check those finished tasks off the board by including:
29
+
30
+ \`\`\`castle-done
31
+ comma-separated titles or ids of the finished tasks
32
+ \`\`\`
30
33
  - Tasks are one-and-done -- when the user gives feedback on a finished task, spawn a new fix task rather than reopening the old one.
31
34
  - Task agents are capable coding agents working in this same deck directory, but they know nothing about this conversation beyond your prompt.
32
35
 
@@ -92,6 +95,7 @@ Your task (id ${opts.taskId}): ${opts.title}
92
95
  ${opts.prompt}${deps}
93
96
 
94
97
  Operating rules:
98
+ - Speed is of the essence -- go for the quickest viable, testable change, unless this prompt explicitly tells you to take longer and dive deep. It must still amount to at least one meaningful step up (one or a few features, pieces of art, etc.), never a token gesture.
95
99
  - Do this one task completely, then stop. Do not expand scope.
96
100
  - Update your progress VERY frequently: write a bare integer 0-100 to ${opts.progressPath} (e.g. \`echo 30 > ${opts.progressPath}\`) every time you advance -- at least every 10 points, or every 20 for properly small tasks. Start near 10, write 90 just before wrapping up. Never let it sit stale while you work.
97
101
  - Before finishing, write ${opts.notesPath}: a SHORT test guide for the user -- 2-4 brief sentences. Lead with exactly what to try in the running deck; mention a blocker or open question if you hit one. NO file-by-file implementation detail, no code names unless the user needs them to test. The user reads this verbatim when checking your work off.
package/dist/agent.js CHANGED
@@ -70,7 +70,7 @@ const ROUTER_TIMEOUT_MS = 3 * 60_000;
70
70
  const TASK_TIMEOUT_MS = 30 * 60_000;
71
71
  const MAX_TASK_ATTEMPTS = 3;
72
72
  const TASK_POLL_MS = 1_000;
73
- const TASK_FENCE = '```castle-task';
73
+ const FENCE_HOLDBACK = '```castle-';
74
74
  const RESULT_SUMMARY_CHARS = 600;
75
75
  const MAX_ATTACHMENTS = 6;
76
76
  const MAX_ATTACHMENT_BYTES = 8 * 1024 * 1024;
@@ -94,11 +94,11 @@ function readJsonFile(filePath) {
94
94
  // marker at the tail so "```cast..." never flashes up before we know what it
95
95
  // is. The full cleaned text replaces the streamed text when the reply ends.
96
96
  function visibleLength(raw) {
97
- const idx = raw.indexOf(TASK_FENCE);
97
+ const idx = raw.indexOf(FENCE_HOLDBACK);
98
98
  if (idx >= 0)
99
99
  return idx;
100
- for (let k = TASK_FENCE.length - 1; k > 0; k--) {
101
- if (raw.endsWith(TASK_FENCE.slice(0, k)))
100
+ for (let k = FENCE_HOLDBACK.length - 1; k > 0; k--) {
101
+ if (raw.endsWith(FENCE_HOLDBACK.slice(0, k)))
102
102
  return raw.length - k;
103
103
  }
104
104
  return raw.length;
@@ -108,8 +108,18 @@ function visibleLength(raw) {
108
108
  // either order), then the task prompt.
109
109
  function extractDirectives(full) {
110
110
  const directives = [];
111
+ const checkoffs = [];
112
+ const doneRe = /```castle-done[ \t]*\r?\n([\s\S]*?)```/g;
113
+ const withoutDone = full.replace(doneRe, (_match, body) => {
114
+ for (const token of String(body).split(/[,\n]/)) {
115
+ const trimmed = token.trim();
116
+ if (trimmed)
117
+ checkoffs.push(trimmed);
118
+ }
119
+ return '';
120
+ });
111
121
  const fenceRe = /```castle-task[ \t]*\r?\n([\s\S]*?)```/g;
112
- const cleaned = full.replace(fenceRe, (_match, body) => {
122
+ const cleaned = withoutDone.replace(fenceRe, (_match, body) => {
113
123
  const lines = String(body).replace(/\r/g, '').split('\n');
114
124
  const title = (lines.shift() ?? '').trim();
115
125
  const headers = { after: [], supersedes: [] };
@@ -129,7 +139,7 @@ function extractDirectives(full) {
129
139
  }
130
140
  return '';
131
141
  });
132
- return { cleaned: cleaned.replace(/\n{3,}/g, '\n\n').trim(), directives };
142
+ return { cleaned: cleaned.replace(/\n{3,}/g, '\n\n').trim(), directives, checkoffs };
133
143
  }
134
144
  // Claude names tools directly (Read, Edit, Bash, ...).
135
145
  function claudeToolActivityLabel(name) {
@@ -412,7 +422,7 @@ function createTaskStore(opts) {
412
422
  });
413
423
  }
414
424
  function maybeStart(task) {
415
- if (task.status !== 'waiting' || !depsAreSettled(task))
425
+ if (task.status !== 'waiting' || task.acknowledged || !depsAreSettled(task))
416
426
  return;
417
427
  start(task);
418
428
  }
@@ -434,6 +444,18 @@ function createTaskStore(opts) {
434
444
  // equivalent slash command.
435
445
  const invocation = buildAgentInvocation(backend, 'task', backend === 'claude' ? `/goal ${taskPrompt}` : taskPrompt, opts.claudeModel());
436
446
  let result = { ok: false, finalText: '', error: 'not run' };
447
+ let lineBuf = '';
448
+ const flushFeedLines = (delta) => {
449
+ lineBuf += delta;
450
+ let nl = lineBuf.indexOf('\n');
451
+ while (nl >= 0) {
452
+ const line = lineBuf.slice(0, nl).trim();
453
+ lineBuf = lineBuf.slice(nl + 1);
454
+ if (line)
455
+ opts.onFeed(task, line);
456
+ nl = lineBuf.indexOf('\n');
457
+ }
458
+ };
437
459
  for (let attempt = 1; attempt <= MAX_TASK_ATTEMPTS; attempt++) {
438
460
  result = await runAgentCli({
439
461
  cwd: deckDir,
@@ -446,6 +468,11 @@ function createTaskStore(opts) {
446
468
  onSpawn: (pid) => {
447
469
  task.pid = pid;
448
470
  },
471
+ onDelta: (delta) => flushFeedLines(delta),
472
+ onActivity: (activity) => {
473
+ if (activity)
474
+ opts.onFeed(task, `[${activity}]`);
475
+ },
449
476
  });
450
477
  if (!result.crashed)
451
478
  return result;
@@ -480,10 +507,13 @@ function createTaskStore(opts) {
480
507
  });
481
508
  }
482
509
  function spawnFromDirective(directive, originMessageId) {
483
- // A fix/redo task sweeps the rows it obsoletes off the user's board.
510
+ // A fix/redo task sweeps the rows it obsoletes off the user's board. A
511
+ // superseded task that never started must not start later either.
484
512
  for (const id of resolveDeps(tasks, directive.supersedes)) {
485
513
  const old = tasks.get(id);
486
514
  if (old && !old.acknowledged) {
515
+ if (old.status === 'waiting')
516
+ old.status = 'interrupted';
487
517
  old.acknowledged = true;
488
518
  touch(old);
489
519
  opts.onSuperseded(old);
@@ -536,7 +566,12 @@ function createTaskStore(opts) {
536
566
  }
537
567
  }
538
568
  }
539
- return { sorted, get: (id) => tasks.get(id), spawnFromDirective, acknowledge, shutdown };
569
+ // The router checks finished tasks off by title or id (castle-done fence).
570
+ function checkOff(tokens) {
571
+ for (const id of resolveDeps(tasks, tokens))
572
+ acknowledge(id, false);
573
+ }
574
+ return { sorted, get: (id) => tasks.get(id), spawnFromDirective, acknowledge, checkOff, shutdown };
540
575
  }
541
576
  // -- attachments ----------------------------------------------------------------
542
577
  const ATTACHMENT_MIME = {
@@ -581,6 +616,23 @@ function asPromptTask(task) {
581
616
  notes: task.notes,
582
617
  };
583
618
  }
619
+ function createTaskFeeds(broadcast) {
620
+ const map = new Map();
621
+ function push(task, entry) {
622
+ let feed = map.get(task.id);
623
+ if (!feed) {
624
+ feed = [];
625
+ map.set(task.id, feed);
626
+ }
627
+ if (feed[feed.length - 1] === entry)
628
+ return;
629
+ feed.push(entry);
630
+ if (feed.length > 80)
631
+ feed.splice(0, feed.length - 80);
632
+ broadcast({ type: 'task-feed', id: task.id, entry });
633
+ }
634
+ return { map, push };
635
+ }
584
636
  function createMessageLog(messagesPath, broadcast) {
585
637
  const loaded = readJsonFile(messagesPath) ?? [];
586
638
  const messages = loaded
@@ -695,7 +747,9 @@ function runRouterTurnIn(ctx, instruction) {
695
747
  });
696
748
  return;
697
749
  }
698
- const { cleaned, directives } = extractDirectives(result.finalText);
750
+ const { cleaned, directives, checkoffs } = extractDirectives(result.finalText);
751
+ if (result.ok && checkoffs.length > 0)
752
+ ctx.taskStore.checkOff(checkoffs);
699
753
  // Drop directives from stale turns, and any whose title matches a task
700
754
  // already in flight (two runs reacting to the same ask).
701
755
  const stale = epoch !== ctx.currentEpoch();
@@ -721,6 +775,27 @@ function runRouterTurnIn(ctx, instruction) {
721
775
  });
722
776
  });
723
777
  }
778
+ // Merge settings changes from the client: validate, persist, broadcast, log.
779
+ function applyAgentSettings(incoming, ctx) {
780
+ const { settings } = ctx;
781
+ const changes = [];
782
+ for (const key of ['router', 'tasks']) {
783
+ const value = normalizeBackend(incoming[key]);
784
+ if (value && value !== settings[key]) {
785
+ settings[key] = value;
786
+ changes.push(`${key} agent -> ${value}`);
787
+ }
788
+ }
789
+ const model = normalizeClaudeModel(incoming.claudeModel);
790
+ if (model && model !== settings.claudeModel) {
791
+ settings.claudeModel = model;
792
+ changes.push(`claude model -> ${model}`);
793
+ }
794
+ if (changes.length === 0)
795
+ return;
796
+ fs.writeFileSync(ctx.settingsPath, JSON.stringify(settings, null, 2) + '\n');
797
+ ctx.broadcast({ type: 'settings', settings });
798
+ }
724
799
  export function createAgentServer(opts) {
725
800
  const { deckDir, deckLabel } = opts;
726
801
  const agentDir = path.join(deckDir, '.castle', 'agent');
@@ -750,27 +825,8 @@ export function createAgentServer(opts) {
750
825
  tasks: normalizeBackend(storedSettings?.tasks) ?? DEFAULT_SETTINGS.tasks,
751
826
  claudeModel: normalizeClaudeModel(storedSettings?.claudeModel) ?? DEFAULT_SETTINGS.claudeModel,
752
827
  };
753
- function applySettings(incoming) {
754
- const changes = [];
755
- for (const key of ['router', 'tasks']) {
756
- const value = normalizeBackend(incoming[key]);
757
- if (value && value !== settings[key]) {
758
- settings[key] = value;
759
- changes.push(`${key} agent -> ${value}`);
760
- }
761
- }
762
- const model = normalizeClaudeModel(incoming.claudeModel);
763
- if (model && model !== settings.claudeModel) {
764
- settings.claudeModel = model;
765
- changes.push(`claude model -> ${model}`);
766
- }
767
- if (changes.length === 0)
768
- return;
769
- fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + '\n');
770
- broadcast({ type: 'settings', settings });
771
- for (const change of changes)
772
- addLog(`settings: ${change}`);
773
- }
828
+ const applySettings = (incoming) => applyAgentSettings(incoming, { settings, settingsPath, broadcast });
829
+ const taskFeeds = createTaskFeeds(broadcast);
774
830
  const taskStore = createTaskStore({
775
831
  deckDir,
776
832
  deckLabel,
@@ -782,8 +838,9 @@ export function createAgentServer(opts) {
782
838
  onUpdate: (task) => broadcast({ type: 'task-update', task }),
783
839
  onStarted: () => undefined,
784
840
  onRetry: (task, attempt) => addLog(`agent died, retrying (${attempt}/${MAX_TASK_ATTEMPTS}): ${task.title}`),
785
- onFinished: () => undefined,
841
+ onFinished: (task) => taskFeeds.map.delete(task.id),
786
842
  onSuperseded: () => undefined,
843
+ onFeed: (task, entry) => taskFeeds.push(task, entry),
787
844
  });
788
845
  // A new user message interrupts the in-flight router reply: its partial
789
846
  // text stays in the log, and the next turn continues both threads. The
@@ -845,7 +902,8 @@ export function createAgentServer(opts) {
845
902
  const wss = new WebSocketServer({ noServer: true });
846
903
  function attachClient(socket) {
847
904
  clients.add(socket);
848
- socket.send(JSON.stringify({ type: 'hello', messages, tasks: taskStore.sorted(), settings }));
905
+ const hello = { type: 'hello', messages, tasks: taskStore.sorted(), settings, feeds: Object.fromEntries(taskFeeds.map) };
906
+ socket.send(JSON.stringify(hello));
849
907
  socket.on('message', (rawData) => {
850
908
  let msg;
851
909
  try {
@@ -136,15 +136,22 @@ function initPanelChrome() {
136
136
  //
137
137
  // React app.
138
138
  //
139
- function applyServerEvent(ev, setMessages, setTasks) {
139
+ function applyServerEvent(ev, setMessages, setTasks, setFeeds) {
140
140
  if (ev.type === 'hello') {
141
141
  const messages = ev.messages ?? [];
142
142
  const tasks = ev.tasks ?? [];
143
+ const feeds = ev.feeds ?? {};
143
144
  setMessages(() => messages);
144
145
  setTasks(() => tasks);
146
+ setFeeds(() => feeds);
145
147
  if (ev.settings)
146
148
  applyBackendSegs(ev.settings);
147
149
  }
150
+ else if (ev.type === 'task-feed' && ev.id) {
151
+ const id = ev.id;
152
+ const entry = ev.entry ?? '';
153
+ setFeeds((prev) => ({ ...prev, [id]: [...(prev[id] ?? []).slice(-79), entry] }));
154
+ }
148
155
  else if (ev.type === 'settings' && ev.settings) {
149
156
  applyBackendSegs(ev.settings);
150
157
  }
@@ -176,8 +183,26 @@ function applyServerEvent(ev, setMessages, setTasks) {
176
183
  const next = known ? prev.map((t) => (t.id === task.id ? task : t)) : [...prev, task];
177
184
  return next.sort((a, b) => a.createdAt.localeCompare(b.createdAt));
178
185
  });
186
+ if (TERMINAL_TASK_STATUSES.includes(task.status)) {
187
+ setFeeds((prev) => {
188
+ if (!(task.id in prev))
189
+ return prev;
190
+ const next = { ...prev };
191
+ delete next[task.id];
192
+ return next;
193
+ });
194
+ }
179
195
  }
180
196
  }
197
+ function TaskFeed(props) {
198
+ const hostRef = React.useRef(null);
199
+ React.useLayoutEffect(() => {
200
+ const host = hostRef.current;
201
+ if (host)
202
+ host.scrollTop = host.scrollHeight;
203
+ }, [props.lines]);
204
+ return (React.createElement("div", { className: "task-feed", ref: hostRef, onClick: (event) => event.stopPropagation() }, props.lines.map((line, index) => (React.createElement("div", { key: index }, line)))));
205
+ }
181
206
  function TaskRow(props) {
182
207
  const { task, onAck } = props;
183
208
  const [expanded, setExpanded] = React.useState(false);
@@ -186,7 +211,7 @@ function TaskRow(props) {
186
211
  const notes = task.notes.trim() || task.resultSummary?.trim() || '(no notes from the agent yet)';
187
212
  return (React.createElement("div", { className: `task-card${task.status === 'waiting' ? ' waiting' : ''}`, onClick: () => setExpanded(!expanded) },
188
213
  React.createElement("div", { className: "task-row" },
189
- React.createElement("span", { className: `task-pie${finished ? ' task-ack-pie' : ''}`, title: finished ? 'tested -- check off' : undefined, style: { background: `conic-gradient(#0969da ${pct}%, #eaeef2 0)` }, onClick: finished
214
+ React.createElement("span", { className: `task-pie${finished ? ' task-ack-pie' : ''}`, title: finished ? 'tested -- check off' : undefined, style: { background: `conic-gradient(#000000 ${pct}%, #eaeef2 0)` }, onClick: finished
190
215
  ? (event) => {
191
216
  event.stopPropagation();
192
217
  onAck(task.id, false);
@@ -196,13 +221,14 @@ function TaskRow(props) {
196
221
  task.status !== 'done' ? (React.createElement("span", { className: `task-status ${task.status}` }, task.status === 'running' ? `${pct}%` : task.status)) : null),
197
222
  React.createElement("div", { className: "task-bar" },
198
223
  React.createElement("div", { style: { width: `${pct}%` } })),
199
- expanded ? (React.createElement("div", { className: "task-notes", dangerouslySetInnerHTML: renderMarkdown(notes) })) : null));
224
+ expanded && task.status === 'running' && props.feed && props.feed.length > 0 ? (React.createElement(TaskFeed, { lines: props.feed })) : null,
225
+ expanded && task.status !== 'running' ? (React.createElement("div", { className: "task-notes", dangerouslySetInnerHTML: renderMarkdown(notes) })) : null));
200
226
  }
201
227
  function TaskBoard(props) {
202
228
  const visible = props.tasks.filter((t) => !t.acknowledged);
203
229
  if (visible.length === 0)
204
230
  return null;
205
- return (React.createElement("div", { id: "chat-strip" }, visible.map((task) => (React.createElement(TaskRow, { key: task.id, task: task, onAck: props.onAck })))));
231
+ return (React.createElement("div", { id: "chat-strip" }, visible.map((task) => (React.createElement(TaskRow, { key: task.id, task: task, feed: props.feeds[task.id], onAck: props.onAck })))));
206
232
  }
207
233
  function Message(props) {
208
234
  const { msg } = props;
@@ -315,6 +341,7 @@ function InputRow(props) {
315
341
  function App() {
316
342
  const [messages, setMessages] = React.useState([]);
317
343
  const [tasks, setTasks] = React.useState([]);
344
+ const [feeds, setFeeds] = React.useState({});
318
345
  const sendRef = React.useRef(() => undefined);
319
346
  React.useEffect(() => {
320
347
  const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
@@ -345,7 +372,7 @@ function App() {
345
372
  });
346
373
  sock.addEventListener('message', (event) => {
347
374
  try {
348
- applyServerEvent(JSON.parse(String(event.data)), setMessages, setTasks);
375
+ applyServerEvent(JSON.parse(String(event.data)), setMessages, setTasks, setFeeds);
349
376
  }
350
377
  catch {
351
378
  /* malformed frame */
@@ -382,7 +409,7 @@ function App() {
382
409
  };
383
410
  }, []);
384
411
  return (React.createElement(React.Fragment, null,
385
- React.createElement(TaskBoard, { tasks: tasks, onAck: (id, rejected) => sendRef.current({ type: 'task-ack', id, rejected }) }),
412
+ React.createElement(TaskBoard, { tasks: tasks, feeds: feeds, onAck: (id, rejected) => sendRef.current({ type: 'task-ack', id, rejected }) }),
386
413
  React.createElement(MessageList, { messages: messages }),
387
414
  React.createElement(InputRow, { onSend: (text, images) => sendRef.current({ type: 'user-message', text, images }) })));
388
415
  }
package/dist/ide.js CHANGED
@@ -626,6 +626,32 @@ const IDE_STYLES = `
626
626
  word-break: break-word;
627
627
  font-size: 14px;
628
628
  }
629
+ /* Live stream of a running task agent (text lines + [tool labels]) shown
630
+ on expand -- capped height, auto-scrolled, fading at both scroll edges. */
631
+ .task-feed {
632
+ margin-top: 6px;
633
+ max-height: 150px;
634
+ overflow-y: auto;
635
+ font-size: 12px;
636
+ line-height: 1.5;
637
+ color: #57606a;
638
+ cursor: default;
639
+ word-break: break-word;
640
+ -webkit-mask-image: linear-gradient(
641
+ to bottom,
642
+ transparent,
643
+ #000000 14px,
644
+ #000000 calc(100% - 14px),
645
+ transparent
646
+ );
647
+ mask-image: linear-gradient(
648
+ to bottom,
649
+ transparent,
650
+ #000000 14px,
651
+ #000000 calc(100% - 14px),
652
+ transparent
653
+ );
654
+ }
629
655
  .task-row { display: flex; align-items: center; gap: 8px; }
630
656
  .task-title {
631
657
  flex: 1 1 auto;
@@ -634,6 +660,7 @@ const IDE_STYLES = `
634
660
  text-overflow: ellipsis;
635
661
  white-space: nowrap;
636
662
  }
663
+ .task-title::first-letter { text-transform: uppercase; }
637
664
  .task-status { color: #6e7781; font-size: 12px; flex: 0 0 auto; }
638
665
  .task-status.failed, .task-status.interrupted { color: #a40e26; }
639
666
  .task-pie {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "castle-web-cli",
3
- "version": "0.4.12",
3
+ "version": "0.4.13",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "castle-web": "./dist/index.js"