castle-web-cli 0.4.25 → 0.4.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,6 +34,13 @@ comma-separated titles or ids of the finished tasks
34
34
  \`\`\`
35
35
 
36
36
  - NEVER check a task off on your own judgment -- only a clear user statement that it works (or an explicit ask to clear it) counts. When in doubt, leave the row on the board.
37
+ - To STOP tasks (running or waiting) when the user asks or their work is clearly no longer wanted, include:
38
+
39
+ \`\`\`castle-stop
40
+ comma-separated titles or ids of the tasks to stop
41
+ \`\`\`
42
+
43
+ Stopped tasks show as interrupted on the board. If a new task replaces the stopped work, prefer \`supersedes:\` on the new task instead.
37
44
  - Tasks are one-and-done -- when the user gives feedback on a finished task, spawn a new fix task rather than reopening the old one.
38
45
  - Task agents are capable coding agents working in this same deck directory, but they know nothing about this conversation beyond your prompt.
39
46
 
@@ -102,7 +109,7 @@ ${opts.prompt}${deps}
102
109
 
103
110
  Operating rules:
104
111
  - Speed is of the essence -- go for the quickest viable, testable change, unless this prompt explicitly tells you to take longer and dive deep. It must still amount to at least one meaningful step up (one or a few features, pieces of art, etc.), never a token gesture.
105
- - Verify the cheapest way that actually proves the change: reading the code path, serve logs, a quick console assertion. Screenshots are allowed but expensive -- at most ONE screenshot attempt, and only when a still image can really capture the change. For timing-dependent visuals (mid-animation effects, trails, transient states) skip screenshots entirely, verify in code, and note that in your notes. NEVER retry screenshots in a loop; if one attempt is inconclusive, move on and let the user playtest.
112
+ - The USER is the verifier -- the whole tasks system exists so the user playtests every change themselves. Your first priority is to finish as soon as possible with the change genuinely in place and reachable in the running deck, so the user can test it right away. Do NOT run verification (screenshots especially) unless you are really sure it will catch something a re-read of your own change cannot -- and even then at most one cheap check, never a retry loop. Time spent verifying is time the user is left waiting.
106
113
  - The moment implementation is complete and you switch to verifying, write 90 to the progress file -- verification time must not read as stalled progress.
107
114
  - Do this one task completely, then stop. Do not expand scope.
108
115
  - Update your progress VERY frequently: write a bare integer 0-100 to ${opts.progressPath} (e.g. \`echo 30 > ${opts.progressPath}\`) every time you advance -- at least every 10 points, or every 20 for properly small tasks. Start near 10, write 90 just before wrapping up. Never let it sit stale while you work.
package/dist/agent.js CHANGED
@@ -14,7 +14,7 @@
14
14
  // Backend CLI: cursor-agent in headless print mode (stream-json). The router
15
15
  // runs with --mode ask (read-only at the CLI level); task agents run with
16
16
  // --force. Claude support can slot in later behind runAgentCli.
17
- import { spawn } from 'child_process';
17
+ import { execFileSync, spawn } from 'child_process';
18
18
  import * as fs from 'fs';
19
19
  import * as path from 'path';
20
20
  import { nanoid } from 'nanoid';
@@ -118,15 +118,19 @@ function visibleLength(raw) {
118
118
  function extractDirectives(full) {
119
119
  const directives = [];
120
120
  const checkoffs = [];
121
- const doneRe = /```castle-done[ \t]*\r?\n([\s\S]*?)```/g;
122
- const withoutDone = full.replace(doneRe, (_match, body) => {
123
- for (const token of String(body).split(/[,\n]/)) {
124
- const trimmed = token.trim();
125
- if (trimmed)
126
- checkoffs.push(trimmed);
127
- }
128
- return '';
129
- });
121
+ const stops = [];
// Strip every ```<name> fenced block from `source` and collect its entries.
// The fence body is split on commas and newlines; each non-empty, trimmed
// token is pushed onto `into`. Returns `source` with the fences removed.
const listFence = (source, name, into) => {
    // Escape regex metacharacters so the fence name is always matched
    // literally, whatever name a caller passes.
    const literalName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const re = new RegExp('```' + literalName + '[ \\t]*\\r?\\n([\\s\\S]*?)```', 'g');
    return source.replace(re, (_match, body) => {
        for (const token of String(body).split(/[,\n]/)) {
            const trimmed = token.trim();
            if (trimmed)
                into.push(trimmed);
        }
        return '';
    });
};
133
+ const withoutDone = listFence(listFence(full, 'castle-done', checkoffs), 'castle-stop', stops);
130
134
  const fenceRe = /```castle-task[ \t]*\r?\n([\s\S]*?)```/g;
131
135
  const cleaned = withoutDone.replace(fenceRe, (_match, body) => {
132
136
  const lines = String(body).replace(/\r/g, '').split('\n');
@@ -148,7 +152,7 @@ function extractDirectives(full) {
148
152
  }
149
153
  return '';
150
154
  });
151
- return { cleaned: cleaned.replace(/\n{3,}/g, '\n\n').trim(), directives, checkoffs };
155
+ return { cleaned: cleaned.replace(/\n{3,}/g, '\n\n').trim(), directives, checkoffs, stops };
152
156
  }
153
157
  // Claude names tools directly (Read, Edit, Bash, ...).
154
158
  function claudeToolActivityLabel(name) {
@@ -413,9 +417,68 @@ function depsSummaryFor(tasks, task) {
413
417
  .map((dep) => `- "${dep.title}" finished ${dep.status}${dep.notes.trim() ? `; notes: ${dep.notes.trim()}` : ''}`);
414
418
  return lines.join('\n') || undefined;
415
419
  }
/**
 * Run the agent CLI for one task, re-running it while the process crashes.
 *
 * Reads from `ctx`: `tasksDir`, `deckDir`, `deckLabel`, `depsSummary`,
 * `backend`, `claudeModel`, `children`, `stopRequested` (a Set of task ids),
 * and the callbacks `onFeed(line)` and `onRetry(nextAttempt)`.
 * Writes `task.pid` each time a process is spawned.
 *
 * @param {object} ctx - Run context described above.
 * @param {object} task - Task record; `id`, `title` and `prompt` are read.
 * @returns {Promise<object>} The last `runAgentCli` result. It is returned as
 *   soon as an attempt ends without `crashed`, or when the task id is in
 *   `ctx.stopRequested`. After MAX_TASK_ATTEMPTS crashed attempts the result's
 *   `error` is rewritten to say the process kept dying.
 */
async function runTaskAgentIn(ctx, task) {
    const dir = path.join(ctx.tasksDir, task.id);
    const relDir = path.relative(ctx.deckDir, dir);
    const taskPrompt = buildTaskPrompt({
        deckLabel: ctx.deckLabel,
        taskId: task.id,
        title: task.title,
        prompt: task.prompt,
        progressPath: path.join(relDir, 'progress'),
        notesPath: path.join(relDir, 'notes.md'),
        depsSummary: ctx.depsSummary,
    });
    // Claude task runs get /goal (claude code's built-in commit-to-completion
    // command) plus the system-prompt autonomy reminder.
    const invocation = buildAgentInvocation(ctx.backend, 'task', ctx.backend === 'claude' ? `/goal ${taskPrompt}` : taskPrompt, ctx.claudeModel);
    let result = { ok: false, finalText: '', error: 'not run' };
    let lineBuf = '';
    // Forward every complete line of streamed text to the feed; the trailing
    // partial line stays buffered until its newline arrives.
    const flushFeedLines = (delta) => {
        const pieces = (lineBuf + delta).split('\n');
        lineBuf = pieces.pop() ?? '';
        for (const piece of pieces) {
            const line = piece.trim();
            if (line)
                ctx.onFeed(line);
        }
    };
    // Emit whatever is still buffered once a process has ended. Without this
    // a final line lacking a trailing newline never reaches the feed, and
    // after a crash it would be glued onto the first line of the retry.
    const flushFeedRemainder = () => {
        const rest = lineBuf.trim();
        lineBuf = '';
        if (rest)
            ctx.onFeed(rest);
    };
    for (let attempt = 1; attempt <= MAX_TASK_ATTEMPTS; attempt++) {
        result = await runAgentCli({
            cwd: ctx.deckDir,
            command: invocation.command,
            args: invocation.args,
            parser: ctx.backend,
            timeoutMs: TASK_TIMEOUT_MS,
            logPath: path.join(dir, 'log.jsonl'),
            children: ctx.children,
            onSpawn: (pid) => {
                task.pid = pid;
            },
            onDelta: (delta) => flushFeedLines(delta),
            onActivity: (activity) => {
                if (activity)
                    ctx.onFeed(`[${activity}]`);
            },
        });
        flushFeedRemainder();
        // A stop request means the process was killed on purpose: no retry.
        if (ctx.stopRequested.has(task.id))
            return result;
        if (!result.crashed)
            return result;
        if (attempt < MAX_TASK_ATTEMPTS)
            ctx.onRetry(attempt + 1);
    }
    result.error = `agent process kept dying (${MAX_TASK_ATTEMPTS} attempts): ${result.error ?? ''}`;
    return result;
}
416
476
  function createTaskStore(opts) {
417
477
  const { deckDir, deckLabel, tasksDir, children } = opts;
418
478
  const tasks = loadTasks(tasksDir);
479
+ // Tasks the router asked to stop: their killed process must not read as a
480
+ // crash (no retry) and they finalize as interrupted, not failed.
481
+ const stopRequested = new Set();
419
482
  function sorted() {
420
483
  return [...tasks.values()].sort((a, b) => a.createdAt.localeCompare(b.createdAt));
421
484
  }
@@ -435,62 +498,6 @@ function createTaskStore(opts) {
435
498
  return;
436
499
  start(task);
437
500
  }
438
- // Run the task agent, re-running if the process dies mid-task (crash, not
439
- // a normal finish). After MAX_TASK_ATTEMPTS dead processes the task fails.
440
- async function runTaskAgent(task, dir) {
441
- const relDir = path.relative(deckDir, dir);
442
- const backend = opts.backend();
443
- const taskPrompt = buildTaskPrompt({
444
- deckLabel,
445
- taskId: task.id,
446
- title: task.title,
447
- prompt: task.prompt,
448
- progressPath: path.join(relDir, 'progress'),
449
- notesPath: path.join(relDir, 'notes.md'),
450
- depsSummary: depsSummaryFor(tasks, task),
451
- });
452
- // Claude task runs get /goal (claude code's built-in commit-to-completion
453
- // command) plus the system-prompt autonomy reminder.
454
- const invocation = buildAgentInvocation(backend, 'task', backend === 'claude' ? `/goal ${taskPrompt}` : taskPrompt, opts.claudeModel());
455
- let result = { ok: false, finalText: '', error: 'not run' };
456
- let lineBuf = '';
457
- const flushFeedLines = (delta) => {
458
- lineBuf += delta;
459
- let nl = lineBuf.indexOf('\n');
460
- while (nl >= 0) {
461
- const line = lineBuf.slice(0, nl).trim();
462
- lineBuf = lineBuf.slice(nl + 1);
463
- if (line)
464
- opts.onFeed(task, line);
465
- nl = lineBuf.indexOf('\n');
466
- }
467
- };
468
- for (let attempt = 1; attempt <= MAX_TASK_ATTEMPTS; attempt++) {
469
- result = await runAgentCli({
470
- cwd: deckDir,
471
- command: invocation.command,
472
- args: invocation.args,
473
- parser: backend,
474
- timeoutMs: TASK_TIMEOUT_MS,
475
- logPath: path.join(dir, 'log.jsonl'),
476
- children,
477
- onSpawn: (pid) => {
478
- task.pid = pid;
479
- },
480
- onDelta: (delta) => flushFeedLines(delta),
481
- onActivity: (activity) => {
482
- if (activity)
483
- opts.onFeed(task, `[${activity}]`);
484
- },
485
- });
486
- if (!result.crashed)
487
- return result;
488
- if (attempt < MAX_TASK_ATTEMPTS)
489
- opts.onRetry(task, attempt + 1);
490
- }
491
- result.error = `agent process kept dying (${MAX_TASK_ATTEMPTS} attempts): ${result.error ?? ''}`;
492
- return result;
493
- }
494
501
  function start(task) {
495
502
  const dir = path.join(tasksDir, task.id);
496
503
  fs.writeFileSync(path.join(dir, 'progress'), '0\n');
@@ -500,15 +507,30 @@ function createTaskStore(opts) {
500
507
  task.startedAt = nowIso();
501
508
  touch(task);
502
509
  opts.onStarted(task);
503
- void runTaskAgent(task, dir).then((result) => {
510
+ const runCtx = {
511
+ deckDir,
512
+ deckLabel,
513
+ tasksDir,
514
+ children,
515
+ backend: opts.backend(),
516
+ claudeModel: opts.claudeModel(),
517
+ stopRequested,
518
+ depsSummary: depsSummaryFor(tasks, task),
519
+ onFeed: (entry) => opts.onFeed(task, entry),
520
+ onRetry: (attempt) => opts.onRetry(task, attempt),
521
+ };
522
+ void runTaskAgentIn(runCtx, task).then((result) => {
504
523
  refreshTaskFiles(tasksDir, task);
505
- task.status = result.ok ? 'done' : 'failed';
506
- if (result.ok)
524
+ const wasStopped = stopRequested.delete(task.id);
525
+ task.status = wasStopped ? 'interrupted' : result.ok ? 'done' : 'failed';
526
+ if (result.ok && !wasStopped)
507
527
  task.progress = 100;
508
528
  task.finishedAt = nowIso();
509
- task.resultSummary = result.ok
510
- ? result.finalText.slice(-RESULT_SUMMARY_CHARS)
511
- : `${result.error ?? 'failed'}\n${result.finalText.slice(-RESULT_SUMMARY_CHARS)}`;
529
+ task.resultSummary = wasStopped
530
+ ? 'stopped by the router'
531
+ : result.ok
532
+ ? result.finalText.slice(-RESULT_SUMMARY_CHARS)
533
+ : `${result.error ?? 'failed'}\n${result.finalText.slice(-RESULT_SUMMARY_CHARS)}`;
512
534
  touch(task);
513
535
  opts.onFinished(task);
514
536
  for (const waiting of tasks.values())
@@ -580,7 +602,34 @@ function createTaskStore(opts) {
580
602
  for (const id of resolveDeps(tasks, tokens))
581
603
  acknowledge(id, false);
582
604
  }
583
- return { sorted, get: (id) => tasks.get(id), spawnFromDirective, acknowledge, checkOff, shutdown };
// The router stops tasks by title or id (castle-stop fence). Tokens are
// resolved to task ids with resolveDeps. Waiting tasks are cancelled
// outright; running ones get their agent process killed and finalize as
// interrupted via the stopRequested path. Tasks in any other status are left
// untouched.
// NOTE(review): a cancelled waiting task is only touch()ed here; confirm that
// is enough for the board to refresh and for tasks waiting on it to be
// re-evaluated.
function stop(tokens) {
    for (const id of resolveDeps(tasks, tokens)) {
        const task = tasks.get(id);
        if (!task)
            continue;
        if (task.status === 'waiting') {
            task.status = 'interrupted';
            // Same summary the finalize path records for a stopped running
            // task, so both kinds of stop read alike.
            task.resultSummary = 'stopped by the router';
            touch(task);
            continue;
        }
        if (task.status !== 'running')
            continue;
        // Flag first: the kill below must not be read as a crash and retried.
        stopRequested.add(task.id);
        for (const child of children) {
            if (child.pid !== task.pid)
                continue;
            try {
                child.kill('SIGKILL');
            }
            catch {
                /* already gone */
            }
        }
    }
}
632
+ return { sorted, get: (id) => tasks.get(id), spawnFromDirective, acknowledge, checkOff, stop, shutdown };
584
633
  }
585
634
  // -- attachments ----------------------------------------------------------------
586
635
  const ATTACHMENT_MIME = {
@@ -756,9 +805,11 @@ function runRouterTurnIn(ctx, instruction) {
756
805
  });
757
806
  return;
758
807
  }
759
- const { cleaned, directives, checkoffs } = extractDirectives(result.finalText);
808
+ const { cleaned, directives, checkoffs, stops } = extractDirectives(result.finalText);
760
809
  if (result.ok && checkoffs.length > 0)
761
810
  ctx.taskStore.checkOff(checkoffs);
811
+ if (result.ok && stops.length > 0)
812
+ ctx.taskStore.stop(stops);
762
813
  // Drop directives from stale turns, and any whose title matches a task
763
814
  // already in flight (two runs reacting to the same ask).
764
815
  const stale = epoch !== ctx.currentEpoch();
@@ -805,6 +856,62 @@ function applyAgentSettings(incoming, ctx) {
805
856
  fs.writeFileSync(ctx.settingsPath, JSON.stringify(settings, null, 2) + '\n');
806
857
  ctx.broadcast({ type: 'settings', settings });
807
858
  }
/**
 * Kill agent processes recorded in the child registry, then reset the
 * registry to an empty list.
 *
 * For each recorded pid the live command line is looked up with `ps`, and the
 * process is SIGKILLed only when that command mentions `cursor-agent` or
 * `claude`, so a pid reused by an unrelated program is left alone.
 * NOTE(review): the `claude` substring match is broad (it also matches any
 * command whose path contains "claude"); confirm that is acceptable.
 *
 * @param {string} registryPath - Path of the JSON registry file.
 */
function killOrphanAgents(registryPath) {
    const recorded = readJsonFile(registryPath);
    // A corrupt registry (object, string, number, ...) must not break startup.
    const entries = Array.isArray(recorded) ? recorded : [];
    for (const entry of entries) {
        const pid = entry?.pid;
        // Only positive integers name a single process; 0 and negative values
        // address process groups when passed to kill.
        if (!Number.isInteger(pid) || pid <= 0)
            continue;
        try {
            const cmd = execFileSync('ps', ['-p', String(pid), '-o', 'command='], {
                encoding: 'utf8',
            }).trim();
            if (cmd.includes('cursor-agent') || cmd.includes('claude')) {
                process.kill(pid, 'SIGKILL');
            }
        }
        catch {
            /* pid not running anymore */
        }
    }
    try {
        fs.writeFileSync(registryPath, '[]\n');
    }
    catch {
        /* registry dir missing -- created later */
    }
}
/**
 * Periodically record the live child processes of `groups` in a JSON
 * registry file.
 *
 * Every 2 seconds the children that have a numeric `pid` and an `exitCode` of
 * null are collected as `{ pid, command }` (command is `child.spawnfile`).
 * The file is rewritten only when that list differs from the last list that
 * was written successfully.
 *
 * @param {string} registryPath - Path of the JSON registry file.
 * @param {Iterable<Iterable<object>>} groups - Collections of child processes.
 * @returns {() => void} Stops the polling and resets the registry to `[]`.
 */
function startChildRegistry(registryPath, groups) {
    let last = '';
    const timer = setInterval(() => {
        const live = [];
        for (const group of groups) {
            for (const child of group) {
                if (typeof child.pid === 'number' && child.exitCode === null) {
                    live.push({ pid: child.pid, command: child.spawnfile });
                }
            }
        }
        const snapshot = JSON.stringify(live);
        if (snapshot === last)
            return;
        try {
            fs.writeFileSync(registryPath, snapshot + '\n');
            // Remember the snapshot only once it is on disk, so a failed
            // write (e.g. the directory does not exist yet) is retried on the
            // next tick instead of being skipped as "unchanged".
            last = snapshot;
        }
        catch {
            /* best effort -- retried on the next tick */
        }
    }, 2_000);
    return () => {
        clearInterval(timer);
        try {
            fs.writeFileSync(registryPath, '[]\n');
        }
        catch {
            /* best effort */
        }
    };
}
808
915
  export function createAgentServer(opts) {
809
916
  const { deckDir, deckLabel } = opts;
810
917
  const agentDir = path.join(deckDir, '.castle', 'agent');
@@ -815,6 +922,11 @@ export function createAgentServer(opts) {
815
922
  const taskChildren = new Set();
816
923
  const routerChildren = new Set();
817
924
  const clients = new Set();
925
+ // Kill agent processes orphaned by a previous serve that died uncleanly,
926
+ // then start tracking this serve's own children.
927
+ const childRegistryPath = path.join(agentDir, 'children.json');
928
+ killOrphanAgents(childRegistryPath);
929
+ const stopChildRegistry = startChildRegistry(childRegistryPath, [taskChildren, routerChildren]);
818
930
  function broadcast(body) {
819
931
  const payload = JSON.stringify(body);
820
932
  for (const socket of clients) {
@@ -955,6 +1067,7 @@ export function createAgentServer(opts) {
955
1067
  /* already gone */
956
1068
  }
957
1069
  }
1070
+ stopChildRegistry();
958
1071
  wss.close();
959
1072
  }
960
1073
  return { handleUpgrade, handleHttpRequest, shutdown };
package/dist/serve.js CHANGED
@@ -170,6 +170,9 @@ export async function serve(dir, options = {}) {
170
170
  });
171
171
  process.on('SIGINT', () => process.exit());
172
172
  process.on('SIGTERM', () => process.exit());
173
+ // Terminal close: exit through the handlers so child agent processes die
174
+ // with the serve instead of orphaning.
175
+ process.on('SIGHUP', () => process.exit());
173
176
  // The WS server forwards `restart` to the browser, but it also needs the
174
177
  // Vite instance so it can drop transform caches first (see invalidateModuleCaches).
175
178
  const viteHolder = { vite: null };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "castle-web-cli",
3
- "version": "0.4.25",
3
+ "version": "0.4.27",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "castle-web": "./dist/index.js"