@synaplink/orqlaude 0.8.0 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/__tests__/v09.test.d.ts +1 -0
  2. package/dist/__tests__/v09.test.js +189 -0
  3. package/dist/__tests__/v09.test.js.map +1 -0
  4. package/dist/__tests__/v092.test.d.ts +1 -0
  5. package/dist/__tests__/v092.test.js +125 -0
  6. package/dist/__tests__/v092.test.js.map +1 -0
  7. package/dist/cli/easter_egg.js +127 -39
  8. package/dist/cli/easter_egg.js.map +1 -1
  9. package/dist/cli.js +1 -1
  10. package/dist/cli.js.map +1 -1
  11. package/dist/lib/jsonl_tail.d.ts +68 -5
  12. package/dist/lib/jsonl_tail.js +70 -11
  13. package/dist/lib/jsonl_tail.js.map +1 -1
  14. package/dist/lib/spawn_cli.d.ts +26 -0
  15. package/dist/lib/spawn_cli.js +45 -1
  16. package/dist/lib/spawn_cli.js.map +1 -1
  17. package/dist/lib/state.d.ts +33 -0
  18. package/dist/lib/state.js +14 -4
  19. package/dist/lib/state.js.map +1 -1
  20. package/dist/lib/telegram_status.js +7 -1
  21. package/dist/lib/telegram_status.js.map +1 -1
  22. package/dist/lib/version.d.ts +18 -0
  23. package/dist/lib/version.js +19 -0
  24. package/dist/lib/version.js.map +1 -0
  25. package/dist/server.js +2 -1
  26. package/dist/server.js.map +1 -1
  27. package/dist/telegram/notifier.js +33 -1
  28. package/dist/telegram/notifier.js.map +1 -1
  29. package/dist/tools/dispatch.d.ts +29 -1
  30. package/dist/tools/dispatch.js +445 -35
  31. package/dist/tools/dispatch.js.map +1 -1
  32. package/dist/tools/lifecycle.js +44 -2
  33. package/dist/tools/lifecycle.js.map +1 -1
  34. package/dist/tools/ping.js +2 -1
  35. package/dist/tools/ping.js.map +1 -1
  36. package/dist/tools/planning.js +4 -1
  37. package/dist/tools/planning.js.map +1 -1
  38. package/dist/tools/userio.js +16 -6
  39. package/dist/tools/userio.js.map +1 -1
  40. package/package.json +1 -1
@@ -2,9 +2,11 @@ import { z } from "zod";
2
2
  import { findPlan, findTask } from "../lib/state.js";
3
3
  import { jsonlPathFor, snapshotSession } from "../lib/jsonl_tail.js";
4
4
  import { detectHallucination, extractToolUses } from "../lib/hallucination.js";
5
- import { spawnAgnetViaCli, findGitRoot, cleanupPlanWorktrees } from "../lib/spawn_cli.js";
5
+ import { spawnAgnetViaCli, findGitRoot, cleanupPlanWorktrees, readChildExitRecord } from "../lib/spawn_cli.js";
6
6
  import { isProcessAlive } from "../lib/process_lib.js";
7
7
  import { resolveStateDir } from "../lib/state_dir.js";
8
+ import { probeTelegramStatus } from "../lib/telegram_status.js";
9
+ import { VERSION } from "../lib/version.js";
8
10
  import { promises as fs } from "node:fs";
9
11
  /**
10
12
  * Dispatch-phase tools: next_task, register_spawn, status, collect.
@@ -124,6 +126,7 @@ export function registerDispatch(server, store, audit) {
124
126
  task.commandLine = spawn.commandLine;
125
127
  task.stderrPath = spawn.stderrPath;
126
128
  task.stdoutPath = spawn.stdoutPath;
129
+ task.exitJsonPath = spawn.exitJsonPath;
127
130
  task.status = "running";
128
131
  task.startedAt = Date.now();
129
132
  return {
@@ -145,13 +148,50 @@ export function registerDispatch(server, store, audit) {
145
148
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
146
149
  }, ({ plan_id }) => ({ planId: plan_id })));
147
150
  // ---- cleanup_worktrees (companion to spawn_via_cli, v0.5.3+) -------------
148
- server.tool("cleanup_worktrees", "Remove all orqlaude-managed worktrees for a plan (typically called after `collect`). Only touches paths under <project>/.orqlaude-worktrees/<plan_short>-*. Force-removes via `git worktree remove --force` then falls back to rm -rf if git refuses.", {
151
+ server.tool("cleanup_worktrees", "Remove all orqlaude-managed worktrees for a plan (typically called after `collect`). Only touches paths under <project>/.orqlaude-worktrees/<plan_short>-*. Force-removes via `git worktree remove --force` then falls back to rm -rf if git refuses. v0.9.0: also releases the spawn locks on every task whose worktree was removed - so the orchestrator can re-spawn against the same plan_id + task_id without create_plan churn.", {
149
152
  plan_id: z.string(),
150
153
  project_root: z.string().optional(),
151
154
  }, audit.wrap("cleanup_worktrees", async ({ plan_id, project_root }) => {
152
155
  const root = project_root ?? findGitRoot(process.cwd());
153
156
  const removed = await cleanupPlanWorktrees(root, plan_id);
154
- return { content: [{ type: "text", text: JSON.stringify({ plan_id, removed_count: removed.length, removed }, null, 2) }] };
157
+ // v0.9.0: walk the plan's tasks and release any spawn lock whose
158
+ // worktreePath was just removed. This makes cleanup_worktrees the
159
+ // canonical "reset this plan, let me re-spawn fresh" entry point.
160
+ const released = await store.update((state) => {
161
+ const plan = findPlan(state, plan_id);
162
+ const releasedIds = [];
163
+ for (const t of plan.tasks) {
164
+ if (t.worktreePath && removed.includes(t.worktreePath)) {
165
+ t.spawnedSessionId = undefined;
166
+ t.pid = undefined;
167
+ t.exitJsonPath = undefined;
168
+ // Reset to pending so next_task / spawn_via_cli treat it fresh.
169
+ // Preserve worktreePath/branch for audit but they'll be
170
+ // overwritten on the next spawn.
171
+ if (t.status === "running" || t.status === "dispatched" || t.status === "died_at_launch") {
172
+ t.status = "pending";
173
+ }
174
+ releasedIds.push(t.id);
175
+ }
176
+ }
177
+ return releasedIds;
178
+ });
179
+ return {
180
+ content: [
181
+ {
182
+ type: "text",
183
+ text: JSON.stringify({
184
+ plan_id,
185
+ removed_count: removed.length,
186
+ removed,
187
+ released_task_ids: released,
188
+ next_step: released.length > 0
189
+ ? `Released ${released.length} spawn lock(s). You can call spawn_via_cli on the same task_ids to re-fire.`
190
+ : undefined,
191
+ }, null, 2),
192
+ },
193
+ ],
194
+ };
155
195
  }, ({ plan_id }) => ({ planId: plan_id })));
156
196
  // ---- register_spawn (MANUAL FALLBACK; rarely needed) ---------------------
157
197
  server.tool("register_spawn", "MANUAL FALLBACK ONLY. Normally the spawned agent self-registers on its first `checkin` call (the prompt next_task generates instructs it to). Only call this if a child fails to self-register within ~30s — symptom: status() shows the task as `dispatched` long after spawn_task succeeded.", {
@@ -185,7 +225,13 @@ export function registerDispatch(server, store, audit) {
185
225
  note: "Not yet spawned (waiting for chip click + self-registration).",
186
226
  };
187
227
  }
188
- const snap = await snapshotSession(cwd, t.spawnedSessionId);
228
+ const snap = await snapshotSession(cwd, t.spawnedSessionId, t.stdoutPath);
229
+ // v0.9.0: fast-path terminal-state read. If the child wrote an
230
+ // exit record via the spawn_via_cli on('exit') handler, surface
231
+ // it - the orchestrator doesn't have to re-poll until isProcessAlive
232
+ // ticks over. Falls through to the regular PID/snapshot path if
233
+ // the file is missing (server restart after spawn, etc).
234
+ const exitRecord = t.exitJsonPath ? await readChildExitRecord(t.exitJsonPath) : null;
189
235
  const toolUses = await extractToolUses(jsonlPathFor(cwd, t.spawnedSessionId));
190
236
  const hallu = await detectHallucination(toolUses, cwd);
191
237
  const taskWarnings = [];
@@ -195,10 +241,39 @@ export function registerDispatch(server, store, audit) {
195
241
  // stops polling a corpse.
196
242
  let derivedStatus = snap.terminated ? "done" : t.status;
197
243
  let stderrExcerpt = null;
244
+ // v0.9.0: terminal-state precedence:
245
+ // 1. exit-record file (most authoritative; written by the
246
+ // parent's on('exit') handler).
247
+ // 2. snap.terminated (result row in the event stream).
248
+ // 3. PID liveness + empty stream = died_at_launch.
249
+ if (exitRecord) {
250
+ derivedStatus = exitRecord.success ? "done" : "failed";
251
+ if (!exitRecord.success && t.stderrPath) {
252
+ try {
253
+ const buf = await fs.readFile(t.stderrPath, "utf8");
254
+ stderrExcerpt = buf.slice(0, 1000);
255
+ }
256
+ catch {
257
+ /* file missing */
258
+ }
259
+ }
260
+ t.status = derivedStatus;
261
+ if (!t.finishedAt)
262
+ t.finishedAt = exitRecord.terminated_at;
263
+ }
264
+ // died_at_launch is now defined as "PID dead AND no event was
265
+ // ever parsed from either stream source." Earlier versions
266
+ // checked `!snap.exists` which broke once we started creating
267
+ // the stdout log file at spawn time (the file exists but is
268
+ // empty when the child exits before writing). Use
269
+ // lastActivityAt + tokens-used == 0 as the canonical signal.
270
+ const producedNothing = !snap.lastActivityAt &&
271
+ snap.totalEffectiveTokens === 0 &&
272
+ !snap.lastAssistantText &&
273
+ !snap.lastToolUse;
198
274
  if (t.pid &&
199
275
  !isProcessAlive(t.pid) &&
200
- !snap.exists &&
201
- !snap.lastActivityAt &&
276
+ producedNothing &&
202
277
  (t.status === "running" || t.status === "dispatched")) {
203
278
  derivedStatus = "died_at_launch";
204
279
  if (t.stderrPath) {
@@ -210,7 +285,7 @@ export function registerDispatch(server, store, audit) {
210
285
  /* file missing or unreadable */
211
286
  }
212
287
  }
213
- taskWarnings.push(`Child PID ${t.pid} is dead and no JSONL was written. ` +
288
+ taskWarnings.push(`Child PID ${t.pid} is dead and no events were parsed from either the Desktop JSONL or the spawn_via_cli stdout log. ` +
214
289
  `Inspect stderr at ${t.stderrPath ?? "(unknown)"} or re-run the command: ${t.commandLine ?? "(unknown)"}`);
215
290
  // Persist the new status so subsequent calls don't redo this.
216
291
  t.status = "died_at_launch";
@@ -218,9 +293,14 @@ export function registerDispatch(server, store, audit) {
218
293
  // Per-task soft budget warning: if the task has a budgetHintTokens
219
294
  // hint and we've blown past 70% of it, surface a yellow flag so the
220
295
  // orchestrator can intervene before the plan-wide hard cap fires.
221
- if (t.budgetHintTokens && snap.totalEffectiveTokens > 0.7 * t.budgetHintTokens) {
222
- const pct = Math.round((snap.totalEffectiveTokens / t.budgetHintTokens) * 100);
223
- taskWarnings.push(`task at ${pct}% of hint (${snap.totalEffectiveTokens.toLocaleString()} / ${t.budgetHintTokens.toLocaleString()} tokens). Consider request_stop if it's stalling.`);
296
+ // v0.9.2: compare against billed (not total) to match the new
297
+ // default plan-level budget mode.
298
+ const taskBudgetRelevant = (plan0.budgetMode ?? "billed") === "billed"
299
+ ? snap.billedTokens
300
+ : snap.totalEffectiveTokens;
301
+ if (t.budgetHintTokens && taskBudgetRelevant > 0.7 * t.budgetHintTokens) {
302
+ const pct = Math.round((taskBudgetRelevant / t.budgetHintTokens) * 100);
303
+ taskWarnings.push(`task at ${pct}% of hint (${taskBudgetRelevant.toLocaleString()} / ${t.budgetHintTokens.toLocaleString()} tokens, mode=${plan0.budgetMode ?? "billed"}). Consider request_stop if it's stalling.`);
224
304
  }
225
305
  return {
226
306
  task_id: t.id,
@@ -229,7 +309,12 @@ export function registerDispatch(server, store, audit) {
229
309
  session_id: t.spawnedSessionId,
230
310
  pid: t.pid ?? null,
231
311
  pid_alive: t.pid ? isProcessAlive(t.pid) : null,
312
+ // v0.9.2: `tokens_used` retained as the back-compat field
313
+ // (= totalEffectiveTokens). Prefer `billed_tokens` for
314
+ // Plan-cost decisions.
232
315
  tokens_used: snap.totalEffectiveTokens,
316
+ billed_tokens: snap.billedTokens,
317
+ cached_tokens: snap.cachedTokens,
233
318
  budget_hint_tokens: t.budgetHintTokens ?? null,
234
319
  cost_usd: snap.totalCostUsd,
235
320
  last_event_type: snap.lastEventType,
@@ -243,10 +328,15 @@ export function registerDispatch(server, store, audit) {
243
328
  stop_requested: t.stopRequested ?? null,
244
329
  stderr_excerpt: stderrExcerpt,
245
330
  stderr_path: t.stderrPath ?? null,
331
+ stdout_path: t.stdoutPath ?? null,
332
+ stream_source: snap.source,
333
+ exit_record: exitRecord,
246
334
  command_line: t.commandLine ?? null,
335
+ // Internal hand-off for enforceBudget below.
336
+ __billed: snap.billedTokens,
337
+ __cached: snap.cachedTokens,
247
338
  };
248
339
  }));
249
- const totalTokens = snapshots.reduce((sum, s) => sum + (s.tokens_used ?? 0), 0);
250
340
  const totalCost = snapshots.reduce((sum, s) => sum + (s.cost_usd ?? 0), 0);
251
341
  // v0.5.2: orphan detection — dispatched > 60s ago without
252
342
  // self-registering. Often means the orchestrator used a non-orqlaude
@@ -266,28 +356,21 @@ export function registerDispatch(server, store, audit) {
266
356
  remedy: "If you can identify its session id via mcp__ccd_session_mgmt__list_sessions, call register_spawn manually. Otherwise the task is invisible to orqlaude until a follow-up checkin arrives.",
267
357
  }));
268
358
  // ---- budget enforcement: kill on overbudget --------------------------
269
- const overbudget = totalTokens > plan0.budgetCapTokens;
270
- let autoCancelled = false;
271
- if (overbudget && plan0.status !== "cancelled_overbudget" && plan0.status !== "cancelled") {
272
- await store.update((state) => {
273
- const plan = findPlan(state, plan_id);
274
- plan.status = "cancelled_overbudget";
275
- for (const t of plan.tasks) {
276
- if (t.spawnedSessionId && !t.stopRequested) {
277
- t.stopRequested = { reason: "fleet overbudget", requestedAt: Date.now(), kind: "hard" };
278
- plan.messages.push({
279
- id: cryptoRandomId(),
280
- toSessionId: t.spawnedSessionId,
281
- text: `STOP: fleet exceeded token budget (used ${Math.round(totalTokens / 1000)}k of ${Math.round(plan.budgetCapTokens / 1000)}k cap). Commit what you have and exit.`,
282
- queuedAt: Date.now(),
283
- delivered: false,
284
- kind: "stop",
285
- });
286
- }
287
- }
288
- });
289
- autoCancelled = true;
359
+ // v0.9.2: shared helper, billed-vs-total aware. The helper picks
360
+ // billed (input + output, default) or total (all four buckets)
361
+ // based on `plan.budgetMode`.
362
+ const budget = await enforceBudget(store, plan_id, snapshots.map((s) => ({
363
+ billed: s.__billed ?? 0,
364
+ cached: s.__cached ?? 0,
365
+ })));
366
+ // Strip the internal hand-off keys so they don't leak into the
367
+ // public response.
368
+ for (const s of snapshots) {
369
+ delete s.__billed;
370
+ delete s.__cached;
290
371
  }
372
+ const totalTokens = budget.total_all; // for the legacy field
373
+ const autoCancelled = budget.auto_cancelled;
291
374
  // ---- aggregated hallucination warning -------------------------------
292
375
  const concerningAgents = snapshots
293
376
  .filter((s) => s.hallucination && s.hallucination.score >= 0.3)
@@ -310,10 +393,22 @@ export function registerDispatch(server, store, audit) {
310
393
  text: JSON.stringify({
311
394
  plan_id,
312
395
  plan_status: autoCancelled ? "cancelled_overbudget" : plan0.status,
313
- budget_cap_tokens: plan0.budgetCapTokens,
396
+ budget_cap_tokens: budget.budget_cap_tokens,
397
+ // v0.9.2: legacy field; sum of all four token buckets.
398
+ // For Plan-cost decisions read `tokens.billed` instead.
314
399
  total_tokens_used: totalTokens,
315
- budget_remaining_tokens: Math.max(0, plan0.budgetCapTokens - totalTokens),
400
+ budget_remaining_tokens: budget.budget_remaining_tokens,
316
401
  total_cost_usd: totalCost,
402
+ // v0.9.2: explicit token breakdown so orchestrators can
403
+ // distinguish "cost-relevant" from "cache churn".
404
+ tokens: {
405
+ billed: budget.total_billed,
406
+ cached: budget.total_cached,
407
+ total: budget.total_all,
408
+ budget_mode: budget.budget_mode,
409
+ budget_relevant: budget.total_for_budget,
410
+ budget_pct: budget.budget_pct,
411
+ },
317
412
  hallucination_alerts: concerningAgents,
318
413
  orphan_alerts: orphans,
319
414
  died_at_launch_alerts: deadAgents,
@@ -332,6 +427,156 @@ export function registerDispatch(server, store, audit) {
332
427
  ],
333
428
  };
334
429
  }, ({ plan_id }) => ({ planId: plan_id })));
430
+ // ---- wait_for_status_change (long-poll) ----------------------------------
431
+ // v0.9.0: replaces the orchestrator's polling-loop pattern. Blocks for up
432
+ // to `timeout_sec` (default 60s) until ANY task in the plan transitions
433
+ // state, finishes, opens a PR, dies at launch, or chews through a
434
+ // material slice of its token budget. Cheap internal poll (every 2s
435
+ // file-stat + tiny snapshot read), but holds the connection open so the
436
+ // primary Claude can sleep without waking up to call status() every 90s.
437
+ server.tool("wait_for_status_change", "Long-poll: blocks up to `timeout_sec` (default 60, max 600) and returns as soon as the fleet state changes (task transition, new PR url, exit-record, +1 KB token delta) - OR returns the unchanged state when the timeout elapses. Use this INSTEAD of ScheduleWakeup + status() polling: pass the `fingerprint` from the prior response as `since_fingerprint` and the call returns the moment something useful happens. v0.9.0+.", {
438
+ plan_id: z.string(),
439
+ since_fingerprint: z.string().optional().describe("The `fingerprint` field from the prior wait_for_status_change (or status) response. Omit on first call - the server returns immediately with the current snapshot + the fresh fingerprint to thread through subsequent calls."),
440
+ timeout_sec: z.number().int().positive().max(600).default(60).describe("Max seconds the call blocks before returning the unchanged state. Default 60. Cap 600 (10 min)."),
441
+ }, audit.wrap("wait_for_status_change", async ({ plan_id, since_fingerprint, timeout_sec }) => {
442
+ const cwd = process.cwd();
443
+ const POLL_INTERVAL_MS = 2_000;
444
+ const deadline = Date.now() + timeout_sec * 1000;
445
+ const buildSnapshot = async () => {
446
+ const plan = await store.read((state) => findPlan(state, plan_id));
447
+ const agents = await Promise.all(plan.tasks.map(async (t) => {
448
+ if (!t.spawnedSessionId) {
449
+ return {
450
+ task_id: t.id,
451
+ title: t.title,
452
+ status: t.status,
453
+ tokens_used: 0,
454
+ billed_tokens: 0,
455
+ cached_tokens: 0,
456
+ pr_url: t.prUrl ?? null,
457
+ pid_alive: null,
458
+ exit_record: null,
459
+ terminated: false,
460
+ stop_kind: t.stopRequested?.kind ?? null,
461
+ };
462
+ }
463
+ const snap = await snapshotSession(cwd, t.spawnedSessionId, t.stdoutPath);
464
+ const exitRecord = t.exitJsonPath ? await readChildExitRecord(t.exitJsonPath) : null;
465
+ return {
466
+ task_id: t.id,
467
+ title: t.title,
468
+ status: exitRecord ? (exitRecord.success ? "done" : "failed") : t.status,
469
+ tokens_used: snap.totalEffectiveTokens,
470
+ billed_tokens: snap.billedTokens,
471
+ cached_tokens: snap.cachedTokens,
472
+ pr_url: t.prUrl ?? null,
473
+ pid_alive: t.pid ? isProcessAlive(t.pid) : null,
474
+ exit_record: exitRecord,
475
+ terminated: snap.terminated || !!exitRecord,
476
+ stop_kind: t.stopRequested?.kind ?? null,
477
+ };
478
+ }));
479
+ // v0.9.2: enforce budget on every poll, not just from status().
480
+ // The plan-level kill needs to fire whether the orchestrator is
481
+ // calling status() or wait_for_status_change.
482
+ const budget = await enforceBudget(store, plan_id, agents.map((a) => ({ billed: a.billed_tokens, cached: a.cached_tokens })));
483
+ return {
484
+ plan_id,
485
+ plan_status: budget.auto_cancelled ? "cancelled_overbudget" : plan.status,
486
+ agents,
487
+ budget,
488
+ };
489
+ };
490
+ const computeFingerprint = (snap) => {
491
+ // v0.9.1: hash-safe encoding via JSON.stringify. The previous
492
+ // pipe-joined / colon-separated form was fragile if a task_id or
493
+ // pr_url ever contained a `|` or `:` (today both are sanitized
494
+ // UUIDs / GitHub URLs, but pinning the structure costs nothing).
495
+ // Also includes `stop_kind` so kill_task / request_stop transitions
496
+ // wake the long-poll without waiting for the child to actually
497
+ // terminate - useful when a soft-stop is in flight.
498
+ // v0.9.2: KB bucket runs off `billed_tokens` (input + output) so
499
+ // cache-read churn doesn't trip the fingerprint every 2s. The
500
+ // long-poll now fires only when something cost-relevant moves.
501
+ const parts = [snap.plan_status];
502
+ for (const a of snap.agents) {
503
+ const kb = Math.floor(a.billed_tokens / 1024);
504
+ parts.push([
505
+ a.task_id,
506
+ a.status,
507
+ a.pr_url ?? null,
508
+ kb,
509
+ a.exit_record
510
+ ? { code: a.exit_record.exit_code, sig: a.exit_record.signal }
511
+ : null,
512
+ a.terminated,
513
+ a.pid_alive,
514
+ a.stop_kind,
515
+ ]);
516
+ }
517
+ return JSON.stringify(parts);
518
+ };
519
+ // First read - if no fingerprint, return immediately with the
520
+ // current state (still useful as a fresh dispatch).
521
+ let snapshot = await buildSnapshot();
522
+ let fingerprint = computeFingerprint(snapshot);
523
+ if (!since_fingerprint || fingerprint !== since_fingerprint) {
524
+ return {
525
+ content: [
526
+ {
527
+ type: "text",
528
+ text: JSON.stringify({
529
+ ...snapshot,
530
+ fingerprint,
531
+ changed: !!since_fingerprint,
532
+ elapsed_sec: 0,
533
+ timed_out: false,
534
+ next_step: "Call wait_for_status_change again with this `fingerprint` as `since_fingerprint` to block until the next transition.",
535
+ }, null, 2),
536
+ },
537
+ ],
538
+ };
539
+ }
540
+ // Poll loop until fingerprint changes or deadline hits.
541
+ const startedAt = Date.now();
542
+ while (Date.now() < deadline) {
543
+ await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
544
+ snapshot = await buildSnapshot();
545
+ fingerprint = computeFingerprint(snapshot);
546
+ if (fingerprint !== since_fingerprint) {
547
+ return {
548
+ content: [
549
+ {
550
+ type: "text",
551
+ text: JSON.stringify({
552
+ ...snapshot,
553
+ fingerprint,
554
+ changed: true,
555
+ elapsed_sec: Math.round((Date.now() - startedAt) / 1000),
556
+ timed_out: false,
557
+ }, null, 2),
558
+ },
559
+ ],
560
+ };
561
+ }
562
+ }
563
+ // Timeout - return unchanged state.
564
+ return {
565
+ content: [
566
+ {
567
+ type: "text",
568
+ text: JSON.stringify({
569
+ ...snapshot,
570
+ fingerprint,
571
+ changed: false,
572
+ elapsed_sec: Math.round((Date.now() - startedAt) / 1000),
573
+ timed_out: true,
574
+ next_step: "Nothing changed during the wait window. Call wait_for_status_change again with the same `since_fingerprint` to keep waiting, OR call status() for a deeper read.",
575
+ }, null, 2),
576
+ },
577
+ ],
578
+ };
579
+ }, ({ plan_id }) => ({ planId: plan_id })));
335
580
  // ---- collect --------------------------------------------------------------
336
581
  server.tool("collect", "Final result aggregation: per-task summary, PR URLs, total tokens+cost, exit reasons. Call once all agents have terminated. The result includes a `ready_for_review` flag suggesting you call `review_prs` (when available) to spawn reviewers.", { plan_id: z.string() }, audit.wrap("collect", async ({ plan_id }) => {
337
582
  const plan = await store.update((state) => {
@@ -343,7 +588,9 @@ export function registerDispatch(server, store, audit) {
343
588
  });
344
589
  const cwd = process.cwd();
345
590
  const results = await Promise.all(plan.tasks.map(async (t) => {
346
- const snap = t.spawnedSessionId ? await snapshotSession(cwd, t.spawnedSessionId) : null;
591
+ const snap = t.spawnedSessionId
592
+ ? await snapshotSession(cwd, t.spawnedSessionId, t.stdoutPath)
593
+ : null;
347
594
  return {
348
595
  task_id: t.id,
349
596
  title: t.title,
@@ -380,6 +627,121 @@ export function registerDispatch(server, store, audit) {
380
627
  ],
381
628
  };
382
629
  }, ({ plan_id }) => ({ planId: plan_id })));
630
+ // ---- fleet_summary (v0.9.0 dashboard, one-tool aggregation) --------------
631
+ // Single-call replacement for ping + status + list_plans + telegram probe.
632
+ // Use this at the START of a fresh session ("what's in flight?") and any
633
+ // time you want a wide view of every active plan without making 4 round
634
+ // trips. Returns:
635
+ // - server health (version, cwd, state dir, telegram status)
636
+ // - per-plan rollup (counts of pending/running/done/failed, PR list)
637
+ // - cross-plan totals (active Agnets, total tokens spent today)
638
+ server.tool("fleet_summary", "Single-call dashboard for the entire orqlaude state. Returns server health + per-plan rollup + cross-plan totals. Use at session start to discover in-flight fleets; use mid-fleet for a wide view without ping + status + list_plans round-trips. v0.9.0+.", {}, audit.wrap("fleet_summary", async () => {
639
+ const cwd = process.cwd();
640
+ const stateDir = resolveStateDir().path;
641
+ const tg = await probeTelegramStatus(stateDir);
642
+ const { plans, orphanNotificationCount, orphanResponseCount } = await store.read((s) => ({
643
+ plans: Object.values(s.plans),
644
+ orphanNotificationCount: (s.orphanNotifications ?? []).length,
645
+ orphanResponseCount: (s.orphanResponseRequests ?? []).length,
646
+ }));
647
+ const planRollups = await Promise.all(plans.map(async (p) => {
648
+ // Fast per-task counts WITHOUT the full snapshotSession read.
649
+ const counts = { pending: 0, dispatched: 0, running: 0, done: 0, failed: 0, cancelled: 0, died_at_launch: 0 };
650
+ const prs = [];
651
+ // v0.9.1: parallelize the per-task snapshot reads. The first
652
+ // post-restart call is O(plans × tasks) IO; the inner Promise.all
653
+ // makes the per-plan inner loop concurrent. Cache makes
654
+ // subsequent calls cheap regardless.
655
+ for (const t of p.tasks) {
656
+ const status = t.status ?? "pending";
657
+ counts[status] = (counts[status] ?? 0) + 1;
658
+ if (t.prUrl)
659
+ prs.push(t.prUrl);
660
+ }
661
+ const taskTokens = await Promise.all(p.tasks.map(async (t) => {
662
+ if (!t.spawnedSessionId)
663
+ return { billed: 0, cached: 0, total: 0 };
664
+ const snap = await snapshotSession(cwd, t.spawnedSessionId, t.stdoutPath);
665
+ return {
666
+ billed: snap.billedTokens,
667
+ cached: snap.cachedTokens,
668
+ total: snap.totalEffectiveTokens,
669
+ };
670
+ }));
671
+ const tokensBilled = taskTokens.reduce((s, v) => s + v.billed, 0);
672
+ const tokensCached = taskTokens.reduce((s, v) => s + v.cached, 0);
673
+ const tokensTotal = taskTokens.reduce((s, v) => s + v.total, 0);
674
+ // v0.9.2: budget_pct reflects the plan's chosen mode (billed
675
+ // default). Plan users see the cost-relevant pct, not the
676
+ // cache-inflated total.
677
+ const mode = p.budgetMode ?? "billed";
678
+ const tokensForBudget = mode === "billed" ? tokensBilled : tokensTotal;
679
+ const allDone = p.tasks.length > 0 && p.tasks.every((t) => t.status === "done" || t.status === "failed" || t.status === "cancelled");
680
+ return {
681
+ plan_id: p.id,
682
+ status: p.status,
683
+ created_at: p.createdAt,
684
+ root_task: p.rootTask.slice(0, 120),
685
+ task_count: p.tasks.length,
686
+ task_status_counts: counts,
687
+ tokens_used: tokensTotal, // legacy field (sum of all four buckets)
688
+ tokens: {
689
+ billed: tokensBilled,
690
+ cached: tokensCached,
691
+ total: tokensTotal,
692
+ budget_mode: mode,
693
+ budget_relevant: tokensForBudget,
694
+ },
695
+ budget_cap_tokens: p.budgetCapTokens,
696
+ budget_pct: p.budgetCapTokens ? Math.round((tokensForBudget / p.budgetCapTokens) * 100) : 0,
697
+ prs,
698
+ all_terminal: allDone,
699
+ suggested_next: p.status === "draft"
700
+ ? "request_approval + confirm"
701
+ : counts.pending > 0
702
+ ? "spawn_via_cli (per-task) or next_task"
703
+ : counts.running + counts.dispatched > 0
704
+ ? "wait_for_status_change"
705
+ : allDone
706
+ ? "collect + cleanup_worktrees"
707
+ : "status",
708
+ };
709
+ }));
710
+ const activeAgnets = planRollups.reduce((sum, r) => sum + (r.task_status_counts.running ?? 0) + (r.task_status_counts.dispatched ?? 0), 0);
711
+ const grandTokens = planRollups.reduce((sum, r) => sum + r.tokens_used, 0);
712
+ const grandBilled = planRollups.reduce((sum, r) => sum + r.tokens.billed, 0);
713
+ const grandCached = planRollups.reduce((sum, r) => sum + r.tokens.cached, 0);
714
+ return {
715
+ content: [
716
+ {
717
+ type: "text",
718
+ text: JSON.stringify({
719
+ server: { version: VERSION, cwd, state_dir: stateDir },
720
+ telegram: tg,
721
+ plans: planRollups.sort((a, b) => b.created_at - a.created_at),
722
+ totals: {
723
+ plan_count: plans.length,
724
+ active_agnets: activeAgnets,
725
+ grand_total_tokens: grandTokens, // legacy: sum of all buckets
726
+ grand_billed_tokens: grandBilled, // v0.9.2: input + output only
727
+ grand_cached_tokens: grandCached, // v0.9.2: cache reads + creations
728
+ },
729
+ orphan_queue: {
730
+ notifications: orphanNotificationCount,
731
+ response_requests: orphanResponseCount,
732
+ },
733
+ next_step: activeAgnets > 0
734
+ ? `${activeAgnets} Agnet(s) actively running. Call wait_for_status_change(<plan_id>) to block until any transitions.`
735
+ : planRollups.some((r) => r.status === "draft")
736
+ ? "One or more plans are draft - confirm or cancel."
737
+ : planRollups.some((r) => r.all_terminal && r.status !== "collected")
738
+ ? "All Agnets on at least one plan are terminal. Call collect + cleanup_worktrees."
739
+ : "Idle.",
740
+ }, null, 2),
741
+ },
742
+ ],
743
+ };
744
+ }, () => ({})));
383
745
  }
384
746
  function buildSpawnPrompt(planId, taskId, userPrompt, branchHint) {
385
747
  const branchSection = branchHint ? `\n\nSuggested branch: \`${branchHint}\`.` : "";
@@ -431,6 +793,54 @@ task_id: ${taskId}
431
793
  ═══════════════════════════════════════════════════════════════
432
794
  `;
433
795
  }
796
+ async function enforceBudget(store, plan_id, agents) {
797
+ const plan0 = await store.read((state) => findPlan(state, plan_id));
798
+ const totalBilled = agents.reduce((s, a) => s + a.billed, 0);
799
+ const totalCached = agents.reduce((s, a) => s + a.cached, 0);
800
+ const totalAll = totalBilled + totalCached;
801
+ const budgetMode = plan0.budgetMode ?? "billed";
802
+ const totalForBudget = budgetMode === "billed" ? totalBilled : totalAll;
803
+ const cap = plan0.budgetCapTokens;
804
+ const overbudget = totalForBudget > cap;
805
+ const alreadyCancelled = plan0.status === "cancelled_overbudget" || plan0.status === "cancelled";
806
+ let autoCancelled = false;
807
+ if (overbudget && !alreadyCancelled) {
808
+ await store.update((state) => {
809
+ const plan = findPlan(state, plan_id);
810
+ // Re-check inside the lock - another concurrent call may have raced us.
811
+ if (plan.status === "cancelled_overbudget" || plan.status === "cancelled")
812
+ return;
813
+ plan.status = "cancelled_overbudget";
814
+ for (const t of plan.tasks) {
815
+ if (t.spawnedSessionId && !t.stopRequested) {
816
+ t.stopRequested = { reason: "fleet overbudget", requestedAt: Date.now(), kind: "hard" };
817
+ plan.messages.push({
818
+ id: cryptoRandomId(),
819
+ toSessionId: t.spawnedSessionId,
820
+ text: `STOP: fleet exceeded token budget (used ${Math.round(totalForBudget / 1000)}k of ` +
821
+ `${Math.round(cap / 1000)}k cap, mode=${budgetMode}). Commit what you have and exit.`,
822
+ queuedAt: Date.now(),
823
+ delivered: false,
824
+ kind: "stop",
825
+ });
826
+ }
827
+ }
828
+ });
829
+ autoCancelled = true;
830
+ }
831
+ return {
832
+ total_billed: totalBilled,
833
+ total_cached: totalCached,
834
+ total_all: totalAll,
835
+ total_for_budget: totalForBudget,
836
+ budget_mode: budgetMode,
837
+ budget_cap_tokens: cap,
838
+ budget_remaining_tokens: Math.max(0, cap - totalForBudget),
839
+ budget_pct: cap > 0 ? Math.round((totalForBudget / cap) * 100) : 0,
840
+ overbudget,
841
+ auto_cancelled: autoCancelled,
842
+ };
843
+ }
434
844
  function cryptoRandomId() {
435
845
  // small helper to avoid importing crypto in this top-of-file scope twice
436
846
  return Math.random().toString(36).slice(2) + Date.now().toString(36);