@kenkaiiii/gg-boss 4.3.153 → 4.3.155

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -87567,7 +87567,12 @@ function ensureLoaded() {
87567
87567
  return void 0;
87568
87568
  loadAttempted = true;
87569
87569
  import("./dist-VXOVSHZ5.js").then((m) => {
87570
- hlModule = m;
87570
+ const candidate = m;
87571
+ if (typeof candidate.supportsLanguage === "function") {
87572
+ hlModule = candidate;
87573
+ } else if (candidate.default && typeof candidate.default.supportsLanguage === "function") {
87574
+ hlModule = candidate.default;
87575
+ }
87571
87576
  }, () => {
87572
87577
  });
87573
87578
  return void 0;
@@ -91403,6 +91408,81 @@ var bossStore = {
91403
91408
  };
91404
91409
 
91405
91410
  // src/worker.ts
91411
// Provider error-message fingerprints, grouped by the recovery each one calls
// for. The trailing comment on a pattern names the provider whose wording it
// targets.

// The conversation no longer fits in the model's context window.
var CONTEXT_OVERFLOW_PATTERNS = [
  /context_length_exceeded/i,
  /context length exceeded/i,
  /context window/i, // OpenAI Codex / Responses
  /maximum context length/i, // OpenAI / OpenRouter / Mistral
  /prompt is too long/i, // Anthropic
  /request_too_large/i, // Anthropic HTTP 413
  /input is too long/i, // Bedrock
  /input token count.*exceeds the maximum/i, // Gemini
  /maximum prompt length/i, // xAI / Grok
  /reduce the length of the messages/i, // Groq
  /too large for model/i, // Mistral
  /token limit/i // generic
];

// Throttling by the provider.
var RATE_LIMIT_PATTERNS2 = [
  /rate[ _-]?limit/i,
  /\b429\b/,
  /too many requests/i,
  /tokens per minute/i,
  /requests per minute/i
];

// Out of credit / quota.
var BILLING_PATTERNS = [
  /insufficient balance/i,
  /insufficient[ _]quota/i,
  /quota exceeded/i,
  /quota_exceeded/i,
  /credit balance/i,
  /please recharge/i,
  /payment required/i,
  /\b402\b/
];

// Credentials rejected.
var AUTH_PATTERNS = [
  /invalid[ _]api[ _]key/i,
  /unauthorized/i,
  /\b401\b/,
  /authentication[ _]failed/i,
  /please run \/login/i // Anthropic Claude Code-style hint
];

/**
 * Report whether `message` matches at least one regular expression.
 *
 * @param {string} message - Text to probe.
 * @param {RegExp[]} patterns - Candidate patterns (none use the `g`/`y` flag,
 *   so `.test` is stateless here).
 * @returns {boolean} true on the first match, false if nothing matches.
 */
function matchesAny(message, patterns) {
  return patterns.some((p) => p.test(message));
}

/**
 * Prefix a raw worker error with a recovery tag and a recovery hint.
 *
 * Categories are checked in a fixed order: context overflow, billing, auth,
 * rate limit. Billing is checked before rate limit, so a message that matches
 * both (for example "429: quota exceeded") is tagged `[billing]`.
 *
 * The function is idempotent: a message that already starts with one of the
 * tags is returned untouched. Without that guard a second pass would wrap the
 * message again, because each recovery hint contains its own trigger words
 * (for example "rate limit").
 *
 * @param {string} message - Raw error text from the provider or session.
 * @returns {string} The tagged message, or `message` unchanged when no
 *   category applies or it is already tagged.
 */
function classifyWorkerError(message) {
  if (/^\[(?:context_overflow|billing|auth|rate_limited)\] /.test(String(message))) {
    return message;
  }
  if (matchesAny(message, CONTEXT_OVERFLOW_PATTERNS)) {
    return `[context_overflow] Worker context window exceeded \u2014 the conversation is too large to continue. Recovery: call reset_worker(project) to wipe history, then re-prompt with the task. Re-prompting WITHOUT reset will fail the same way.\n\nOriginal: ${message}`;
  }
  if (matchesAny(message, BILLING_PATTERNS)) {
    return `[billing] Provider billing/quota issue. Recovery: surface to the user \u2014 they need to top up or switch providers. Do NOT retry.\n\nOriginal: ${message}`;
  }
  if (matchesAny(message, AUTH_PATTERNS)) {
    return `[auth] Provider authentication failed. Recovery: surface to the user \u2014 they need to re-login. Do NOT retry.\n\nOriginal: ${message}`;
  }
  if (matchesAny(message, RATE_LIMIT_PATTERNS2)) {
    return `[rate_limited] Provider rate limit hit. Recovery: wait ~30s, then re-prompt the same worker (no reset needed).\n\nOriginal: ${message}`;
  }
  return message;
}
91406
91486
  function safeBusHandler(workerName, handlerName, fn, onError) {
91407
91487
  return (event) => {
91408
91488
  try {
@@ -91427,10 +91507,19 @@ var Worker = class {
91427
91507
  currentText = "";
91428
91508
  currentTools = [];
91429
91509
  activeTools = /* @__PURE__ */ new Map();
91510
+ /** Parent (orchestrator-wide) signal — fires only on full shutdown. */
91511
+ parentSignal;
91512
+ /** Per-turn AbortController so the boss can cancel one worker mid-flight without taking down the whole pool. */
91513
+ turnAc = null;
91514
+ /** Set true when cancel() fired so the silent-death guard reports "Cancelled by boss" instead of a generic abort error. */
91515
+ wasCancelled = false;
91516
+ startedAt = null;
91517
+ lastEventAt = null;
91430
91518
  constructor(opts) {
91431
91519
  this.name = opts.name;
91432
91520
  this.cwd = opts.cwd;
91433
91521
  this.queue = opts.queue;
91522
+ this.parentSignal = opts.signal;
91434
91523
  this.session = new AgentSession({
91435
91524
  provider: opts.provider,
91436
91525
  model: opts.model,
@@ -91454,10 +91543,44 @@ var Worker = class {
91454
91543
  bossStore.setWorkerStatus(this.name, "working");
91455
91544
  this.currentText = "";
91456
91545
  this.currentTools = [];
91457
- void this.session.prompt(text).catch((err) => {
91458
- const message = err instanceof Error ? err.message : String(err);
91546
+ this.activeTools.clear();
91547
+ this.wasCancelled = false;
91548
+ this.startedAt = Date.now();
91549
+ this.lastEventAt = null;
91550
+ const turnAc = new AbortController();
91551
+ this.turnAc = turnAc;
91552
+ const onParentAbort = () => turnAc.abort();
91553
+ if (this.parentSignal.aborted) turnAc.abort();
91554
+ else this.parentSignal.addEventListener("abort", onParentAbort, { once: true });
91555
+ this.session.setSignal(turnAc.signal);
91556
+ void this.session.prompt(text).then(() => {
91557
+ if (this.status === "working") {
91558
+ const message = this.wasCancelled ? "Cancelled by boss." : "Session ended without agent_done \u2014 likely a silently swallowed abort or stream interruption.";
91559
+ const ts = (/* @__PURE__ */ new Date()).toISOString();
91560
+ this.status = "error";
91561
+ this.startedAt = null;
91562
+ log2(
91563
+ this.wasCancelled ? "INFO" : "ERROR",
91564
+ "worker",
91565
+ this.wasCancelled ? "cancelled" : "silent session end",
91566
+ { worker: this.name }
91567
+ );
91568
+ this.queue.removeStuckFor(this.name);
91569
+ bossStore.appendWorkerError(this.name, message, ts);
91570
+ this.queue.push({
91571
+ kind: "worker_error",
91572
+ project: this.name,
91573
+ message,
91574
+ timestamp: ts
91575
+ });
91576
+ }
91577
+ }).catch((err) => {
91578
+ const rawMessage = this.wasCancelled ? "Cancelled by boss." : err instanceof Error ? err.message : String(err);
91579
+ const message = this.wasCancelled ? rawMessage : classifyWorkerError(rawMessage);
91459
91580
  this.status = "error";
91581
+ this.startedAt = null;
91460
91582
  const ts = (/* @__PURE__ */ new Date()).toISOString();
91583
+ this.queue.removeStuckFor(this.name);
91461
91584
  bossStore.appendWorkerError(this.name, message, ts);
91462
91585
  this.queue.push({
91463
91586
  kind: "worker_error",
@@ -91465,8 +91588,64 @@ var Worker = class {
91465
91588
  message,
91466
91589
  timestamp: ts
91467
91590
  });
91591
+ }).finally(() => {
91592
+ this.parentSignal.removeEventListener("abort", onParentAbort);
91593
+ if (this.turnAc === turnAc) this.turnAc = null;
91468
91594
  });
91469
91595
  }
91596
+ /**
91597
+ * Cancel the current turn. Aborts only this worker's per-turn controller —
91598
+ * other workers keep running. The aborted turn surfaces as a `worker_error`
91599
+ * event with message "Cancelled by boss." so the orchestrator clears its
91600
+ * in-flight task entry and the boss is notified.
91601
+ *
91602
+ * Returns true if a turn was actually cancelled.
91603
+ */
91604
+ cancel() {
91605
+ if (this.status !== "working" || !this.turnAc) return false;
91606
+ this.wasCancelled = true;
91607
+ this.turnAc.abort();
91608
+ return true;
91609
+ }
91610
+ /**
91611
+ * Snapshot of the worker's current activity. Cheap to call; safe while the
91612
+ * worker is mid-turn. Used by the boss's get_worker_activity tool to peek
91613
+ * inside a long-running turn without waiting for completion.
91614
+ */
91615
+ getActivity() {
91616
+ const now2 = Date.now();
91617
+ const TEXT_TAIL = 400;
91618
+ const tail = this.currentText.length > TEXT_TAIL ? "\u2026" + this.currentText.slice(-TEXT_TAIL) : this.currentText;
91619
+ return {
91620
+ status: this.status,
91621
+ startedAt: this.startedAt ? new Date(this.startedAt).toISOString() : null,
91622
+ lastEventAt: this.lastEventAt ? new Date(this.lastEventAt).toISOString() : null,
91623
+ workingSeconds: this.startedAt ? Math.floor((now2 - this.startedAt) / 1e3) : 0,
91624
+ silentSeconds: this.lastEventAt ? Math.floor((now2 - this.lastEventAt) / 1e3) : 0,
91625
+ activeTools: [...this.activeTools.values()],
91626
+ completedTools: [...this.currentTools],
91627
+ textTail: tail,
91628
+ lastEventAtMs: this.lastEventAt
91629
+ };
91630
+ }
91631
+ /**
91632
+ * Hard reset: cancel any in-flight turn, wipe conversation history, force
91633
+ * status back to idle. Use when a worker is wedged in `error` or stuck on a
91634
+ * bad context that re-prompting can't recover from.
91635
+ */
91636
+ async reset() {
91637
+ this.cancel();
91638
+ await this.session.newSession();
91639
+ this.turnCount = 0;
91640
+ this.currentText = "";
91641
+ this.currentTools = [];
91642
+ this.activeTools.clear();
91643
+ this.startedAt = null;
91644
+ this.lastEventAt = null;
91645
+ this.wasCancelled = false;
91646
+ this.status = "idle";
91647
+ bossStore.setWorkerStatus(this.name, "idle");
91648
+ }
91470
91649
  async dispose() {
91471
91650
  await this.session.dispose();
91472
91651
  }
@@ -91485,9 +91664,11 @@ var Worker = class {
91485
91664
  }
91486
91665
  wireEvents() {
91487
91666
  const bus = this.session.eventBus;
91488
- const reportError2 = (message) => {
91667
+ const reportError2 = (rawMessage) => {
91489
91668
  const ts = (/* @__PURE__ */ new Date()).toISOString();
91669
+ const message = classifyWorkerError(rawMessage);
91490
91670
  this.status = "error";
91671
+ this.queue.removeStuckFor(this.name);
91491
91672
  bossStore.appendWorkerError(this.name, message, ts);
91492
91673
  this.queue.push({
91493
91674
  kind: "worker_error",
@@ -91503,6 +91684,7 @@ var Worker = class {
91503
91684
  "text_delta",
91504
91685
  ({ text }) => {
91505
91686
  this.currentText += text;
91687
+ this.lastEventAt = Date.now();
91506
91688
  },
91507
91689
  reportError2
91508
91690
  )
@@ -91514,6 +91696,7 @@ var Worker = class {
91514
91696
  "tool_call_start",
91515
91697
  ({ toolCallId, name }) => {
91516
91698
  this.activeTools.set(toolCallId, name);
91699
+ this.lastEventAt = Date.now();
91517
91700
  },
91518
91701
  reportError2
91519
91702
  )
@@ -91527,6 +91710,7 @@ var Worker = class {
91527
91710
  const name = this.activeTools.get(toolCallId);
91528
91711
  this.activeTools.delete(toolCallId);
91529
91712
  if (name) this.currentTools.push({ name, ok: !isError });
91713
+ this.lastEventAt = Date.now();
91530
91714
  },
91531
91715
  reportError2
91532
91716
  )
@@ -91549,7 +91733,11 @@ var Worker = class {
91549
91733
  };
91550
91734
  this.currentText = "";
91551
91735
  this.currentTools = [];
91736
+ this.activeTools.clear();
91737
+ this.startedAt = null;
91738
+ this.lastEventAt = null;
91552
91739
  this.status = "idle";
91740
+ this.queue.removeStuckFor(this.name);
91553
91741
  bossStore.appendWorkerEvent(summary);
91554
91742
  this.queue.push({ kind: "worker_turn_complete", summary });
91555
91743
  },
@@ -91598,6 +91786,19 @@ var EventQueue = class {
91598
91786
  size() {
91599
91787
  return this.user.length + this.rest.length;
91600
91788
  }
91789
+ /**
91790
+ * Drop any queued `worker_stuck` events for the given project. Called when a
91791
+ * `worker_turn_complete` or `worker_error` fires — the worker is no longer
91792
+ * running, so any pending stuck ping is now stale and would mislead the boss
91793
+ * (e.g. tell it to cancel a worker that already finished).
91794
+ *
91795
+ * Returns the number of events dropped.
91796
+ */
91797
+ removeStuckFor(project) {
91798
+ const before = this.rest.length;
91799
+ this.rest = this.rest.filter((e) => !(e.kind === "worker_stuck" && e.project === project));
91800
+ return before - this.rest.length;
91801
+ }
91601
91802
  };
91602
91803
 
91603
91804
  // src/settings.ts
@@ -91659,6 +91860,7 @@ Every user-role message is one of:
91659
91860
  1. A direct user message \u2014 respond to the user.
91660
91861
  2. \`[event:worker_turn_complete]\` \u2014 a worker finished a turn. Contains project, turn number, tools used (\u2713/\u2717), the worker's final text, AND a trailing \`other_workers:\` line listing every other project's current status (e.g. \`other_workers: B(working) C(idle) D(working)\`).
91661
91862
  3. \`[event:worker_error]\` \u2014 a worker hit an error. Diagnose, then retry or surface to the user. Same \`other_workers:\` trailer.
91863
+ 4. \`[event:worker_stuck]\` \u2014 a queued ping from the orchestrator's watchdog: a worker has been silent or running unusually long. Includes \`reason\` (silent | long_running), \`working_seconds\`, \`silent_seconds\`, \`active_tools\`, \`completed_this_turn\`, and a \`text_tail\` snippet. The worker is STILL RUNNING \u2014 this is informational, not an error. Decide: wait (most cases), \`cancel_worker\`, or surface. The watchdog won't ping again for the same worker until it emits new activity AND stalls again, so you won't be spammed.
91662
91864
 
91663
91865
  **Always read the \`other_workers:\` trailer before deciding "the run is done".** During a parallel dispatch you receive ONE event per finishing worker, in arrival order. It is wrong to treat the event you're processing as "the last one" unless \`other_workers:\` shows every other worker is \`idle\` (or \`error\`). If any are \`working\`, more events are coming \u2014 finish your routing for THIS event, then wait.
91664
91866
 
@@ -91678,6 +91880,9 @@ Worker dispatch:
91678
91880
  - \`get_worker_status(project)\` \u2014 single-project status check.
91679
91881
  - \`prompt_worker(project, message, fresh?)\` \u2014 send a prompt directly to a worker. FIRE-AND-FORGET. Returns immediately; you'll get \`worker_turn_complete\` later. NEVER call this on a worker whose status is "working".
91680
91882
  - \`get_worker_summary(project)\` \u2014 most recent turn summary. Use to inspect what was actually done.
91883
+ - \`get_worker_activity(project)\` \u2014 mid-turn peek: working/silent seconds, active tools, text tail. Use ONLY when a worker has been \`working\` long enough to wonder if it's stuck.
91884
+ - \`cancel_worker(project)\` \u2014 abort the current turn. Surfaces as a \`worker_error\` ("Cancelled by boss."). Other workers untouched.
91885
+ - \`reset_worker(project)\` \u2014 last resort: cancel + wipe history + force idle. Only when re-prompting can't recover.
91681
91886
 
91682
91887
  Task plan (persistent backlog, visible in the user's Ctrl+T overlay):
91683
91888
 
@@ -91796,6 +92001,30 @@ The worker has full context of its prior turn (you set fresh=false), so don't re
91796
92001
 
91797
92002
  This keeps the loop bounded \u2014 workers don't grind forever on a stuck task.
91798
92003
 
92004
+ # Recoverable error tags on worker_error
92005
+
92006
+ Worker errors are pre-classified \u2014 the message starts with a tag like \`[context_overflow]\`, \`[rate_limited]\`, \`[billing]\`, or \`[auth]\` when recovery is well-defined. Route off the tag, NOT a generic re-prompt:
92007
+
92008
+ - \`[context_overflow]\` \u2014 conversation outgrew the model's window. Call \`reset_worker(project)\` first, THEN re-prompt with the task. Re-prompting without reset fails the same way. Tell the user briefly that you reset.
92009
+ - \`[rate_limited]\` \u2014 wait for the next event (~30s of natural delay) or briefly note to user, then re-prompt the same worker. No reset.
92010
+ - \`[billing]\` / \`[auth]\` \u2014 surface to the user. Do not retry. The user must fix it.
92011
+ - Untagged \u2014 fall back to the normal BLOCKED handling (one corrective re-prompt, then surface).
92012
+
92013
+ # Checking on a stuck or slow worker
92014
+
92015
+ The orchestrator's watchdog will queue a \`[event:worker_stuck]\` ping if a worker is silent for too long. **It arrives like every other event \u2014 you process it AFTER finishing your current turn.** It does NOT interrupt you. Don't drop what you're doing to chase it; just route it when it's its turn.
92016
+
92017
+ When a stuck ping arrives (or you otherwise suspect a hang):
92018
+
92019
+ 1. The ping itself usually has enough info (\`silent_seconds\`, \`active_tools\`, \`text_tail\`). Only call \`get_worker_activity(project)\` if you need a fresher snapshot \u2014 the ping data may already be 30+ seconds old by the time you read it.
92020
+ - Active tool + recent activity \u2192 it's working, leave it alone. Stay silent or briefly note to the user.
92021
+ - High \`silent_seconds\` with no active tool \u2192 likely a stalled stream. Cancel.
92022
+ - Active \`bash\` for several minutes \u2192 probably a long command (test suite, build). Wait unless the user is impatient.
92023
+ 2. \`cancel_worker(project)\` if you decide to intervene. A \`worker_error\` arrives; treat it as a normal failed turn (re-prompt with a tighter instruction, or surface to the user).
92024
+ 3. \`reset_worker(project)\` ONLY if the worker is in \`error\` and re-prompting fails repeatedly, OR its context is clearly poisoned. Reset wipes history \u2014 the worker forgets everything. Always tell the user when you reset.
92025
+
92026
+ Don't poll \`get_worker_activity\` \u2014 call it at most once per concern. Don't cancel routinely; the user is mostly fine waiting.
92027
+
91799
92028
  # Style
91800
92029
 
91801
92030
  - Terse with the user. They want results, not narration.
@@ -91859,6 +92088,15 @@ var promptWorkerParams = external_exports.object({
91859
92088
  var getWorkerSummaryParams = external_exports.object({
91860
92089
  project: external_exports.string().describe("Project name as listed by list_workers.")
91861
92090
  });
92091
// Argument schemas for the three worker-control tools. Each tool takes a
// single `project` string identifying the worker to act on.

// Arguments for get_worker_activity.
var getWorkerActivityParams = external_exports.object({
  project: external_exports
    .string()
    .describe("Project name as listed by list_workers.")
});

// Arguments for cancel_worker.
var cancelWorkerParams = external_exports.object({
  project: external_exports
    .string()
    .describe("Project name as listed by list_workers.")
});

// Arguments for reset_worker.
var resetWorkerParams = external_exports.object({
  project: external_exports
    .string()
    .describe("Project name as listed by list_workers.")
});
91862
92100
  function createBossTools(deps) {
91863
92101
  const { workers, lastSummaries } = deps;
91864
92102
  const listWorkers = {
@@ -91921,7 +92159,58 @@ Final text:
91921
92159
  ${summary.finalText || "(empty)"}`;
91922
92160
  }
91923
92161
  };
91924
- return [listWorkers, getWorkerStatus, promptWorker, getWorkerSummary];
92162
+ const getWorkerActivity = {
92163
+ name: "get_worker_activity",
92164
+ description: "Peek at what a worker is doing RIGHT NOW (mid-turn). Returns working/silent durations in seconds, currently-running tool names, completed tools so far, and the tail of its streamed text. Use this when a worker has been `working` for a long time to decide whether it's making progress, hung, or worth cancelling.",
92165
+ parameters: getWorkerActivityParams,
92166
+ execute(args) {
92167
+ const w = workers.get(args.project);
92168
+ if (!w) return `Unknown project: ${args.project}`;
92169
+ const a = w.getActivity();
92170
+ const lines = [
92171
+ `Project: ${args.project}`,
92172
+ `Status: ${a.status}`,
92173
+ `Working: ${a.workingSeconds}s (last event ${a.silentSeconds}s ago)`,
92174
+ `Active tools: ${a.activeTools.length > 0 ? a.activeTools.join(", ") : "(none)"}`,
92175
+ `Completed this turn: ${a.completedTools.length > 0 ? a.completedTools.map((t) => `${t.ok ? "\u2713" : "\u2717"}${t.name}`).join(", ") : "(none)"}`,
92176
+ "",
92177
+ "Text tail:",
92178
+ a.textTail || "(no text yet)"
92179
+ ];
92180
+ return lines.join("\n");
92181
+ }
92182
+ };
92183
+ const cancelWorker = {
92184
+ name: "cancel_worker",
92185
+ description: "Abort a worker's current turn. Other workers are untouched. The cancelled worker emits a `worker_error` event with message 'Cancelled by boss.' so any in-flight task is cleared. After cancelling, you can re-prompt or reset the worker. No-op if the worker isn't `working`.",
92186
+ parameters: cancelWorkerParams,
92187
+ execute(args) {
92188
+ const w = workers.get(args.project);
92189
+ if (!w) return `Unknown project: ${args.project}`;
92190
+ const cancelled = w.cancel();
92191
+ return cancelled ? `Cancellation requested for "${args.project}". A worker_error event will arrive shortly.` : `Worker "${args.project}" is not working \u2014 nothing to cancel (status: ${w.getStatus()}).`;
92192
+ }
92193
+ };
92194
+ const resetWorker = {
92195
+ name: "reset_worker",
92196
+ description: "Hard reset: cancel any in-flight turn, wipe conversation history, force status back to idle. Use as a last resort when a worker is wedged in `error` or its context is so polluted that re-prompting won't recover. Equivalent to `fresh: true` plus a forced unstuck. After reset, the worker is ready for a new prompt_worker call.",
92197
+ parameters: resetWorkerParams,
92198
+ async execute(args) {
92199
+ const w = workers.get(args.project);
92200
+ if (!w) return `Unknown project: ${args.project}`;
92201
+ await w.reset();
92202
+ return `Worker "${args.project}" reset \u2014 status is idle, history wiped.`;
92203
+ }
92204
+ };
92205
+ return [
92206
+ listWorkers,
92207
+ getWorkerStatus,
92208
+ promptWorker,
92209
+ getWorkerSummary,
92210
+ getWorkerActivity,
92211
+ cancelWorker,
92212
+ resetWorker
92213
+ ];
91925
92214
  }
91926
92215
 
91927
92216
  // src/task-tools.ts
@@ -92553,6 +92842,9 @@ async function getSessionById(id2) {
92553
92842
  }
92554
92843
 
92555
92844
  // src/orchestrator.ts
92845
// Period of the stuck-worker watchdog timer, in milliseconds (30 s).
var WATCHDOG_INTERVAL_MS = 30 * 1e3;
// Seconds without any worker event before a turn is reported as "silent".
var SILENT_THRESHOLD_SEC = 90;
// Seconds of total turn time before a turn is reported as "long_running" (10 min).
var WORKING_THRESHOLD_SEC = 10 * 60;
92556
92848
  var GGBoss = class {
92557
92849
  workers = /* @__PURE__ */ new Map();
92558
92850
  lastSummaries = /* @__PURE__ */ new Map();
@@ -92591,6 +92883,23 @@ var GGBoss = class {
92591
92883
  * message that didn't dispatch any workers.
92592
92884
  */
92593
92885
  hadWorkerActivitySinceReady = false;
92886
+ /**
92887
+ * Watchdog for stuck workers. Fires every WATCHDOG_INTERVAL_MS; if any
92888
+ * "working" worker has been silent past SILENT_THRESHOLD_SEC or running
92889
+ * past WORKING_THRESHOLD_SEC, push a `worker_stuck` event onto the queue.
92890
+ * The boss processes it like any other event — AFTER its current turn
92891
+ * (queue is FIFO, boss is single-event-at-a-time), so this never
92892
+ * interrupts an in-flight boss turn.
92893
+ */
92894
+ watchdogTimer = null;
92895
+ /**
92896
+ * Per-project debounce. Stores the worker's lastEventAtMs at the moment we
92897
+ * pushed the stuck event. If the worker's lastEventAt advances past that,
92898
+ * we know the worker recovered (emitted a new event), so we clear the entry
92899
+ * and become eligible to fire again on the next stall. Also cleared on
92900
+ * worker_turn_complete / worker_error.
92901
+ */
92902
+ stuckPushedAt = /* @__PURE__ */ new Map();
92594
92903
  constructor(opts) {
92595
92904
  this.opts = opts;
92596
92905
  }
@@ -92884,6 +93193,7 @@ var GGBoss = class {
92884
93193
  }
92885
93194
  async run() {
92886
93195
  this.running = true;
93196
+ this.startWatchdog();
92887
93197
  while (this.running) {
92888
93198
  try {
92889
93199
  await this.runIteration();
@@ -92906,6 +93216,7 @@ var GGBoss = class {
92906
93216
  }
92907
93217
  let finishedTaskId = null;
92908
93218
  if (event.kind === "worker_turn_complete") {
93219
+ this.stuckPushedAt.delete(event.summary.project);
92909
93220
  void playDoneAudio();
92910
93221
  this.hadWorkerActivitySinceReady = true;
92911
93222
  this.lastSummaries.set(event.summary.project, event.summary);
@@ -92935,6 +93246,7 @@ var GGBoss = class {
92935
93246
  }
92936
93247
  if (event.kind === "worker_error") {
92937
93248
  this.hadWorkerActivitySinceReady = true;
93249
+ this.stuckPushedAt.delete(event.project);
92938
93250
  log2("ERROR", "worker_error", event.message, { project: event.project });
92939
93251
  const taskId = this.inFlightTaskByProject.get(event.project);
92940
93252
  if (taskId) {
@@ -92945,6 +93257,14 @@ var GGBoss = class {
92945
93257
  });
92946
93258
  }
92947
93259
  }
93260
+ if (event.kind === "worker_stuck") {
93261
+ log2("WARN", "worker_stuck", `worker silent\u2014pinging boss`, {
93262
+ project: event.project,
93263
+ reason: event.reason,
93264
+ silentSeconds: event.snapshot.silentSeconds,
93265
+ workingSeconds: event.snapshot.workingSeconds
93266
+ });
93267
+ }
92948
93268
  await this.runCompaction(false);
92949
93269
  const workerSnapshot = [...this.workers.entries()].map(([name, w]) => ({
92950
93270
  name,
@@ -93052,8 +93372,62 @@ var GGBoss = class {
93052
93372
  await this.dispatchTaskByDescription(project, next.description, next.fresh === true, next.id);
93053
93373
  this.pendingAutoChainNotices.push({ project, title: next.title });
93054
93374
  }
93375
+ /**
93376
+ * Start the stuck-worker watchdog. Idempotent.
93377
+ *
93378
+ * Safety properties:
93379
+ * - Pushes onto the same FIFO queue the boss already drains, so the boss
93380
+ * never gets interrupted mid-turn — stuck pings are processed AFTER
93381
+ * whatever it's currently doing.
93382
+ * - Per-worker debounce (`stuckPushedAt`) prevents spam; a worker only
93383
+ * gets re-flagged after it emits a new event AND stalls again, or after
93384
+ * it completes/errors and stalls on a fresh turn.
93385
+ */
93386
+ startWatchdog() {
93387
+ if (this.watchdogTimer) return;
93388
+ this.watchdogTimer = setInterval(() => {
93389
+ try {
93390
+ this.checkStuckWorkers();
93391
+ } catch (err) {
93392
+ const message = err instanceof Error ? err.message : String(err);
93393
+ log2("ERROR", "watchdog", "tick threw", { message });
93394
+ }
93395
+ }, WATCHDOG_INTERVAL_MS);
93396
+ this.watchdogTimer.unref?.();
93397
+ }
93398
+ stopWatchdog() {
93399
+ if (this.watchdogTimer) {
93400
+ clearInterval(this.watchdogTimer);
93401
+ this.watchdogTimer = null;
93402
+ }
93403
+ }
93404
+ checkStuckWorkers() {
93405
+ for (const [name, worker] of this.workers) {
93406
+ const decision = decideStuckEvent({
93407
+ status: worker.getStatus(),
93408
+ activity: worker.getStatus() === "working" ? worker.getActivity() : null,
93409
+ lastPushedAt: this.stuckPushedAt.has(name) ? this.stuckPushedAt.get(name) ?? null : void 0,
93410
+ silentThresholdSec: SILENT_THRESHOLD_SEC,
93411
+ workingThresholdSec: WORKING_THRESHOLD_SEC
93412
+ });
93413
+ if (decision.kind === "clear_debounce") {
93414
+ this.stuckPushedAt.delete(name);
93415
+ continue;
93416
+ }
93417
+ if (decision.kind === "skip") continue;
93418
+ this.stuckPushedAt.set(name, decision.lastEventAtMs);
93419
+ this.queue.push({
93420
+ kind: "worker_stuck",
93421
+ project: name,
93422
+ reason: decision.reason,
93423
+ snapshot: decision.snapshot,
93424
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
93425
+ });
93426
+ }
93427
+ }
93055
93428
  async dispose() {
93056
93429
  this.running = false;
93430
+ this.stopWatchdog();
93057
93431
  this.ac.abort();
93058
93432
  this.queue.push({
93059
93433
  kind: "user_message",
@@ -93078,6 +93452,37 @@ function reportedToTaskStatus(reported, anyToolFailed) {
93078
93452
  if (reported === "UNVERIFIED" || reported === "PARTIAL") return "in_progress";
93079
93453
  return anyToolFailed ? "blocked" : "done";
93080
93454
  }
93455
/**
 * Decide what the watchdog should do for one worker on one tick.
 *
 * @param {object} input
 * @param {string} input.status - Worker status; only "working" can be stuck.
 * @param {object|null} input.activity - Activity snapshot (`lastEventAtMs`,
 *   `silentSeconds`, `workingSeconds`, `activeTools`, `completedTools`,
 *   `textTail`), or null when the worker is not working.
 * @param {number|null|undefined} input.lastPushedAt - Debounce marker:
 *   `undefined` when no ping has been sent this turn, otherwise the worker's
 *   `lastEventAtMs` at the time of the last ping (`null` if it had emitted
 *   nothing yet).
 * @param {number} input.silentThresholdSec - Silence that counts as "silent".
 * @param {number} input.workingThresholdSec - Turn length that counts as
 *   "long_running".
 * @returns {{kind: "skip"} | {kind: "clear_debounce"} |
 *   {kind: "push", reason: "silent" | "long_running", lastEventAtMs: number|null, snapshot: object}}
 *
 * Rules:
 *  - Not working: clear a leftover marker if there is one, else skip.
 *  - Already pinged and no event since the ping: skip (debounced).
 *  - "silent" may fire whenever the worker has emitted at least one event and
 *    has since been quiet past the threshold.
 *  - "long_running" fires at most once per marker, i.e. only when no ping has
 *    been sent yet. A healthy worker that keeps emitting events past the
 *    working threshold re-opens the debounce on every event, so without this
 *    limit it would be pinged on every tick. Trade-off: a turn already pinged
 *    as "silent" gets no later "long_running" ping.
 *  - A marker of `null` (pinged before any event) is treated as passed by the
 *    first real event, so a later stall can still be reported.
 */
function decideStuckEvent(input) {
  const { status, activity, lastPushedAt, silentThresholdSec, workingThresholdSec } = input;

  if (status !== "working" || !activity) {
    return lastPushedAt !== void 0 ? { kind: "clear_debounce" } : { kind: "skip" };
  }

  const lastEvent = activity.lastEventAtMs;
  const alreadyPinged = lastPushedAt !== void 0;

  if (alreadyPinged) {
    const emittedSincePing = lastEvent !== null && (lastPushedAt === null || lastEvent > lastPushedAt);
    if (!emittedSincePing) {
      return { kind: "skip" };
    }
  }

  let reason = null;
  if (lastEvent !== null && activity.silentSeconds >= silentThresholdSec) {
    reason = "silent";
  } else if (!alreadyPinged && activity.workingSeconds >= workingThresholdSec) {
    reason = "long_running";
  }
  if (!reason) {
    return { kind: "skip" };
  }

  return {
    kind: "push",
    reason,
    lastEventAtMs: lastEvent,
    snapshot: {
      workingSeconds: activity.workingSeconds,
      silentSeconds: activity.silentSeconds,
      activeTools: activity.activeTools,
      completedTools: activity.completedTools,
      textTail: activity.textTail
    }
  };
}
93081
93486
  function formatEventForBoss(event, workerSnapshot, autoChainNotices) {
93082
93487
  if (event.kind === "user_message") {
93083
93488
  return event.text;
@@ -93095,15 +93500,27 @@ auto_dispatched_since_last_event:
93095
93500
  ${lines.join("\n")}`;
93096
93501
  };
93097
93502
  if (event.kind === "worker_turn_complete") {
93098
- const s = event.summary;
93099
- const tools = s.toolsUsed.length > 0 ? s.toolsUsed.map((t) => `${t.ok ? "\u2713" : "\u2717"}${t.name}`).join(", ") : "(none)";
93100
- return `[event:worker_turn_complete] project="${s.project}" turn=${s.turnIndex} timestamp=${s.timestamp}
93503
+ const s2 = event.summary;
93504
+ const tools = s2.toolsUsed.length > 0 ? s2.toolsUsed.map((t) => `${t.ok ? "\u2713" : "\u2717"}${t.name}`).join(", ") : "(none)";
93505
+ return `[event:worker_turn_complete] project="${s2.project}" turn=${s2.turnIndex} timestamp=${s2.timestamp}
93101
93506
  tools_used: ${tools}
93102
93507
  final_text:
93103
- ${s.finalText || "(empty)"}${renderOthers(s.project)}${renderAutoChain()}`;
93508
+ ${s2.finalText || "(empty)"}${renderOthers(s2.project)}${renderAutoChain()}`;
93104
93509
  }
93105
- return `[event:worker_error] project="${event.project}" timestamp=${event.timestamp}
93510
+ if (event.kind === "worker_error") {
93511
+ return `[event:worker_error] project="${event.project}" timestamp=${event.timestamp}
93106
93512
  ${event.message}${renderOthers(event.project)}${renderAutoChain()}`;
93513
+ }
93514
+ const s = event.snapshot;
93515
+ const completed = s.completedTools.length > 0 ? s.completedTools.map((t) => `${t.ok ? "\u2713" : "\u2717"}${t.name}`).join(", ") : "(none)";
93516
+ const active = s.activeTools.length > 0 ? s.activeTools.join(", ") : "(none)";
93517
+ return `[event:worker_stuck] project="${event.project}" reason=${event.reason} timestamp=${event.timestamp}
93518
+ working_seconds: ${s.workingSeconds}
93519
+ silent_seconds: ${s.silentSeconds}
93520
+ active_tools: ${active}
93521
+ completed_this_turn: ${completed}
93522
+ text_tail:
93523
+ ${s.textTail || "(no text yet)"}${renderOthers(event.project)}${renderAutoChain()}`;
93107
93524
  }
93108
93525
  function computeContextUsed(usage, provider) {
93109
93526
  const inputContext = (usage.inputTokens ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
@@ -93293,4 +93710,4 @@ react/cjs/react-jsx-runtime.development.js:
93293
93710
  * LICENSE file in the root directory of this source tree.
93294
93711
  *)
93295
93712
  */
93296
- //# sourceMappingURL=chunk-2SKO3MTH.js.map
93713
+ //# sourceMappingURL=chunk-YT5BRKQW.js.map