@kenkaiiii/gg-boss 4.3.153 → 4.3.154
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -91403,6 +91403,81 @@ var bossStore = {
|
|
|
91403
91403
|
};
|
|
91404
91404
|
|
|
91405
91405
|
// src/worker.ts
|
|
91406
|
+
// Message fragments that indicate the worker's conversation no longer fits
// the model's context window. Tested case-insensitively against the raw
// provider error message. The provider tags are best-effort attributions.
var CONTEXT_OVERFLOW_PATTERNS = [
  /context_length_exceeded/i,
  /context length exceeded/i,
  /context window/i, // OpenAI Codex / Responses
  /maximum context length/i, // OpenAI / OpenRouter / Mistral
  /prompt is too long/i, // Anthropic
  /request_too_large/i, // Anthropic HTTP 413
  /input is too long/i, // Bedrock
  /input token count.*exceeds the maximum/i, // Gemini
  /maximum prompt length/i, // xAI / Grok
  /reduce the length of the messages/i, // Groq
  /too large for model/i, // Mistral
  /token limit/i // generic
];
|
|
91430
|
+
// Message fragments that indicate a provider rate limit (retryable after a
// short wait). `\b429\b` matches the HTTP status code as a standalone number.
var RATE_LIMIT_PATTERNS2 = [
  /rate[ _-]?limit/i,
  /\b429\b/,
  /too many requests/i,
  /tokens per minute/i,
  /requests per minute/i
];
|
|
91437
|
+
// Message fragments that indicate a billing or quota problem the user has to
// resolve themselves. `\b402\b` matches the HTTP status code as a standalone
// number.
var BILLING_PATTERNS = [
  /insufficient balance/i,
  /insufficient[ _]quota/i,
  /quota exceeded/i,
  /quota_exceeded/i,
  /credit balance/i,
  /please recharge/i,
  /payment required/i,
  /\b402\b/
];
|
|
91447
|
+
// Message fragments that indicate provider authentication failed and the
// user needs to log in again. `\b401\b` matches the HTTP status code as a
// standalone number.
var AUTH_PATTERNS = [
  /invalid[ _]api[ _]key/i,
  /unauthorized/i,
  /\b401\b/,
  /authentication[ _]failed/i,
  /please run \/login/i // Anthropic Claude Code-style hint
];
|
|
91455
|
+
/**
 * Report whether `message` matches at least one of the given patterns.
 *
 * @param {string} message - Text to test (non-strings are coerced by `RegExp.prototype.test`).
 * @param {RegExp[]} patterns - Patterns to try, in order. They should not carry
 *   the `g` or `y` flag: `test()` on such a regex is stateful across calls.
 * @returns {boolean} True on the first match; false for an empty list.
 */
function matchesAny(message, patterns) {
  return patterns.some((p) => p.test(message));
}
|
|
91458
|
+
/**
 * Prefix a raw worker error message with a recovery tag when it matches a
 * known failure class, so the boss can route off the tag.
 *
 * Categories are checked in a fixed order and the first match wins:
 * context overflow, billing, auth, rate limit. The original message is always
 * preserved after an `Original:` marker.
 *
 * @param {string} message - Raw error message from the worker/provider.
 * @returns {string} The tagged message with recovery guidance, or `message`
 *   unchanged when no category matches.
 */
function classifyWorkerError(message) {
  if (matchesAny(message, CONTEXT_OVERFLOW_PATTERNS)) {
    return `[context_overflow] Worker context window exceeded \u2014 the conversation is too large to continue. Recovery: call reset_worker(project) to wipe history, then re-prompt with the task. Re-prompting WITHOUT reset will fail the same way.

Original: ${message}`;
  }
  if (matchesAny(message, BILLING_PATTERNS)) {
    return `[billing] Provider billing/quota issue. Recovery: surface to the user \u2014 they need to top up or switch providers. Do NOT retry.

Original: ${message}`;
  }
  if (matchesAny(message, AUTH_PATTERNS)) {
    return `[auth] Provider authentication failed. Recovery: surface to the user \u2014 they need to re-login. Do NOT retry.

Original: ${message}`;
  }
  if (matchesAny(message, RATE_LIMIT_PATTERNS2)) {
    return `[rate_limited] Provider rate limit hit. Recovery: wait ~30s, then re-prompt the same worker (no reset needed).

Original: ${message}`;
  }
  return message;
}
|
|
91406
91481
|
function safeBusHandler(workerName, handlerName, fn, onError) {
|
|
91407
91482
|
return (event) => {
|
|
91408
91483
|
try {
|
|
@@ -91427,10 +91502,19 @@ var Worker = class {
|
|
|
91427
91502
|
currentText = "";
|
|
91428
91503
|
currentTools = [];
|
|
91429
91504
|
activeTools = /* @__PURE__ */ new Map();
|
|
91505
|
+
/** Parent (orchestrator-wide) signal — fires only on full shutdown. */
parentSignal;
/** Per-turn AbortController so the boss can cancel one worker mid-flight without taking down the whole pool. */
turnAc = null;
/** Set true when cancel() fired so the silent-death guard reports "Cancelled by boss" instead of a generic abort error. */
wasCancelled = false;
/** Epoch milliseconds at which the current turn started; null when no turn is being timed. */
startedAt = null;
/** Epoch milliseconds of the most recent streamed event (text delta or tool start/end) in the current turn; null until one arrives. */
lastEventAt = null;
|
|
91430
91513
|
constructor(opts) {
|
|
91431
91514
|
this.name = opts.name;
|
|
91432
91515
|
this.cwd = opts.cwd;
|
|
91433
91516
|
this.queue = opts.queue;
|
|
91517
|
+
this.parentSignal = opts.signal;
|
|
91434
91518
|
this.session = new AgentSession({
|
|
91435
91519
|
provider: opts.provider,
|
|
91436
91520
|
model: opts.model,
|
|
@@ -91454,10 +91538,44 @@ var Worker = class {
|
|
|
91454
91538
|
bossStore.setWorkerStatus(this.name, "working");
|
|
91455
91539
|
this.currentText = "";
|
|
91456
91540
|
this.currentTools = [];
|
|
91457
|
-
|
|
91458
|
-
|
|
91541
|
+
this.activeTools.clear();
|
|
91542
|
+
this.wasCancelled = false;
|
|
91543
|
+
this.startedAt = Date.now();
|
|
91544
|
+
this.lastEventAt = null;
|
|
91545
|
+
const turnAc = new AbortController();
|
|
91546
|
+
this.turnAc = turnAc;
|
|
91547
|
+
const onParentAbort = () => turnAc.abort();
|
|
91548
|
+
if (this.parentSignal.aborted) turnAc.abort();
|
|
91549
|
+
else this.parentSignal.addEventListener("abort", onParentAbort, { once: true });
|
|
91550
|
+
this.session.setSignal(turnAc.signal);
|
|
91551
|
+
void this.session.prompt(text).then(() => {
|
|
91552
|
+
if (this.status === "working") {
|
|
91553
|
+
const message = this.wasCancelled ? "Cancelled by boss." : "Session ended without agent_done \u2014 likely a silently swallowed abort or stream interruption.";
|
|
91554
|
+
const ts = (/* @__PURE__ */ new Date()).toISOString();
|
|
91555
|
+
this.status = "error";
|
|
91556
|
+
this.startedAt = null;
|
|
91557
|
+
log2(
|
|
91558
|
+
this.wasCancelled ? "INFO" : "ERROR",
|
|
91559
|
+
"worker",
|
|
91560
|
+
this.wasCancelled ? "cancelled" : "silent session end",
|
|
91561
|
+
{ worker: this.name }
|
|
91562
|
+
);
|
|
91563
|
+
this.queue.removeStuckFor(this.name);
|
|
91564
|
+
bossStore.appendWorkerError(this.name, message, ts);
|
|
91565
|
+
this.queue.push({
|
|
91566
|
+
kind: "worker_error",
|
|
91567
|
+
project: this.name,
|
|
91568
|
+
message,
|
|
91569
|
+
timestamp: ts
|
|
91570
|
+
});
|
|
91571
|
+
}
|
|
91572
|
+
}).catch((err) => {
|
|
91573
|
+
const rawMessage = this.wasCancelled ? "Cancelled by boss." : err instanceof Error ? err.message : String(err);
|
|
91574
|
+
const message = this.wasCancelled ? rawMessage : classifyWorkerError(rawMessage);
|
|
91459
91575
|
this.status = "error";
|
|
91576
|
+
this.startedAt = null;
|
|
91460
91577
|
const ts = (/* @__PURE__ */ new Date()).toISOString();
|
|
91578
|
+
this.queue.removeStuckFor(this.name);
|
|
91461
91579
|
bossStore.appendWorkerError(this.name, message, ts);
|
|
91462
91580
|
this.queue.push({
|
|
91463
91581
|
kind: "worker_error",
|
|
@@ -91465,8 +91583,64 @@ var Worker = class {
|
|
|
91465
91583
|
message,
|
|
91466
91584
|
timestamp: ts
|
|
91467
91585
|
});
|
|
91586
|
+
}).finally(() => {
|
|
91587
|
+
this.parentSignal.removeEventListener("abort", onParentAbort);
|
|
91588
|
+
if (this.turnAc === turnAc) this.turnAc = null;
|
|
91468
91589
|
});
|
|
91469
91590
|
}
|
|
91591
|
+
/**
|
|
91592
|
+
* Cancel the current turn. Aborts only this worker's per-turn controller —
|
|
91593
|
+
* other workers keep running. The aborted turn surfaces as a `worker_error`
|
|
91594
|
+
* event with message "Cancelled by boss." so the orchestrator clears its
|
|
91595
|
+
* in-flight task entry and the boss is notified.
|
|
91596
|
+
*
|
|
91597
|
+
* Returns true if a turn was actually cancelled.
|
|
91598
|
+
*/
|
|
91599
|
+
cancel() {
|
|
91600
|
+
if (this.status !== "working" || !this.turnAc) return false;
|
|
91601
|
+
this.wasCancelled = true;
|
|
91602
|
+
this.turnAc.abort();
|
|
91603
|
+
return true;
|
|
91604
|
+
}
|
|
91605
|
+
/**
|
|
91606
|
+
* Snapshot of the worker's current activity. Cheap to call; safe while the
|
|
91607
|
+
* worker is mid-turn. Used by the boss's get_worker_activity tool to peek
|
|
91608
|
+
* inside a long-running turn without waiting for completion.
|
|
91609
|
+
*/
|
|
91610
|
+
getActivity() {
|
|
91611
|
+
const now2 = Date.now();
|
|
91612
|
+
const TEXT_TAIL = 400;
|
|
91613
|
+
const tail = this.currentText.length > TEXT_TAIL ? "\u2026" + this.currentText.slice(-TEXT_TAIL) : this.currentText;
|
|
91614
|
+
return {
|
|
91615
|
+
status: this.status,
|
|
91616
|
+
startedAt: this.startedAt ? new Date(this.startedAt).toISOString() : null,
|
|
91617
|
+
lastEventAt: this.lastEventAt ? new Date(this.lastEventAt).toISOString() : null,
|
|
91618
|
+
workingSeconds: this.startedAt ? Math.floor((now2 - this.startedAt) / 1e3) : 0,
|
|
91619
|
+
silentSeconds: this.lastEventAt ? Math.floor((now2 - this.lastEventAt) / 1e3) : 0,
|
|
91620
|
+
activeTools: [...this.activeTools.values()],
|
|
91621
|
+
completedTools: [...this.currentTools],
|
|
91622
|
+
textTail: tail,
|
|
91623
|
+
lastEventAtMs: this.lastEventAt
|
|
91624
|
+
};
|
|
91625
|
+
}
|
|
91626
|
+
/**
|
|
91627
|
+
* Hard reset: cancel any in-flight turn, wipe conversation history, force
|
|
91628
|
+
* status back to idle. Use when a worker is wedged in `error` or stuck on a
|
|
91629
|
+
* bad context that re-prompting can't recover from.
|
|
91630
|
+
*/
|
|
91631
|
+
async reset() {
|
|
91632
|
+
this.cancel();
|
|
91633
|
+
await this.session.newSession();
|
|
91634
|
+
this.turnCount = 0;
|
|
91635
|
+
this.currentText = "";
|
|
91636
|
+
this.currentTools = [];
|
|
91637
|
+
this.activeTools.clear();
|
|
91638
|
+
this.startedAt = null;
|
|
91639
|
+
this.lastEventAt = null;
|
|
91640
|
+
this.wasCancelled = false;
|
|
91641
|
+
this.status = "idle";
|
|
91642
|
+
bossStore.setWorkerStatus(this.name, "idle");
|
|
91643
|
+
}
|
|
91470
91644
|
async dispose() {
|
|
91471
91645
|
await this.session.dispose();
|
|
91472
91646
|
}
|
|
@@ -91485,9 +91659,11 @@ var Worker = class {
|
|
|
91485
91659
|
}
|
|
91486
91660
|
wireEvents() {
|
|
91487
91661
|
const bus = this.session.eventBus;
|
|
91488
|
-
const reportError2 = (
|
|
91662
|
+
const reportError2 = (rawMessage) => {
|
|
91489
91663
|
const ts = (/* @__PURE__ */ new Date()).toISOString();
|
|
91664
|
+
const message = classifyWorkerError(rawMessage);
|
|
91490
91665
|
this.status = "error";
|
|
91666
|
+
this.queue.removeStuckFor(this.name);
|
|
91491
91667
|
bossStore.appendWorkerError(this.name, message, ts);
|
|
91492
91668
|
this.queue.push({
|
|
91493
91669
|
kind: "worker_error",
|
|
@@ -91503,6 +91679,7 @@ var Worker = class {
|
|
|
91503
91679
|
"text_delta",
|
|
91504
91680
|
({ text }) => {
|
|
91505
91681
|
this.currentText += text;
|
|
91682
|
+
this.lastEventAt = Date.now();
|
|
91506
91683
|
},
|
|
91507
91684
|
reportError2
|
|
91508
91685
|
)
|
|
@@ -91514,6 +91691,7 @@ var Worker = class {
|
|
|
91514
91691
|
"tool_call_start",
|
|
91515
91692
|
({ toolCallId, name }) => {
|
|
91516
91693
|
this.activeTools.set(toolCallId, name);
|
|
91694
|
+
this.lastEventAt = Date.now();
|
|
91517
91695
|
},
|
|
91518
91696
|
reportError2
|
|
91519
91697
|
)
|
|
@@ -91527,6 +91705,7 @@ var Worker = class {
|
|
|
91527
91705
|
const name = this.activeTools.get(toolCallId);
|
|
91528
91706
|
this.activeTools.delete(toolCallId);
|
|
91529
91707
|
if (name) this.currentTools.push({ name, ok: !isError });
|
|
91708
|
+
this.lastEventAt = Date.now();
|
|
91530
91709
|
},
|
|
91531
91710
|
reportError2
|
|
91532
91711
|
)
|
|
@@ -91549,7 +91728,11 @@ var Worker = class {
|
|
|
91549
91728
|
};
|
|
91550
91729
|
this.currentText = "";
|
|
91551
91730
|
this.currentTools = [];
|
|
91731
|
+
this.activeTools.clear();
|
|
91732
|
+
this.startedAt = null;
|
|
91733
|
+
this.lastEventAt = null;
|
|
91552
91734
|
this.status = "idle";
|
|
91735
|
+
this.queue.removeStuckFor(this.name);
|
|
91553
91736
|
bossStore.appendWorkerEvent(summary);
|
|
91554
91737
|
this.queue.push({ kind: "worker_turn_complete", summary });
|
|
91555
91738
|
},
|
|
@@ -91598,6 +91781,19 @@ var EventQueue = class {
|
|
|
91598
91781
|
size() {
|
|
91599
91782
|
return this.user.length + this.rest.length;
|
|
91600
91783
|
}
|
|
91784
|
+
/**
|
|
91785
|
+
* Drop any queued `worker_stuck` events for the given project. Called when a
|
|
91786
|
+
* `worker_turn_complete` or `worker_error` fires — the worker is no longer
|
|
91787
|
+
* running, so any pending stuck ping is now stale and would mislead the boss
|
|
91788
|
+
* (e.g. tell it to cancel a worker that already finished).
|
|
91789
|
+
*
|
|
91790
|
+
* Returns the number of events dropped.
|
|
91791
|
+
*/
|
|
91792
|
+
removeStuckFor(project) {
|
|
91793
|
+
const before = this.rest.length;
|
|
91794
|
+
this.rest = this.rest.filter((e) => !(e.kind === "worker_stuck" && e.project === project));
|
|
91795
|
+
return before - this.rest.length;
|
|
91796
|
+
}
|
|
91601
91797
|
};
|
|
91602
91798
|
|
|
91603
91799
|
// src/settings.ts
|
|
@@ -91659,6 +91855,7 @@ Every user-role message is one of:
|
|
|
91659
91855
|
1. A direct user message \u2014 respond to the user.
|
|
91660
91856
|
2. \`[event:worker_turn_complete]\` \u2014 a worker finished a turn. Contains project, turn number, tools used (\u2713/\u2717), the worker's final text, AND a trailing \`other_workers:\` line listing every other project's current status (e.g. \`other_workers: B(working) C(idle) D(working)\`).
|
|
91661
91857
|
3. \`[event:worker_error]\` \u2014 a worker hit an error. Diagnose, then retry or surface to the user. Same \`other_workers:\` trailer.
|
|
91858
|
+
4. \`[event:worker_stuck]\` \u2014 a queued ping from the orchestrator's watchdog: a worker has been silent or running unusually long. Includes \`reason\` (silent | long_running), \`working_seconds\`, \`silent_seconds\`, \`active_tools\`, \`completed_this_turn\`, and a \`text_tail\` snippet. The worker is STILL RUNNING \u2014 this is informational, not an error. Decide: wait (most cases), \`cancel_worker\`, or surface. The watchdog won't ping again for the same worker until it emits new activity AND stalls again, so you won't be spammed.
|
|
91662
91859
|
|
|
91663
91860
|
**Always read the \`other_workers:\` trailer before deciding "the run is done".** During a parallel dispatch you receive ONE event per finishing worker, in arrival order. It is wrong to treat the event you're processing as "the last one" unless \`other_workers:\` shows every other worker is \`idle\` (or \`error\`). If any are \`working\`, more events are coming \u2014 finish your routing for THIS event, then wait.
|
|
91664
91861
|
|
|
@@ -91678,6 +91875,9 @@ Worker dispatch:
|
|
|
91678
91875
|
- \`get_worker_status(project)\` \u2014 single-project status check.
|
|
91679
91876
|
- \`prompt_worker(project, message, fresh?)\` \u2014 send a prompt directly to a worker. FIRE-AND-FORGET. Returns immediately; you'll get \`worker_turn_complete\` later. NEVER call this on a worker whose status is "working".
|
|
91680
91877
|
- \`get_worker_summary(project)\` \u2014 most recent turn summary. Use to inspect what was actually done.
|
|
91878
|
+
- \`get_worker_activity(project)\` \u2014 mid-turn peek: working/silent seconds, active tools, text tail. Use ONLY when a worker has been \`working\` long enough to wonder if it's stuck.
|
|
91879
|
+
- \`cancel_worker(project)\` \u2014 abort the current turn. Surfaces as a \`worker_error\` ("Cancelled by boss."). Other workers untouched.
|
|
91880
|
+
- \`reset_worker(project)\` \u2014 last resort: cancel + wipe history + force idle. Only when re-prompting can't recover.
|
|
91681
91881
|
|
|
91682
91882
|
Task plan (persistent backlog, visible in the user's Ctrl+T overlay):
|
|
91683
91883
|
|
|
@@ -91796,6 +91996,30 @@ The worker has full context of its prior turn (you set fresh=false), so don't re
|
|
|
91796
91996
|
|
|
91797
91997
|
This keeps the loop bounded \u2014 workers don't grind forever on a stuck task.
|
|
91798
91998
|
|
|
91999
|
+
# Recoverable error tags on worker_error
|
|
92000
|
+
|
|
92001
|
+
Worker errors are pre-classified \u2014 the message starts with a tag like \`[context_overflow]\`, \`[rate_limited]\`, \`[billing]\`, or \`[auth]\` when recovery is well-defined. Route off the tag, NOT a generic re-prompt:
|
|
92002
|
+
|
|
92003
|
+
- \`[context_overflow]\` \u2014 conversation outgrew the model's window. Call \`reset_worker(project)\` first, THEN re-prompt with the task. Re-prompting without reset fails the same way. Tell the user briefly that you reset.
|
|
92004
|
+
- \`[rate_limited]\` \u2014 wait for the next event (~30s of natural delay) or briefly note to user, then re-prompt the same worker. No reset.
|
|
92005
|
+
- \`[billing]\` / \`[auth]\` \u2014 surface to the user. Do not retry. The user must fix it.
|
|
92006
|
+
- Untagged \u2014 fall back to the normal BLOCKED handling (one corrective re-prompt, then surface).
|
|
92007
|
+
|
|
92008
|
+
# Checking on a stuck or slow worker
|
|
92009
|
+
|
|
92010
|
+
The orchestrator's watchdog will queue a \`[event:worker_stuck]\` ping if a worker is silent for too long. **It arrives like every other event \u2014 you process it AFTER finishing your current turn.** It does NOT interrupt you. Don't drop what you're doing to chase it; just route it when it's its turn.
|
|
92011
|
+
|
|
92012
|
+
When a stuck ping arrives (or you otherwise suspect a hang):
|
|
92013
|
+
|
|
92014
|
+
1. The ping itself usually has enough info (\`silent_seconds\`, \`active_tools\`, \`text_tail\`). Only call \`get_worker_activity(project)\` if you need a fresher snapshot \u2014 the ping data may already be 30+ seconds old by the time you read it.
|
|
92015
|
+
- Active tool + recent activity \u2192 it's working, leave it alone. Stay silent or briefly note to the user.
|
|
92016
|
+
- High \`silent_seconds\` with no active tool \u2192 likely a stalled stream. Cancel.
|
|
92017
|
+
- Active \`bash\` for several minutes \u2192 probably a long command (test suite, build). Wait unless the user is impatient.
|
|
92018
|
+
2. \`cancel_worker(project)\` if you decide to intervene. A \`worker_error\` arrives; treat it as a normal failed turn (re-prompt with a tighter instruction, or surface to the user).
|
|
92019
|
+
3. \`reset_worker(project)\` ONLY if the worker is in \`error\` and re-prompting fails repeatedly, OR its context is clearly poisoned. Reset wipes history \u2014 the worker forgets everything. Always tell the user when you reset.
|
|
92020
|
+
|
|
92021
|
+
Don't poll \`get_worker_activity\` \u2014 call it at most once per concern. Don't cancel routinely; the user is mostly fine waiting.
|
|
92022
|
+
|
|
91799
92023
|
# Style
|
|
91800
92024
|
|
|
91801
92025
|
- Terse with the user. They want results, not narration.
|
|
@@ -91859,6 +92083,15 @@ var promptWorkerParams = external_exports.object({
|
|
|
91859
92083
|
var getWorkerSummaryParams = external_exports.object({
|
|
91860
92084
|
project: external_exports.string().describe("Project name as listed by list_workers.")
|
|
91861
92085
|
});
|
|
92086
|
+
// Parameter schemas for the worker-control tools. Each takes only the target
// project name.
var getWorkerActivityParams = external_exports.object({
  project: external_exports.string().describe("Project name as listed by list_workers.")
});
var cancelWorkerParams = external_exports.object({
  project: external_exports.string().describe("Project name as listed by list_workers.")
});
var resetWorkerParams = external_exports.object({
  project: external_exports.string().describe("Project name as listed by list_workers.")
});
|
|
91862
92095
|
function createBossTools(deps) {
|
|
91863
92096
|
const { workers, lastSummaries } = deps;
|
|
91864
92097
|
const listWorkers = {
|
|
@@ -91921,7 +92154,58 @@ Final text:
|
|
|
91921
92154
|
${summary.finalText || "(empty)"}`;
|
|
91922
92155
|
}
|
|
91923
92156
|
};
|
|
91924
|
-
|
|
92157
|
+
// Tool: read-only, human-readable snapshot of one worker's in-progress turn.
// Returns an "Unknown project" string instead of throwing for a bad name.
const getWorkerActivity = {
  name: "get_worker_activity",
  description: "Peek at what a worker is doing RIGHT NOW (mid-turn). Returns working/silent durations in seconds, currently-running tool names, completed tools so far, and the tail of its streamed text. Use this when a worker has been `working` for a long time to decide whether it's making progress, hung, or worth cancelling.",
  parameters: getWorkerActivityParams,
  execute(args) {
    const w = workers.get(args.project);
    if (!w) return `Unknown project: ${args.project}`;
    const a = w.getActivity();
    const lines = [
      `Project: ${args.project}`,
      `Status: ${a.status}`,
      `Working: ${a.workingSeconds}s (last event ${a.silentSeconds}s ago)`,
      `Active tools: ${a.activeTools.length > 0 ? a.activeTools.join(", ") : "(none)"}`,
      // Completed tools are prefixed with a check or cross for success/failure.
      `Completed this turn: ${a.completedTools.length > 0 ? a.completedTools.map((t) => `${t.ok ? "\u2713" : "\u2717"}${t.name}`).join(", ") : "(none)"}`,
      "",
      "Text tail:",
      a.textTail || "(no text yet)"
    ];
    return lines.join("\n");
  }
};
|
|
92178
|
+
// Tool: abort one worker's current turn. Reports back whether anything was
// actually cancelled; unknown project names yield a message, not an error.
const cancelWorker = {
  name: "cancel_worker",
  description: "Abort a worker's current turn. Other workers are untouched. The cancelled worker emits a `worker_error` event with message 'Cancelled by boss.' so any in-flight task is cleared. After cancelling, you can re-prompt or reset the worker. No-op if the worker isn't `working`.",
  parameters: cancelWorkerParams,
  execute(args) {
    const w = workers.get(args.project);
    if (!w) return `Unknown project: ${args.project}`;
    const cancelled = w.cancel();
    return cancelled ? `Cancellation requested for "${args.project}". A worker_error event will arrive shortly.` : `Worker "${args.project}" is not working \u2014 nothing to cancel (status: ${w.getStatus()}).`;
  }
};
|
|
92189
|
+
// Tool: hard-reset one worker (cancel + wipe history + force idle). Waits for
// the reset to finish before reporting success.
const resetWorker = {
  name: "reset_worker",
  description: "Hard reset: cancel any in-flight turn, wipe conversation history, force status back to idle. Use as a last resort when a worker is wedged in `error` or its context is so polluted that re-prompting won't recover. Equivalent to `fresh: true` plus a forced unstuck. After reset, the worker is ready for a new prompt_worker call.",
  parameters: resetWorkerParams,
  async execute(args) {
    const w = workers.get(args.project);
    if (!w) return `Unknown project: ${args.project}`;
    await w.reset();
    return `Worker "${args.project}" reset \u2014 status is idle, history wiped.`;
  }
};
|
|
92200
|
+
return [
|
|
92201
|
+
listWorkers,
|
|
92202
|
+
getWorkerStatus,
|
|
92203
|
+
promptWorker,
|
|
92204
|
+
getWorkerSummary,
|
|
92205
|
+
getWorkerActivity,
|
|
92206
|
+
cancelWorker,
|
|
92207
|
+
resetWorker
|
|
92208
|
+
];
|
|
91925
92209
|
}
|
|
91926
92210
|
|
|
91927
92211
|
// src/task-tools.ts
|
|
@@ -92553,6 +92837,9 @@ async function getSessionById(id2) {
|
|
|
92553
92837
|
}
|
|
92554
92838
|
|
|
92555
92839
|
// src/orchestrator.ts
|
|
92840
|
+
// Stuck-worker watchdog tuning.
// Tick period in milliseconds (30 s).
var WATCHDOG_INTERVAL_MS = 3e4;
// Seconds without a worker event before a turn is flagged as "silent".
var SILENT_THRESHOLD_SEC = 90;
// Seconds of total turn time before a turn is flagged as "long_running".
var WORKING_THRESHOLD_SEC = 600;
|
|
92556
92843
|
var GGBoss = class {
|
|
92557
92844
|
workers = /* @__PURE__ */ new Map();
|
|
92558
92845
|
lastSummaries = /* @__PURE__ */ new Map();
|
|
@@ -92591,6 +92878,23 @@ var GGBoss = class {
|
|
|
92591
92878
|
* message that didn't dispatch any workers.
|
|
92592
92879
|
*/
|
|
92593
92880
|
hadWorkerActivitySinceReady = false;
|
|
92881
|
+
/**
 * Watchdog for stuck workers. Fires every WATCHDOG_INTERVAL_MS; if any
 * "working" worker has been silent past SILENT_THRESHOLD_SEC or running
 * past WORKING_THRESHOLD_SEC, push a `worker_stuck` event onto the queue.
 * The boss processes it like any other event — AFTER its current turn
 * (queue is FIFO, boss is single-event-at-a-time), so this never
 * interrupts an in-flight boss turn.
 *
 * Holds the interval handle while the watchdog is running, null otherwise.
 */
watchdogTimer = null;
/**
 * Per-project debounce. Stores the worker's lastEventAtMs at the moment we
 * pushed the stuck event (null if the worker had not emitted any event yet).
 * If the worker's lastEventAt advances past that, we know the worker
 * recovered (emitted a new event) and it becomes eligible to be flagged
 * again on the next stall. Entries are cleared on
 * worker_turn_complete / worker_error.
 */
stuckPushedAt = /* @__PURE__ */ new Map();
|
|
92594
92898
|
constructor(opts) {
|
|
92595
92899
|
this.opts = opts;
|
|
92596
92900
|
}
|
|
@@ -92884,6 +93188,7 @@ var GGBoss = class {
|
|
|
92884
93188
|
}
|
|
92885
93189
|
async run() {
|
|
92886
93190
|
this.running = true;
|
|
93191
|
+
this.startWatchdog();
|
|
92887
93192
|
while (this.running) {
|
|
92888
93193
|
try {
|
|
92889
93194
|
await this.runIteration();
|
|
@@ -92906,6 +93211,7 @@ var GGBoss = class {
|
|
|
92906
93211
|
}
|
|
92907
93212
|
let finishedTaskId = null;
|
|
92908
93213
|
if (event.kind === "worker_turn_complete") {
|
|
93214
|
+
this.stuckPushedAt.delete(event.summary.project);
|
|
92909
93215
|
void playDoneAudio();
|
|
92910
93216
|
this.hadWorkerActivitySinceReady = true;
|
|
92911
93217
|
this.lastSummaries.set(event.summary.project, event.summary);
|
|
@@ -92935,6 +93241,7 @@ var GGBoss = class {
|
|
|
92935
93241
|
}
|
|
92936
93242
|
if (event.kind === "worker_error") {
|
|
92937
93243
|
this.hadWorkerActivitySinceReady = true;
|
|
93244
|
+
this.stuckPushedAt.delete(event.project);
|
|
92938
93245
|
log2("ERROR", "worker_error", event.message, { project: event.project });
|
|
92939
93246
|
const taskId = this.inFlightTaskByProject.get(event.project);
|
|
92940
93247
|
if (taskId) {
|
|
@@ -92945,6 +93252,14 @@ var GGBoss = class {
|
|
|
92945
93252
|
});
|
|
92946
93253
|
}
|
|
92947
93254
|
}
|
|
93255
|
+
if (event.kind === "worker_stuck") {
|
|
93256
|
+
log2("WARN", "worker_stuck", `worker silent\u2014pinging boss`, {
|
|
93257
|
+
project: event.project,
|
|
93258
|
+
reason: event.reason,
|
|
93259
|
+
silentSeconds: event.snapshot.silentSeconds,
|
|
93260
|
+
workingSeconds: event.snapshot.workingSeconds
|
|
93261
|
+
});
|
|
93262
|
+
}
|
|
92948
93263
|
await this.runCompaction(false);
|
|
92949
93264
|
const workerSnapshot = [...this.workers.entries()].map(([name, w]) => ({
|
|
92950
93265
|
name,
|
|
@@ -93052,8 +93367,62 @@ var GGBoss = class {
|
|
|
93052
93367
|
await this.dispatchTaskByDescription(project, next.description, next.fresh === true, next.id);
|
|
93053
93368
|
this.pendingAutoChainNotices.push({ project, title: next.title });
|
|
93054
93369
|
}
|
|
93370
|
+
/**
|
|
93371
|
+
* Start the stuck-worker watchdog. Idempotent.
|
|
93372
|
+
*
|
|
93373
|
+
* Safety properties:
|
|
93374
|
+
* - Pushes onto the same FIFO queue the boss already drains, so the boss
|
|
93375
|
+
* never gets interrupted mid-turn — stuck pings are processed AFTER
|
|
93376
|
+
* whatever it's currently doing.
|
|
93377
|
+
* - Per-worker debounce (`stuckPushedAt`) prevents spam; a worker only
|
|
93378
|
+
* gets re-flagged after it emits a new event AND stalls again, or after
|
|
93379
|
+
* it completes/errors and stalls on a fresh turn.
|
|
93380
|
+
*/
|
|
93381
|
+
startWatchdog() {
|
|
93382
|
+
if (this.watchdogTimer) return;
|
|
93383
|
+
this.watchdogTimer = setInterval(() => {
|
|
93384
|
+
try {
|
|
93385
|
+
this.checkStuckWorkers();
|
|
93386
|
+
} catch (err) {
|
|
93387
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
93388
|
+
log2("ERROR", "watchdog", "tick threw", { message });
|
|
93389
|
+
}
|
|
93390
|
+
}, WATCHDOG_INTERVAL_MS);
|
|
93391
|
+
this.watchdogTimer.unref?.();
|
|
93392
|
+
}
|
|
93393
|
+
stopWatchdog() {
|
|
93394
|
+
if (this.watchdogTimer) {
|
|
93395
|
+
clearInterval(this.watchdogTimer);
|
|
93396
|
+
this.watchdogTimer = null;
|
|
93397
|
+
}
|
|
93398
|
+
}
|
|
93399
|
+
checkStuckWorkers() {
|
|
93400
|
+
for (const [name, worker] of this.workers) {
|
|
93401
|
+
const decision = decideStuckEvent({
|
|
93402
|
+
status: worker.getStatus(),
|
|
93403
|
+
activity: worker.getStatus() === "working" ? worker.getActivity() : null,
|
|
93404
|
+
lastPushedAt: this.stuckPushedAt.has(name) ? this.stuckPushedAt.get(name) ?? null : void 0,
|
|
93405
|
+
silentThresholdSec: SILENT_THRESHOLD_SEC,
|
|
93406
|
+
workingThresholdSec: WORKING_THRESHOLD_SEC
|
|
93407
|
+
});
|
|
93408
|
+
if (decision.kind === "clear_debounce") {
|
|
93409
|
+
this.stuckPushedAt.delete(name);
|
|
93410
|
+
continue;
|
|
93411
|
+
}
|
|
93412
|
+
if (decision.kind === "skip") continue;
|
|
93413
|
+
this.stuckPushedAt.set(name, decision.lastEventAtMs);
|
|
93414
|
+
this.queue.push({
|
|
93415
|
+
kind: "worker_stuck",
|
|
93416
|
+
project: name,
|
|
93417
|
+
reason: decision.reason,
|
|
93418
|
+
snapshot: decision.snapshot,
|
|
93419
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
93420
|
+
});
|
|
93421
|
+
}
|
|
93422
|
+
}
|
|
93055
93423
|
async dispose() {
|
|
93056
93424
|
this.running = false;
|
|
93425
|
+
this.stopWatchdog();
|
|
93057
93426
|
this.ac.abort();
|
|
93058
93427
|
this.queue.push({
|
|
93059
93428
|
kind: "user_message",
|
|
@@ -93078,6 +93447,37 @@ function reportedToTaskStatus(reported, anyToolFailed) {
|
|
|
93078
93447
|
if (reported === "UNVERIFIED" || reported === "PARTIAL") return "in_progress";
|
|
93079
93448
|
return anyToolFailed ? "blocked" : "done";
|
|
93080
93449
|
}
|
|
93450
|
+
/**
 * Pure decision function for the stuck-worker watchdog.
 *
 * @param {object} input
 * @param {string} input.status - Worker status; only "working" can be flagged.
 * @param {object|null} input.activity - Worker activity snapshot
 *   (`lastEventAtMs`, `silentSeconds`, `workingSeconds`, `activeTools`,
 *   `completedTools`, `textTail`), or null when the worker is not working.
 * @param {number|null|undefined} input.lastPushedAt - Debounce entry:
 *   undefined when no ping is outstanding for this worker; otherwise the
 *   worker's `lastEventAtMs` at the time of the last ping (null if it had
 *   none).
 * @param {number} input.silentThresholdSec - Silence, in seconds, that counts as a stall.
 * @param {number} input.workingThresholdSec - Turn duration, in seconds, that counts as long-running.
 * @returns {{kind: "skip"} | {kind: "clear_debounce"} |
 *   {kind: "push", reason: "silent"|"long_running", lastEventAtMs: number|null, snapshot: object}}
 */
function decideStuckEvent(input) {
  const { status, activity, lastPushedAt, silentThresholdSec, workingThresholdSec } = input;
  if (status !== "working" || !activity) {
    // The worker left its turn: any debounce entry is obsolete.
    return lastPushedAt !== void 0 ? { kind: "clear_debounce" } : { kind: "skip" };
  }
  const alreadyPinged = lastPushedAt !== void 0;
  if (alreadyPinged) {
    // Stay quiet until the worker has emitted something newer than the
    // event time recorded with the last ping.
    const lastEvent = activity.lastEventAtMs;
    if (lastEvent === null || lastPushedAt === null || lastEvent <= lastPushedAt) {
      return { kind: "skip" };
    }
  }
  let reason = null;
  if (activity.lastEventAtMs !== null && activity.silentSeconds >= silentThresholdSec) {
    reason = "silent";
  } else if (!alreadyPinged && activity.workingSeconds >= workingThresholdSec) {
    // Once a turn is past the working threshold this condition stays true
    // on every tick. After a ping has gone out, only a fresh stall
    // ("silent") may re-flag the worker; otherwise an actively streaming
    // long turn would be re-reported on every watchdog tick.
    reason = "long_running";
  }
  if (!reason) return { kind: "skip" };
  return {
    kind: "push",
    reason,
    lastEventAtMs: activity.lastEventAtMs,
    snapshot: {
      workingSeconds: activity.workingSeconds,
      silentSeconds: activity.silentSeconds,
      activeTools: activity.activeTools,
      completedTools: activity.completedTools,
      textTail: activity.textTail
    }
  };
}
|
|
93081
93481
|
function formatEventForBoss(event, workerSnapshot, autoChainNotices) {
|
|
93082
93482
|
if (event.kind === "user_message") {
|
|
93083
93483
|
return event.text;
|
|
@@ -93095,15 +93495,27 @@ auto_dispatched_since_last_event:
|
|
|
93095
93495
|
${lines.join("\n")}`;
|
|
93096
93496
|
};
|
|
93097
93497
|
if (event.kind === "worker_turn_complete") {
|
|
93098
|
-
const
|
|
93099
|
-
const tools =
|
|
93100
|
-
return `[event:worker_turn_complete] project="${
|
|
93498
|
+
const s2 = event.summary;
|
|
93499
|
+
const tools = s2.toolsUsed.length > 0 ? s2.toolsUsed.map((t) => `${t.ok ? "\u2713" : "\u2717"}${t.name}`).join(", ") : "(none)";
|
|
93500
|
+
return `[event:worker_turn_complete] project="${s2.project}" turn=${s2.turnIndex} timestamp=${s2.timestamp}
|
|
93101
93501
|
tools_used: ${tools}
|
|
93102
93502
|
final_text:
|
|
93103
|
-
${
|
|
93503
|
+
${s2.finalText || "(empty)"}${renderOthers(s2.project)}${renderAutoChain()}`;
|
|
93104
93504
|
}
|
|
93105
|
-
|
|
93505
|
+
if (event.kind === "worker_error") {
|
|
93506
|
+
return `[event:worker_error] project="${event.project}" timestamp=${event.timestamp}
|
|
93106
93507
|
${event.message}${renderOthers(event.project)}${renderAutoChain()}`;
|
|
93508
|
+
}
|
|
93509
|
+
const s = event.snapshot;
|
|
93510
|
+
const completed = s.completedTools.length > 0 ? s.completedTools.map((t) => `${t.ok ? "\u2713" : "\u2717"}${t.name}`).join(", ") : "(none)";
|
|
93511
|
+
const active = s.activeTools.length > 0 ? s.activeTools.join(", ") : "(none)";
|
|
93512
|
+
return `[event:worker_stuck] project="${event.project}" reason=${event.reason} timestamp=${event.timestamp}
|
|
93513
|
+
working_seconds: ${s.workingSeconds}
|
|
93514
|
+
silent_seconds: ${s.silentSeconds}
|
|
93515
|
+
active_tools: ${active}
|
|
93516
|
+
completed_this_turn: ${completed}
|
|
93517
|
+
text_tail:
|
|
93518
|
+
${s.textTail || "(no text yet)"}${renderOthers(event.project)}${renderAutoChain()}`;
|
|
93107
93519
|
}
|
|
93108
93520
|
function computeContextUsed(usage, provider) {
|
|
93109
93521
|
const inputContext = (usage.inputTokens ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
|
|
@@ -93293,4 +93705,4 @@ react/cjs/react-jsx-runtime.development.js:
|
|
|
93293
93705
|
* LICENSE file in the root directory of this source tree.
|
|
93294
93706
|
*)
|
|
93295
93707
|
*/
|
|
93296
|
-
//# sourceMappingURL=chunk-
|
|
93708
|
+
//# sourceMappingURL=chunk-WGJRDNT6.js.map
|