@genesislcap/ai-assistant 14.452.0 → 14.452.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-assistant.api.json +74 -3
- package/dist/ai-assistant.d.ts +62 -4
- package/dist/dts/components/chat-driver/chat-driver.d.ts +60 -3
- package/dist/dts/components/chat-driver/chat-driver.d.ts.map +1 -1
- package/dist/dts/main/main.d.ts +1 -1
- package/dist/dts/state/debug-event-log.d.ts +1 -1
- package/dist/dts/state/debug-event-log.d.ts.map +1 -1
- package/dist/esm/components/chat-driver/chat-driver.js +215 -43
- package/dist/esm/components/chat-driver/chat-driver.test.js +134 -4
- package/dist/esm/main/main.js +1 -1
- package/dist/esm/state/debug-event-log.js +2 -1
- package/docs/migration-GENC-1312.md +176 -0
- package/docs/sub_agent.md +35 -15
- package/package.json +16 -16
- package/src/components/chat-driver/chat-driver.test.ts +187 -4
- package/src/components/chat-driver/chat-driver.ts +247 -51
- package/src/main/main.ts +1 -1
- package/src/state/debug-event-log.ts +3 -1
|
@@ -18,8 +18,19 @@ const DEFAULT_MAX_FOLD_OPERATIONS = 5;
|
|
|
18
18
|
// cap reach thousands for full-session capture without the memory blowup.
|
|
19
19
|
const DEFAULT_MAX_TURN_SNAPSHOTS = 400;
|
|
20
20
|
const DEFAULT_MAX_UNKNOWN_TOOL_CALLS = 5;
|
|
21
|
-
|
|
21
|
+
// Stale tools (advertised in an earlier state, retired now) and fold-hidden tools are
|
|
22
|
+
// self-correcting — the model drops them once guided — so they get a higher loop-protection
|
|
23
|
+
// ceiling than hallucinated names: a few legitimate stale calls across state transitions must
|
|
24
|
+
// not prematurely end the turn. Still bounded so a genuinely stuck loop terminates.
|
|
25
|
+
const MAX_STALE_TOOL_CALLS = DEFAULT_MAX_UNKNOWN_TOOL_CALLS * 2;
|
|
26
|
+
// Gemini in particular emits short bursts of MALFORMED_FUNCTION_CALL; allow more CONSECUTIVE
|
|
27
|
+
// retries. These counters reset on any productive response, so this is a consecutive-failure
|
|
28
|
+
// ceiling, not a per-turn total.
|
|
29
|
+
const MAX_MALFORMED_RETRIES = 5;
|
|
22
30
|
const MAX_EMPTY_RESPONSE_RETRIES = 3;
|
|
31
|
+
// Transient throws while building the per-turn tool surface or calling the provider retry the
|
|
32
|
+
// SAME iteration up to this many times before propagating, rather than tearing down the turn.
|
|
33
|
+
const MAX_SETUP_TRANSPORT_RETRIES = 3;
|
|
23
34
|
const SUGGESTIONS_HISTORY_WINDOW = 8;
|
|
24
35
|
/** Name reserved for the cross-agent handoff tool — injected by OrchestratingDriver. */
|
|
25
36
|
export const REQUEST_CONTINUATION_TOOL = 'request_continuation';
|
|
@@ -77,6 +88,14 @@ export class ChatDriver extends EventTarget {
|
|
|
77
88
|
this.recentStaleToolNames = new Set();
|
|
78
89
|
/** Sub-agents declared on the active agent config, keyed by name. */
|
|
79
90
|
this.subAgentsMap = new Map();
|
|
91
|
+
/**
|
|
92
|
+
* True when this driver runs as a child sub-agent (created by a parent
|
|
93
|
+
* driver's `invokeSubAgent`). Sub-agents force tool use every turn so a turn
|
|
94
|
+
* can only end via their completion tool, and on any non-completion exit they
|
|
95
|
+
* record a typed `SubAgentFailureReason` instead of appending a
|
|
96
|
+
* user-facing message — the parent decides how to surface the failure.
|
|
97
|
+
*/
|
|
98
|
+
this.isSubAgent = false;
|
|
80
99
|
/**
|
|
81
100
|
* Set by `releaseAgent` inside a top-level tool handler — typically a stateful
|
|
82
101
|
* agent's terminal-state handler signalling that its flow is complete and the
|
|
@@ -239,6 +258,35 @@ export class ChatDriver extends EventTarget {
|
|
|
239
258
|
getSubAgentCompletion() {
|
|
240
259
|
return this.subAgentCompletion;
|
|
241
260
|
}
|
|
261
|
+
/**
|
|
262
|
+
* Mark this driver as running as a sub-agent. Called by a parent driver's
|
|
263
|
+
* `invokeSubAgent` immediately after construction, before the first turn.
|
|
264
|
+
* Enables forced tool use and typed failure reporting (see `isSubAgent`).
|
|
265
|
+
*/
|
|
266
|
+
markAsSubAgent() {
|
|
267
|
+
this.isSubAgent = true;
|
|
268
|
+
}
|
|
269
|
+
/**
|
|
270
|
+
* Returns the typed failure recorded when a sub-agent run ended without
|
|
271
|
+
* `completeSubAgent`, if any. Called by a parent `ChatDriver` after running
|
|
272
|
+
* this instance as a sub-agent.
|
|
273
|
+
*/
|
|
274
|
+
getSubAgentFailure() {
|
|
275
|
+
return this.subAgentFailure;
|
|
276
|
+
}
|
|
277
|
+
/**
|
|
278
|
+
* Record a sub-agent failure reason (first one wins). No-op for top-level
|
|
279
|
+
* agents, so loop-exit sites can call it unconditionally. The parent reads
|
|
280
|
+
* this via `getSubAgentFailure()` and emits the `subagent.failed` meta event
|
|
281
|
+
* under its *own* session — see `invokeSubAgent`. (A child sub-agent runs
|
|
282
|
+
* under a separate session key, so recording here would orphan the event off
|
|
283
|
+
* the user-visible debug-log timeline.)
|
|
284
|
+
*/
|
|
285
|
+
failSubAgent(reason) {
|
|
286
|
+
if (!this.isSubAgent || this.subAgentFailure)
|
|
287
|
+
return;
|
|
288
|
+
this.subAgentFailure = { reason };
|
|
289
|
+
}
|
|
242
290
|
/**
|
|
243
291
|
* Returns true if `releaseAgent` was called during the most recent turn.
|
|
244
292
|
* Consumed by the orchestrator to trigger the auto-pin release path.
|
|
@@ -256,6 +304,47 @@ export class ChatDriver extends EventTarget {
|
|
|
256
304
|
getTurnSnapshots() {
|
|
257
305
|
return this.turnSnapshots;
|
|
258
306
|
}
|
|
307
|
+
/**
|
|
308
|
+
* Merge a sub-agent's turn snapshots into this driver's buffer so they surface
|
|
309
|
+
* as `kind:'turn'` entries in the exported debug log. The child runs as a
|
|
310
|
+
* separate, discarded driver, so its snapshots would otherwise be lost. Each is
|
|
311
|
+
* re-labelled under the parent turn that activated the sub-agent: the child's
|
|
312
|
+
* own (numeric) turns become `"<parentTurn>-1"`, `"-2"`, … (1-based, in order);
|
|
313
|
+
* any already-forwarded grand-child labels (strings) have their leading segment
|
|
314
|
+
* remapped the same way, so nesting composes (`"5-2"` → `"5-2-1"`).
|
|
315
|
+
*
|
|
316
|
+
* Note: two sub-agents invoked in the *same* parent turn share the prefix, so
|
|
317
|
+
* their labels can repeat — `agentName` on each snapshot disambiguates them.
|
|
318
|
+
*/
|
|
319
|
+
forwardSubAgentSnapshots(childSnapshots) {
|
|
320
|
+
var _a;
|
|
321
|
+
if (childSnapshots.length === 0)
|
|
322
|
+
return;
|
|
323
|
+
// The activating parent turn = the most recent snapshot this driver recorded
|
|
324
|
+
// before entering the tool handler that invoked the sub-agent.
|
|
325
|
+
const parentTurn = Math.max(0, this.globalTurnIndex - 1);
|
|
326
|
+
const ownTurnLabel = new Map();
|
|
327
|
+
let ownPos = 0;
|
|
328
|
+
for (const snap of childSnapshots) {
|
|
329
|
+
let turnIndex;
|
|
330
|
+
if (!snap.turnIndex.includes('-')) {
|
|
331
|
+
// The child's own turn (a bare counter) → number it under the parent turn.
|
|
332
|
+
ownPos += 1;
|
|
333
|
+
turnIndex = `${parentTurn}-${ownPos}`;
|
|
334
|
+
ownTurnLabel.set(snap.turnIndex, turnIndex);
|
|
335
|
+
}
|
|
336
|
+
else {
|
|
337
|
+
// An already-forwarded grand-child label — remap its leading segment.
|
|
338
|
+
const [lead, ...rest] = snap.turnIndex.split('-');
|
|
339
|
+
const leadLabel = (_a = ownTurnLabel.get(lead)) !== null && _a !== void 0 ? _a : `${parentTurn}-${lead}`;
|
|
340
|
+
turnIndex = [leadLabel, ...rest].join('-');
|
|
341
|
+
}
|
|
342
|
+
this.turnSnapshots.push(Object.assign(Object.assign({}, snap), { turnIndex }));
|
|
343
|
+
}
|
|
344
|
+
while (this.turnSnapshots.length > this.maxTurnSnapshots) {
|
|
345
|
+
this.turnSnapshots.shift();
|
|
346
|
+
}
|
|
347
|
+
}
|
|
259
348
|
/**
|
|
260
349
|
* Push one snapshot to the ring buffer. Called inside `runToolLoop` just
|
|
261
350
|
* before each LLM call — that's the latest point where the prompt, tool
|
|
@@ -274,7 +363,7 @@ export class ChatDriver extends EventTarget {
|
|
|
274
363
|
agentSnapshot = `<getDebugSnapshot threw: ${e instanceof Error ? e.message : String(e)}>`;
|
|
275
364
|
}
|
|
276
365
|
}
|
|
277
|
-
const turnIndex = this.globalTurnIndex;
|
|
366
|
+
const turnIndex = String(this.globalTurnIndex);
|
|
278
367
|
this.globalTurnIndex += 1;
|
|
279
368
|
this.turnSnapshots.push({
|
|
280
369
|
turnIndex,
|
|
@@ -514,6 +603,7 @@ export class ChatDriver extends EventTarget {
|
|
|
514
603
|
return { reason: 'done' };
|
|
515
604
|
this.busy = true;
|
|
516
605
|
this.subAgentCompletion = undefined;
|
|
606
|
+
this.subAgentFailure = undefined;
|
|
517
607
|
this.agentReleaseRequested = false;
|
|
518
608
|
this.appendToHistory({ role: 'user', content: userInput, attachments });
|
|
519
609
|
this.turnStartedAt = Date.now();
|
|
@@ -562,10 +652,10 @@ export class ChatDriver extends EventTarget {
|
|
|
562
652
|
*/
|
|
563
653
|
buildHandlerContext(traceCapture) {
|
|
564
654
|
return Object.assign(Object.assign({ requestInteraction: (componentName, data, options) => this.requestInteraction(componentName, data, options) }, (this.subAgentsMap.size > 0 && {
|
|
565
|
-
requestSubAgent: (name, options) => this.invokeSubAgent(name, options).then(({
|
|
655
|
+
requestSubAgent: (name, options) => this.invokeSubAgent(name, options).then(({ outcome, trace }) => {
|
|
566
656
|
if (traceCapture)
|
|
567
657
|
traceCapture.trace = trace;
|
|
568
|
-
return
|
|
658
|
+
return outcome;
|
|
569
659
|
}),
|
|
570
660
|
})), { completeSubAgent: (result) => {
|
|
571
661
|
var _a;
|
|
@@ -591,7 +681,7 @@ export class ChatDriver extends EventTarget {
|
|
|
591
681
|
*/
|
|
592
682
|
invokeSubAgent(name, options) {
|
|
593
683
|
return __awaiter(this, void 0, void 0, function* () {
|
|
594
|
-
var _a, _b, _c;
|
|
684
|
+
var _a, _b, _c, _d;
|
|
595
685
|
const subConfig = this.subAgentsMap.get(name);
|
|
596
686
|
if (!subConfig) {
|
|
597
687
|
const available = [...this.subAgentsMap.keys()].join(', ') || '(none)';
|
|
@@ -615,6 +705,9 @@ export class ChatDriver extends EventTarget {
|
|
|
615
705
|
...((_b = subConfig.primerHistory) !== null && _b !== void 0 ? _b : []),
|
|
616
706
|
];
|
|
617
707
|
const child = new ChatDriver(this.providerRegistry);
|
|
708
|
+
// Mark before the first turn so the child forces tool use and reports a
|
|
709
|
+
// typed failure (rather than user-facing text) if it never completes.
|
|
710
|
+
child.markAsSubAgent();
|
|
618
711
|
child.applyAgent(Object.assign(Object.assign({}, subConfig), { primerHistory: effectivePrimer }));
|
|
619
712
|
// Route interactions back through this driver so widgets render in the
|
|
620
713
|
// parent's (ultimately the root's) history and resolve via the same
|
|
@@ -650,14 +743,28 @@ export class ChatDriver extends EventTarget {
|
|
|
650
743
|
this.dispatchEvent(new CustomEvent('sub-agent-stop', { detail: lifecycleDetail }));
|
|
651
744
|
}
|
|
652
745
|
const trace = child.getHistory();
|
|
746
|
+
// Forward the child's per-LLM-call snapshots onto this (parent) driver's
|
|
747
|
+
// buffer so they show as `kind:'turn'` entries in the exported debug log,
|
|
748
|
+
// re-numbered under the activating parent turn. Runs for both success and
|
|
749
|
+
// failure so the sub-agent's turns are always visible.
|
|
750
|
+
this.forwardSubAgentSnapshots(child.getTurnSnapshots());
|
|
653
751
|
const completion = child.getSubAgentCompletion();
|
|
654
752
|
if (completion) {
|
|
655
|
-
return { result: completion.result, trace };
|
|
753
|
+
return { outcome: { ok: true, result: completion.result }, trace };
|
|
656
754
|
}
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
755
|
+
// No completion → the sub-agent's loop ended without calling its completion
|
|
756
|
+
// tool. Surface the typed reason it recorded; default to 'max_iterations'
|
|
757
|
+
// for the defensive case where the loop ended with no reason set (e.g. a
|
|
758
|
+
// provider ignored forced tool use and returned text). The previous
|
|
759
|
+
// final-text fallback is intentionally gone — sub-agents return a
|
|
760
|
+
// structured outcome only, and the parent handler decides how to recover.
|
|
761
|
+
const reason = (_d = (_c = child.getSubAgentFailure()) === null || _c === void 0 ? void 0 : _c.reason) !== null && _d !== void 0 ? _d : 'max_iterations';
|
|
762
|
+
// Record under THIS (parent) driver's session so the failure lands on the
|
|
763
|
+
// user-visible debug-log timeline — the child ran under its own session key.
|
|
764
|
+
// This is also the only telemetry for the defensive default above, where the
|
|
765
|
+
// child's loop ended without recording an explicit failure reason.
|
|
766
|
+
recordMetaEvent(this.sessionKey, 'subagent.failed', { agent: name, reason });
|
|
767
|
+
return { outcome: { ok: false, reason }, trace };
|
|
661
768
|
});
|
|
662
769
|
}
|
|
663
770
|
/**
|
|
@@ -670,6 +777,7 @@ export class ChatDriver extends EventTarget {
|
|
|
670
777
|
return { reason: 'done' };
|
|
671
778
|
this.busy = true;
|
|
672
779
|
this.subAgentCompletion = undefined;
|
|
780
|
+
this.subAgentFailure = undefined;
|
|
673
781
|
this.turnStartedAt = Date.now();
|
|
674
782
|
recordMetaEvent(this.sessionKey, 'turn.start', {
|
|
675
783
|
phase: 'continueFromHistory',
|
|
@@ -864,6 +972,10 @@ export class ChatDriver extends EventTarget {
|
|
|
864
972
|
let iterations = 0;
|
|
865
973
|
let malformedAttempts = 0;
|
|
866
974
|
let emptyResponseAttempts = 0;
|
|
975
|
+
// Bounded retries for transient throws while resolving the per-turn tool surface or
|
|
976
|
+
// calling the provider. Without this, a single transient throw tears down the whole turn
|
|
977
|
+
// and strands the agent's unflushed work behind an opaque error.
|
|
978
|
+
let setupTransportAttempts = 0;
|
|
867
979
|
// True only for the very first LLM call. Used to exclude the pending user message
|
|
868
980
|
// from history (it is passed separately as currentInput). Must not be derived from
|
|
869
981
|
// `iterations` because fold operations decrement iterations, which would incorrectly
|
|
@@ -883,17 +995,30 @@ export class ChatDriver extends EventTarget {
|
|
|
883
995
|
// forbidden when a factory is set, so the array form is always valid.
|
|
884
996
|
// Sequential await is required — each iteration must see fresh values
|
|
885
997
|
// before constructing the LLM request.
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
998
|
+
// A transient throw while building the tool surface should retry the iteration, not
|
|
999
|
+
// tear down the whole turn and strand the agent's unflushed buffer behind an opaque
|
|
1000
|
+
// error. The handler-map factory re-resolves in lockstep so dispatch sees only the
|
|
1001
|
+
// handlers valid for the current state, in step with the tool definitions exposed
|
|
1002
|
+
// above. Folds are forbidden when either factory is set, so the fold-mutation paths
|
|
1003
|
+
// on `this.toolDefinitions` / `this.toolHandlers` are unreachable.
|
|
1004
|
+
try {
|
|
1005
|
+
if (this.toolDefinitionsFactory) {
|
|
1006
|
+
// oxlint-disable-next-line no-await-in-loop
|
|
1007
|
+
this.toolDefinitions = yield this.toolDefinitionsFactory(promptCtx);
|
|
1008
|
+
}
|
|
1009
|
+
if (this.toolHandlersFactory) {
|
|
1010
|
+
// oxlint-disable-next-line no-await-in-loop
|
|
1011
|
+
this.toolHandlers = yield this.toolHandlersFactory(promptCtx);
|
|
1012
|
+
}
|
|
889
1013
|
}
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
1014
|
+
catch (e) {
|
|
1015
|
+
setupTransportAttempts += 1;
|
|
1016
|
+
if (setupTransportAttempts < MAX_SETUP_TRANSPORT_RETRIES) {
|
|
1017
|
+
logger.warn(`ChatDriver: tool-surface resolution failed, retrying (${setupTransportAttempts}/${MAX_SETUP_TRANSPORT_RETRIES})`);
|
|
1018
|
+
iterations -= 1;
|
|
1019
|
+
continue;
|
|
1020
|
+
}
|
|
1021
|
+
throw e;
|
|
897
1022
|
}
|
|
898
1023
|
// Record everything advertised this turn so the unknown-tool path can tell
|
|
899
1024
|
// a stale tool (real earlier, retired now) from a hallucinated one. Runs
|
|
@@ -945,6 +1070,11 @@ export class ChatDriver extends EventTarget {
|
|
|
945
1070
|
// Strip fold-only properties (foldEvent, foldPath) before sending to provider
|
|
946
1071
|
tools: this.toolDefinitions.length ? this.toolDefinitions : undefined,
|
|
947
1072
|
attachments: attachmentsForCall,
|
|
1073
|
+
// Sub-agents must finish by calling a tool (their completion tool), never
|
|
1074
|
+
// by emitting a free-text turn — force tool use so the provider can't
|
|
1075
|
+
// return a bare text answer. Top-level agents stay on the default 'auto'.
|
|
1076
|
+
// (Transports no-op the force when no tools are advertised.)
|
|
1077
|
+
toolChoice: this.isSubAgent ? 'required' : undefined,
|
|
948
1078
|
};
|
|
949
1079
|
// Resolve the active provider for this turn. Static names were validated
|
|
950
1080
|
// in `applyAgent`; function-form names are validated on first resolution
|
|
@@ -977,13 +1107,29 @@ export class ChatDriver extends EventTarget {
|
|
|
977
1107
|
provider: this.lastResolvedProviderName,
|
|
978
1108
|
attempts: malformedAttempts,
|
|
979
1109
|
finishMessage: e.finishMessage,
|
|
1110
|
+
isSubAgent: this.isSubAgent,
|
|
980
1111
|
});
|
|
981
|
-
this.
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
}
|
|
1112
|
+
if (this.isSubAgent) {
|
|
1113
|
+
// Bubble a typed failure to the parent instead of speaking to the user.
|
|
1114
|
+
this.failSubAgent('malformed_tool_call');
|
|
1115
|
+
}
|
|
1116
|
+
else {
|
|
1117
|
+
this.appendToHistory({
|
|
1118
|
+
role: 'assistant',
|
|
1119
|
+
content: 'While working on your request, I repeatedly called my tools incorrectly. This often works on a second try — would you like me to try again? If it happens again, try breaking your request into smaller steps.',
|
|
1120
|
+
});
|
|
1121
|
+
}
|
|
985
1122
|
return { reason: 'done' };
|
|
986
1123
|
}
|
|
1124
|
+
// A transient provider/transport error should retry the SAME iteration a bounded
|
|
1125
|
+
// number of times rather than tearing down the whole turn (which strands the
|
|
1126
|
+
// agent's unflushed buffer behind an opaque error message).
|
|
1127
|
+
setupTransportAttempts += 1;
|
|
1128
|
+
if (setupTransportAttempts < MAX_SETUP_TRANSPORT_RETRIES) {
|
|
1129
|
+
logger.warn(`ChatDriver: provider/transport error, retrying (${setupTransportAttempts}/${MAX_SETUP_TRANSPORT_RETRIES})`);
|
|
1130
|
+
iterations -= 1;
|
|
1131
|
+
continue;
|
|
1132
|
+
}
|
|
987
1133
|
throw e;
|
|
988
1134
|
}
|
|
989
1135
|
const isThinkingStep = response.content && ((_c = response.toolCalls) === null || _c === void 0 ? void 0 : _c.length);
|
|
@@ -1006,11 +1152,17 @@ export class ChatDriver extends EventTarget {
|
|
|
1006
1152
|
agent: this.activeAgentName,
|
|
1007
1153
|
provider: this.lastResolvedProviderName,
|
|
1008
1154
|
attempts: emptyResponseAttempts,
|
|
1155
|
+
isSubAgent: this.isSubAgent,
|
|
1009
1156
|
});
|
|
1010
|
-
this.
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1157
|
+
if (this.isSubAgent) {
|
|
1158
|
+
this.failSubAgent('empty_response');
|
|
1159
|
+
}
|
|
1160
|
+
else {
|
|
1161
|
+
this.appendToHistory({
|
|
1162
|
+
role: 'assistant',
|
|
1163
|
+
content: 'While working on your request, I repeatedly generated a blank response. This often works on a second try — would you like me to try again? If it happens again, try breaking your request into smaller steps.',
|
|
1164
|
+
});
|
|
1165
|
+
}
|
|
1014
1166
|
return { reason: 'done' };
|
|
1015
1167
|
}
|
|
1016
1168
|
else if (isThinkingStep) {
|
|
@@ -1020,6 +1172,11 @@ export class ChatDriver extends EventTarget {
|
|
|
1020
1172
|
else {
|
|
1021
1173
|
this.appendToHistory(response);
|
|
1022
1174
|
}
|
|
1175
|
+
// Reset retry budgets on any productive (non-empty) response, so the caps mean
|
|
1176
|
+
// "N CONSECUTIVE failures" not "N total per turn".
|
|
1177
|
+
emptyResponseAttempts = 0;
|
|
1178
|
+
malformedAttempts = 0;
|
|
1179
|
+
setupTransportAttempts = 0;
|
|
1023
1180
|
if (!((_f = response.toolCalls) === null || _f === void 0 ? void 0 : _f.length)) {
|
|
1024
1181
|
break;
|
|
1025
1182
|
}
|
|
@@ -1101,19 +1258,20 @@ export class ChatDriver extends EventTarget {
|
|
|
1101
1258
|
// or an exclusive fold is hiding it) rather than hallucinated — a
|
|
1102
1259
|
// distinction worth making, because the model should stop retrying
|
|
1103
1260
|
// a retired tool rather than treat the failure as a typo. Stale
|
|
1104
|
-
// calls still
|
|
1105
|
-
//
|
|
1261
|
+
// calls still trip loop protection, but at a higher ceiling than
|
|
1262
|
+
// hallucinated tools (see below) — they are self-correcting, so the
|
|
1263
|
+
// guidance, telemetry, and limit differ.
|
|
1106
1264
|
if (this.everSeenToolNames.has(tc.name)) {
|
|
1107
1265
|
this.consecutiveUnknownToolCalls += 1;
|
|
1108
1266
|
const hidingFold = this.foldHidingTool(tc.name);
|
|
1109
1267
|
let content;
|
|
1110
1268
|
if (hidingFold) {
|
|
1111
1269
|
content = `"${tc.name}" is not available while the "${hidingFold}" fold is open. Call close_${hidingFold} to return to the previous set of tools, then call ${tc.name}.`;
|
|
1112
|
-
logger.warn(`ChatDriver: tool "${tc.name}" is hidden behind open fold "${hidingFold}" (${this.consecutiveUnknownToolCalls}/${
|
|
1270
|
+
logger.warn(`ChatDriver: tool "${tc.name}" is hidden behind open fold "${hidingFold}" (${this.consecutiveUnknownToolCalls}/${MAX_STALE_TOOL_CALLS})`);
|
|
1113
1271
|
}
|
|
1114
1272
|
else {
|
|
1115
1273
|
content = `"${tc.name}" was available earlier but is not part of the current step — that step is complete, so do not call it again. Continue with the tools available now: ${Object.keys(this.toolHandlers).join(', ') || '(none)'}.`;
|
|
1116
|
-
logger.warn(`ChatDriver: stale tool "${tc.name}" — advertised earlier this activation but retired in the current state (${this.consecutiveUnknownToolCalls}/${
|
|
1274
|
+
logger.warn(`ChatDriver: stale tool "${tc.name}" — advertised earlier this activation but retired in the current state (${this.consecutiveUnknownToolCalls}/${MAX_STALE_TOOL_CALLS})`);
|
|
1117
1275
|
}
|
|
1118
1276
|
recordMetaEvent(this.sessionKey, 'tool.unresolved', {
|
|
1119
1277
|
tool: tc.name,
|
|
@@ -1121,14 +1279,14 @@ export class ChatDriver extends EventTarget {
|
|
|
1121
1279
|
kind: hidingFold ? 'fold-hidden' : 'stale',
|
|
1122
1280
|
fold: hidingFold !== null && hidingFold !== void 0 ? hidingFold : undefined,
|
|
1123
1281
|
consecutive: this.consecutiveUnknownToolCalls,
|
|
1124
|
-
max:
|
|
1282
|
+
max: MAX_STALE_TOOL_CALLS,
|
|
1125
1283
|
});
|
|
1126
1284
|
executedById.set(tc.id, { toolCallId: tc.id, content });
|
|
1127
1285
|
unknownToolIds.add(tc.id);
|
|
1128
1286
|
staleToolIds.add(tc.id);
|
|
1129
1287
|
this.recentUnknownToolNames.add(tc.name);
|
|
1130
1288
|
this.recentStaleToolNames.add(tc.name);
|
|
1131
|
-
if (this.consecutiveUnknownToolCalls >=
|
|
1289
|
+
if (this.consecutiveUnknownToolCalls >= MAX_STALE_TOOL_CALLS) {
|
|
1132
1290
|
hitUnknownToolLimit = true;
|
|
1133
1291
|
}
|
|
1134
1292
|
return;
|
|
@@ -1173,7 +1331,9 @@ export class ChatDriver extends EventTarget {
|
|
|
1173
1331
|
});
|
|
1174
1332
|
executedById.set(tc.id, {
|
|
1175
1333
|
toolCallId: tc.id,
|
|
1176
|
-
|
|
1334
|
+
// Structured recovery hint so the model retries or routes around a tool
|
|
1335
|
+
// failure instead of apologising and giving up.
|
|
1336
|
+
content: `Tool error: ${e.message}\nRECOVERY: this tool failed once — you may retry it, or take a different valid action to make progress. Do NOT abandon the task, ask the user to rephrase, or claim you cannot make changes. If a planning tool failed, retry it or proceed with the information you already have.`,
|
|
1177
1337
|
});
|
|
1178
1338
|
anyRealToolExecuted = true; // treat errors as real work for fold op counting
|
|
1179
1339
|
}
|
|
@@ -1270,11 +1430,17 @@ export class ChatDriver extends EventTarget {
|
|
|
1270
1430
|
staleTools,
|
|
1271
1431
|
hallucinatedTools,
|
|
1272
1432
|
availableTools: Object.keys(this.toolHandlers),
|
|
1433
|
+
isSubAgent: this.isSubAgent,
|
|
1273
1434
|
});
|
|
1274
|
-
this.
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1435
|
+
if (this.isSubAgent) {
|
|
1436
|
+
this.failSubAgent('unknown_tool_limit');
|
|
1437
|
+
}
|
|
1438
|
+
else {
|
|
1439
|
+
this.appendToHistory({
|
|
1440
|
+
role: 'assistant',
|
|
1441
|
+
content: "I'm sorry, I repeatedly tried to use tools that aren't available to me, so I couldn't complete that. If a 'Download agent log' option appears in the Settings (cog) menu, you can download the log and share it with whoever set up this assistant to help fix the issue.",
|
|
1442
|
+
});
|
|
1443
|
+
}
|
|
1278
1444
|
return { reason: 'done' };
|
|
1279
1445
|
}
|
|
1280
1446
|
const firstContinuation = systemCalls[0];
|
|
@@ -1295,11 +1461,17 @@ export class ChatDriver extends EventTarget {
|
|
|
1295
1461
|
provider: this.lastResolvedProviderName,
|
|
1296
1462
|
iterations,
|
|
1297
1463
|
limit: this.maxToolIterations,
|
|
1464
|
+
isSubAgent: this.isSubAgent,
|
|
1298
1465
|
});
|
|
1299
|
-
this.
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1466
|
+
if (this.isSubAgent) {
|
|
1467
|
+
this.failSubAgent('max_iterations');
|
|
1468
|
+
}
|
|
1469
|
+
else {
|
|
1470
|
+
this.appendToHistory({
|
|
1471
|
+
role: 'assistant',
|
|
1472
|
+
content: "I've reached my limit for this response. You can ask me to continue and I'll pick up where I left off.",
|
|
1473
|
+
});
|
|
1474
|
+
}
|
|
1303
1475
|
}
|
|
1304
1476
|
return { reason: 'done' };
|
|
1305
1477
|
});
|
|
@@ -12,11 +12,14 @@ import { ChatDriver } from './chat-driver';
|
|
|
12
12
|
const scriptedProvider = (responses) => {
|
|
13
13
|
const queue = [...responses];
|
|
14
14
|
const advertisedPerCall = [];
|
|
15
|
+
const toolChoicePerCall = [];
|
|
15
16
|
return {
|
|
16
17
|
advertisedPerCall,
|
|
18
|
+
toolChoicePerCall,
|
|
17
19
|
chat: (_history, _userMessage, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
18
20
|
var _a, _b;
|
|
19
21
|
advertisedPerCall.push(((_a = options === null || options === void 0 ? void 0 : options.tools) !== null && _a !== void 0 ? _a : []).map((t) => t.name));
|
|
22
|
+
toolChoicePerCall.push(options === null || options === void 0 ? void 0 : options.toolChoice);
|
|
20
23
|
// Once the script is exhausted, end the turn with a plain text reply.
|
|
21
24
|
return (_b = queue.shift()) !== null && _b !== void 0 ? _b : { role: 'assistant', content: 'done' };
|
|
22
25
|
}),
|
|
@@ -173,11 +176,11 @@ stale('splits stale vs hallucinated tools on the unknown-tool-limit error', () =
|
|
|
173
176
|
}
|
|
174
177
|
: { tool_b: () => __awaiter(void 0, void 0, void 0, function* () { return 'b done'; }) },
|
|
175
178
|
});
|
|
176
|
-
// One real call to advance to B, then
|
|
177
|
-
// trips
|
|
179
|
+
// One real call to advance to B, then 10 consecutive stale calls — the 10th
|
|
180
|
+
// trips the stale ceiling (MAX_STALE_TOOL_CALLS, 2x the hallucination limit) and ends the turn.
|
|
178
181
|
const provider = scriptedProvider([
|
|
179
182
|
callsTool('tool_a', 'real'),
|
|
180
|
-
...Array.from({ length:
|
|
183
|
+
...Array.from({ length: 10 }, (_unused, i) => callsTool('tool_a', `stale-${i}`)),
|
|
181
184
|
]);
|
|
182
185
|
const driver = makeDriver(config, provider, sessionKey);
|
|
183
186
|
const result = yield driver.sendMessage('go');
|
|
@@ -188,9 +191,136 @@ stale('splits stale vs hallucinated tools on the unknown-tool-limit error', () =
|
|
|
188
191
|
assert.equal(detail.staleTools, ['tool_a'], 'tool_a should be classified as stale');
|
|
189
192
|
assert.equal(detail.hallucinatedTools, [], 'nothing was hallucinated');
|
|
190
193
|
// Every stale attempt — not just the final limit error — is in the download log.
|
|
191
|
-
assert.is(unresolvedEvents(sessionKey).filter((d) => d.kind === 'stale').length,
|
|
194
|
+
assert.is(unresolvedEvents(sessionKey).filter((d) => d.kind === 'stale').length, 10, 'each stale attempt should be recorded as its own tool.unresolved event');
|
|
192
195
|
// The user-facing turn ends with the apology, not a crash.
|
|
193
196
|
const last = driver.getHistory().at(-1);
|
|
194
197
|
assert.ok((last === null || last === void 0 ? void 0 : last.role) === 'assistant' && last.content.startsWith("I'm sorry"));
|
|
195
198
|
}));
|
|
196
199
|
stale.run();
|
|
200
|
+
// ---------------------------------------------------------------------------
|
|
201
|
+
// sub-agents — forced tool use + typed completion/failure union (GENC-1312)
|
|
202
|
+
//
|
|
203
|
+
// A child sub-agent driver shares the parent's provider registry, so one
|
|
204
|
+
// scripted queue drives both: script the parent's delegating turn, then the
|
|
205
|
+
// worker's turn(s), in order.
|
|
206
|
+
// ---------------------------------------------------------------------------
|
|
207
|
+
const subagent = createLogicSuite('ChatDriver sub-agents');
|
|
208
|
+
subagent.after(() => {
|
|
209
|
+
// Safe to call again even if `stale` already closed it — close() is
|
|
210
|
+
// idempotent and cross-tab publishes are guarded by `&& this.channel`.
|
|
211
|
+
agenticActivityBus.close();
|
|
212
|
+
});
|
|
213
|
+
/** A sub-agent named `worker` that finishes by calling `completeSubAgent`. */
|
|
214
|
+
const completingWorker = (result) => agent({
|
|
215
|
+
name: 'worker',
|
|
216
|
+
toolDefinitions: [def('finish')],
|
|
217
|
+
toolHandlers: {
|
|
218
|
+
finish: (_args, ctx) => __awaiter(void 0, void 0, void 0, function* () {
|
|
219
|
+
var _a;
|
|
220
|
+
(_a = ctx.completeSubAgent) === null || _a === void 0 ? void 0 : _a.call(ctx, result);
|
|
221
|
+
return 'finished';
|
|
222
|
+
}),
|
|
223
|
+
},
|
|
224
|
+
});
|
|
225
|
+
/** A parent that delegates to `worker` and reports the outcome via `capture`. */
|
|
226
|
+
const delegatingParent = (sub, capture) => agent({
|
|
227
|
+
name: 'boss',
|
|
228
|
+
subAgents: [sub],
|
|
229
|
+
toolDefinitions: [def('delegate')],
|
|
230
|
+
toolHandlers: {
|
|
231
|
+
delegate: (_args, ctx) => __awaiter(void 0, void 0, void 0, function* () {
|
|
232
|
+
const outcome = yield ctx.requestSubAgent('worker', { task: 'do it' });
|
|
233
|
+
capture(outcome);
|
|
234
|
+
return outcome.ok ? 'sub-agent completed' : `sub-agent failed: ${outcome.reason}`;
|
|
235
|
+
}),
|
|
236
|
+
},
|
|
237
|
+
});
|
|
238
|
+
subagent('resolves { ok: true, result } when the sub-agent calls completeSubAgent', () => __awaiter(void 0, void 0, void 0, function* () {
|
|
239
|
+
let outcome;
|
|
240
|
+
const parent = delegatingParent(completingWorker({ value: 42 }), (o) => {
|
|
241
|
+
outcome = o;
|
|
242
|
+
});
|
|
243
|
+
const provider = scriptedProvider([
|
|
244
|
+
callsTool('delegate', 'd1'), // parent delegates to the worker
|
|
245
|
+
callsTool('finish', 'f1'), // worker completes
|
|
246
|
+
]);
|
|
247
|
+
yield makeDriver(parent, provider).sendMessage('go');
|
|
248
|
+
assert.equal(outcome, { ok: true, result: { value: 42 } });
|
|
249
|
+
}));
|
|
250
|
+
subagent('forces tool use on the sub-agent turn but not the parent turn', () => __awaiter(void 0, void 0, void 0, function* () {
|
|
251
|
+
const parent = delegatingParent(completingWorker({ done: true }), () => { });
|
|
252
|
+
const provider = scriptedProvider([callsTool('delegate', 'd1'), callsTool('finish', 'f1')]);
|
|
253
|
+
yield makeDriver(parent, provider).sendMessage('go');
|
|
254
|
+
// Call 0 is the parent's turn (may-call); call 1 is the worker's turn (must-call).
|
|
255
|
+
assert.is(provider.toolChoicePerCall[0], undefined, 'parent turn is not forced');
|
|
256
|
+
assert.is(provider.toolChoicePerCall[1], 'required', 'sub-agent turn forces a tool call');
|
|
257
|
+
assert.ok(provider.advertisedPerCall[1].includes('finish'), 'the worker advertised its completion tool');
|
|
258
|
+
}));
|
|
259
|
+
subagent('resolves { ok: false, reason } and records telemetry when the sub-agent never completes', () => __awaiter(void 0, void 0, void 0, function* () {
|
|
260
|
+
const sessionKey = 'subagent-unknown-tool-test';
|
|
261
|
+
clearMetaEventRegistry();
|
|
262
|
+
let outcome;
|
|
263
|
+
const worker = agent({
|
|
264
|
+
name: 'worker',
|
|
265
|
+
toolDefinitions: [def('real')],
|
|
266
|
+
toolHandlers: { real: () => __awaiter(void 0, void 0, void 0, function* () { return 'ok'; }) },
|
|
267
|
+
});
|
|
268
|
+
const parent = delegatingParent(worker, (o) => {
|
|
269
|
+
outcome = o;
|
|
270
|
+
});
|
|
271
|
+
// The worker repeatedly calls a tool it was never given, tripping the
|
|
272
|
+
// unknown-tool limit (DEFAULT_MAX_UNKNOWN_TOOL_CALLS = 5) without completing.
|
|
273
|
+
const provider = scriptedProvider([
|
|
274
|
+
callsTool('delegate', 'd1'),
|
|
275
|
+
...Array.from({ length: 5 }, (_unused, i) => callsTool('made_up', `u${i}`)),
|
|
276
|
+
]);
|
|
277
|
+
yield makeDriver(parent, provider, sessionKey).sendMessage('go');
|
|
278
|
+
assert.equal(outcome, { ok: false, reason: 'unknown_tool_limit' });
|
|
279
|
+
// The failure surfaces as a high-importance `subagent.failed` meta event,
|
|
280
|
+
// recorded under the PARENT driver's session so it lands on the user-visible
|
|
281
|
+
// debug-log timeline — not orphaned in the child's own session bucket.
|
|
282
|
+
assert.ok(getMetaEvents(sessionKey).some((e) => {
|
|
283
|
+
var _a, _b;
|
|
284
|
+
return e.type === 'subagent.failed' &&
|
|
285
|
+
((_a = e.detail) === null || _a === void 0 ? void 0 : _a.agent) === 'worker' &&
|
|
286
|
+
((_b = e.detail) === null || _b === void 0 ? void 0 : _b.reason) === 'unknown_tool_limit';
|
|
287
|
+
}), 'a subagent.failed meta event should be recorded under the parent session');
|
|
288
|
+
assert.not.ok(getMetaEvents('').some((e) => e.type === 'subagent.failed'), 'the failure must not be orphaned in the child default session bucket');
|
|
289
|
+
}));
|
|
290
|
+
subagent('defaults to { ok: false, reason: "max_iterations" } when the sub-agent ends without completing', () => __awaiter(void 0, void 0, void 0, function* () {
|
|
291
|
+
const sessionKey = 'subagent-default-fail-test';
|
|
292
|
+
clearMetaEventRegistry();
|
|
293
|
+
let outcome;
|
|
294
|
+
const worker = agent({
|
|
295
|
+
name: 'worker',
|
|
296
|
+
toolDefinitions: [def('noop')],
|
|
297
|
+
toolHandlers: { noop: () => __awaiter(void 0, void 0, void 0, function* () { return 'ok'; }) },
|
|
298
|
+
});
|
|
299
|
+
const parent = delegatingParent(worker, (o) => {
|
|
300
|
+
outcome = o;
|
|
301
|
+
});
|
|
302
|
+
// No script for the worker turn → it returns a plain-text reply and ends
|
|
303
|
+
// without ever calling a completion tool (the child records no explicit
|
|
304
|
+
// failure reason).
|
|
305
|
+
const provider = scriptedProvider([callsTool('delegate', 'd1')]);
|
|
306
|
+
yield makeDriver(parent, provider, sessionKey).sendMessage('go');
|
|
307
|
+
assert.equal(outcome, { ok: false, reason: 'max_iterations' });
|
|
308
|
+
// Even the defensive default is reported to the parent session — this is the
|
|
309
|
+
// only telemetry path when the child recorded no explicit failure.
|
|
310
|
+
assert.ok(getMetaEvents(sessionKey).some((e) => { var _a; return e.type === 'subagent.failed' && ((_a = e.detail) === null || _a === void 0 ? void 0 : _a.reason) === 'max_iterations'; }), 'the default failure should still record a subagent.failed meta event');
|
|
311
|
+
}));
|
|
312
|
+
subagent("forwards the sub-agent's turns onto the parent timeline, numbered under the activating turn", () => __awaiter(void 0, void 0, void 0, function* () {
|
|
313
|
+
const parent = delegatingParent(completingWorker({ done: true }), () => { });
|
|
314
|
+
const provider = scriptedProvider([callsTool('delegate', 'd1'), callsTool('finish', 'f1')]);
|
|
315
|
+
const driver = makeDriver(parent, provider);
|
|
316
|
+
yield driver.sendMessage('go');
|
|
317
|
+
const snaps = driver.getTurnSnapshots();
|
|
318
|
+
// Parent turn 0 activated the sub-agent, so the worker's single turn is "0-1".
|
|
319
|
+
const childSnap = snaps.find((s) => s.turnIndex === '0-1');
|
|
320
|
+
assert.ok(childSnap, 'the sub-agent\'s turn should be forwarded as "0-1"');
|
|
321
|
+
assert.is(childSnap.agentName, 'worker', 'the forwarded snapshot keeps the sub-agent name');
|
|
322
|
+
assert.ok(childSnap.toolNames.includes('finish'), 'and records the tools the sub-agent saw');
|
|
323
|
+
// The parent's own turns keep the bare counter form — a string such as '0', with no '-N' sub-agent suffix.
|
|
324
|
+
assert.ok(snaps.some((s) => s.turnIndex === '0'), 'the activating parent turn is present as a bare string counter');
|
|
325
|
+
}));
|
|
326
|
+
subagent.run();
|
package/dist/esm/main/main.js
CHANGED
|
@@ -1293,7 +1293,7 @@ let FoundationAiAssistant = FoundationAiAssistant_1 = class FoundationAiAssistan
|
|
|
1293
1293
|
// prompt is still shown in full whenever it changes, so prompt evolution
|
|
1294
1294
|
// stays visible.
|
|
1295
1295
|
let lastFullPrompt;
|
|
1296
|
-
let lastFullIndex =
|
|
1296
|
+
let lastFullIndex = '';
|
|
1297
1297
|
const turns = ((_e = (_d = (_c = this.driver) === null || _c === void 0 ? void 0 : _c.getTurnSnapshots) === null || _d === void 0 ? void 0 : _d.call(_c)) !== null && _e !== void 0 ? _e : []).map((t) => {
|
|
1298
1298
|
let { systemPrompt } = t;
|
|
1299
1299
|
if (systemPrompt != null && systemPrompt === lastFullPrompt) {
|
|
@@ -36,6 +36,7 @@
|
|
|
36
36
|
export const META_EVENT_IMPORTANCE = {
|
|
37
37
|
'turn.error': 'high',
|
|
38
38
|
'tool.failed': 'high',
|
|
39
|
+
'subagent.failed': 'high',
|
|
39
40
|
'file.read-failed': 'high',
|
|
40
41
|
'suggestions.failed': 'high',
|
|
41
42
|
'context.threshold-crossed': 'high',
|
|
@@ -135,7 +136,7 @@ export const DEBUG_LOG_README = [
|
|
|
135
136
|
'This is an exported debug log for the Genesis AI assistant. Read it top-to-bottom.',
|
|
136
137
|
'`timeline` is the entire session as one array, already sorted chronologically by `timestamp` (ISO 8601). Every entry has a `kind`.',
|
|
137
138
|
"kind:'message' — the conversation. `role` is user/assistant/tool/system-event; `agentName` says which agent produced it; `toolCalls`/`toolResult`/`interaction` carry tool and widget activity; `inputTokens`/`outputTokens`/`cost` are per-message usage.",
|
|
138
|
-
"kind:'turn' — one LLM call. `systemPrompt` and `toolNames` are what the model saw. A systemPrompt of '<repeated — identical to turn N>' was byte-identical to turn N and de-duplicated; the full prompt is shown whenever it changes (often because a stateful agent advanced), so prompt evolution is visible.",
|
|
139
|
+
"kind:'turn' — one LLM call. `turnIndex` is a string: a top-level turn is the bare counter ('0', '1', …); a sub-agent's turns are numbered under the parent turn that activated them ('3-1', '3-2', …, and a nested sub-agent contributes '3-2-1', …), and `agentName` names the agent that ran the turn. `systemPrompt` and `toolNames` are what the model saw. A systemPrompt of '<repeated — identical to turn N>' was byte-identical to turn N and de-duplicated; the full prompt is shown whenever it changes (often because a stateful agent advanced), so prompt evolution is visible.",
|
|
139
140
|
"kind:'turn'.`agentSnapshot` — the active agent's own view of its internal state, captured at that turn. An agent opts into this by exposing a `getDebugSnapshot()` that returns JSON-serializable per-state info; stateful/flow agents wire it automatically, so you can watch a flow advance turn-by-turn (e.g. current step, cursor, collected fields, pending changes). Absent for agents that don't expose one.",
|
|
140
141
|
"kind:'event' — a meta/lifecycle event. `type` names it (see below); `detail` carries structured data. `detail.placement` is the emitting UI instance: 'bubble' (collapsed), 'panel' (popped-out), or 'standalone'.",
|
|
141
142
|
"Each 'event' also has an `importance`: 'high' (failures/limits — turn.error, tool.failed, subagent.failed, file.read-failed, suggestions.failed, context.threshold-crossed), 'normal' (session flow — connects, turns, retries, handoffs, agent/provider changes, interactions), or 'low' (skippable UI/bookkeeping noise — panel.toggled, attachment.added, driver.wired/unwired, context.updated). To skim, ignore importance:'low'; to triage a failure, filter to importance:'high' then read the nearby messages and turns. A 'high' turn.error is often preceded by one or more 'normal' turn.retry events for the same reason — read them together to see how many attempts were made before bailing. 'message' and 'turn' entries carry no importance — they are the substance, always read them.",
|