@lloyal-labs/lloyal-agents 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/LICENSE +107 -0
  2. package/LICENSE-FAQ.md +256 -0
  3. package/README.md +31 -15
  4. package/dist/Agent.d.ts +15 -4
  5. package/dist/Agent.d.ts.map +1 -1
  6. package/dist/Agent.js +12 -2
  7. package/dist/Agent.js.map +1 -1
  8. package/dist/AgentPolicy.d.ts +92 -15
  9. package/dist/AgentPolicy.d.ts.map +1 -1
  10. package/dist/AgentPolicy.js +42 -14
  11. package/dist/AgentPolicy.js.map +1 -1
  12. package/dist/Tool.d.ts +45 -1
  13. package/dist/Tool.d.ts.map +1 -1
  14. package/dist/Tool.js +50 -2
  15. package/dist/Tool.js.map +1 -1
  16. package/dist/agent-pool.d.ts +4 -4
  17. package/dist/agent-pool.d.ts.map +1 -1
  18. package/dist/agent-pool.js +224 -53
  19. package/dist/agent-pool.js.map +1 -1
  20. package/dist/app-config.d.ts +50 -0
  21. package/dist/app-config.d.ts.map +1 -0
  22. package/dist/app-config.js +27 -0
  23. package/dist/app-config.js.map +1 -0
  24. package/dist/app-types.d.ts +309 -0
  25. package/dist/app-types.d.ts.map +1 -0
  26. package/dist/app-types.js +28 -0
  27. package/dist/app-types.js.map +1 -0
  28. package/dist/chunk.d.ts +118 -0
  29. package/dist/chunk.d.ts.map +1 -0
  30. package/dist/chunk.js +19 -0
  31. package/dist/chunk.js.map +1 -0
  32. package/dist/context.d.ts +76 -20
  33. package/dist/context.d.ts.map +1 -1
  34. package/dist/context.js +72 -20
  35. package/dist/context.js.map +1 -1
  36. package/dist/create-agent-pool.d.ts +18 -12
  37. package/dist/create-agent-pool.d.ts.map +1 -1
  38. package/dist/create-agent-pool.js +30 -29
  39. package/dist/create-agent-pool.js.map +1 -1
  40. package/dist/grant-store.d.ts +49 -0
  41. package/dist/grant-store.d.ts.map +1 -0
  42. package/dist/grant-store.js +33 -0
  43. package/dist/grant-store.js.map +1 -0
  44. package/dist/index.d.ts +10 -6
  45. package/dist/index.d.ts.map +1 -1
  46. package/dist/index.js +9 -5
  47. package/dist/index.js.map +1 -1
  48. package/dist/orchestrators.d.ts +15 -8
  49. package/dist/orchestrators.d.ts.map +1 -1
  50. package/dist/orchestrators.js +10 -10
  51. package/dist/orchestrators.js.map +1 -1
  52. package/dist/replay.d.ts +19 -19
  53. package/dist/replay.d.ts.map +1 -1
  54. package/dist/replay.js +29 -29
  55. package/dist/replay.js.map +1 -1
  56. package/dist/source.d.ts +31 -1
  57. package/dist/source.d.ts.map +1 -1
  58. package/dist/source.js +32 -2
  59. package/dist/source.js.map +1 -1
  60. package/dist/spine.d.ts +100 -0
  61. package/dist/spine.d.ts.map +1 -0
  62. package/dist/{shared-root.js → spine.js} +57 -38
  63. package/dist/spine.js.map +1 -0
  64. package/dist/toolkit.d.ts +44 -17
  65. package/dist/toolkit.d.ts.map +1 -1
  66. package/dist/toolkit.js +24 -14
  67. package/dist/toolkit.js.map +1 -1
  68. package/dist/trace-types.d.ts +36 -4
  69. package/dist/trace-types.d.ts.map +1 -1
  70. package/dist/types.d.ts +46 -15
  71. package/dist/types.d.ts.map +1 -1
  72. package/dist/use-agent.d.ts +10 -5
  73. package/dist/use-agent.d.ts.map +1 -1
  74. package/dist/use-agent.js +18 -15
  75. package/dist/use-agent.js.map +1 -1
  76. package/package.json +7 -5
  77. package/dist/shared-root.d.ts +0 -96
  78. package/dist/shared-root.d.ts.map +0 -1
  79. package/dist/shared-root.js.map +0 -1
@@ -9,6 +9,7 @@ const sdk_2 = require("@lloyal-labs/sdk");
9
9
  const trace_scope_1 = require("./trace-scope");
10
10
  const Agent_1 = require("./Agent");
11
11
  const AgentPolicy_1 = require("./AgentPolicy");
12
+ const Tool_1 = require("./Tool");
12
13
  /**
13
14
  * Immutable KV budget snapshot for one tick of the agent loop
14
15
  *
@@ -177,16 +178,16 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events, pr
177
178
  type: 'pool:recoveryProduce', agentId: agent.id,
178
179
  tokenCount: producedTokens, outputLength: output.length,
179
180
  });
180
- // Parse + report
181
+ // Parse + return (recovery path — emits agent:recovered, NOT agent:return)
181
182
  try {
182
183
  const parsed = JSON.parse(output);
183
184
  if (parsed?.result) {
184
- agent.reportResult(parsed.result, 'scratchpad');
185
- yield* events.send({ type: 'agent:report', agentId: agent.id, result: agent.result });
185
+ agent.setResult(stripDanglingToolCall(parsed.result), 'recovery');
186
+ yield* events.send({ type: 'agent:recovered', agentId: agent.id, result: agent.result });
186
187
  reported = true;
187
188
  tw.write({
188
189
  traceId: tw.nextId(), parentTraceId, ts: performance.now(),
189
- type: 'pool:recoveryReport', agentId: agent.id,
190
+ type: 'pool:recoveryReturn', agentId: agent.id,
190
191
  resultLength: parsed.result.length,
191
192
  });
192
193
  }
@@ -221,10 +222,27 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events, pr
221
222
  // ── PRODUCE action handlers ─────────────────────────────────────
222
223
  // Each handler encapsulates state transitions, events, and trace for one
223
224
  // policy action outcome. The PRODUCE switch dispatches to these.
224
- function* handleFreeTextReport(a, content, events) {
225
- a.reportResult(content, 'free_text');
225
+ /**
226
+ * Strip a trailing UNCLOSED `<tool_call>` fragment from text captured as an
227
+ * agent result. When generation is cut mid-tool-call-emission (produce
228
+ * budget, pressure, maxTurns), the parser finds no complete call and the
229
+ * raw tail — `…</think>\n<tool_call><function=read_file>…` with no closing
230
+ * tags — rides into `a.result` verbatim. Any downstream consumer that
231
+ * injects results into another agent's prompt (synth findings, delegation
232
+ * returns) then carries a literal in-context demonstration of emitting tool
233
+ * calls, priming no-tool agents to imitate it (observed:
234
+ * trace-2026-06-11T00-02, agent 65539 → synth rabbit hole).
235
+ *
236
+ * Complete `<tool_call>…</tool_call>` blocks are left alone — they are
237
+ * either parsed before reaching a capture path or deliberate quoting.
238
+ */
239
+ function stripDanglingToolCall(text) {
240
+ return text.replace(/<tool_call>(?:(?!<\/tool_call>)[\s\S])*$/, '').trimEnd();
241
+ }
242
+ function* handleFreeTextReturn(a, content, events) {
243
+ a.setResult(stripDanglingToolCall(content), 'free_text');
226
244
  a.transition('idle');
227
- yield* events.send({ type: 'agent:report', agentId: a.id, result: a.result });
245
+ yield* events.send({ type: 'agent:return', agentId: a.id, result: a.result });
228
246
  yield* events.send({ type: 'agent:done', agentId: a.id });
229
247
  }
230
248
  function* handleIdleDrop(a, reason, events, tw, parentTraceId) {
@@ -246,14 +264,14 @@ function* handleNudge(a, message, tc, ctx, tools) {
246
264
  a.resetTurn();
247
265
  return { agentId: a.id, prefillTokens, toolName: tc?.name || '', callId, args: tc?.arguments || '', probe };
248
266
  }
249
- function* handleReport(a, result, tc, terminalTool, pruneOnReport, events) {
250
- a.reportResult(result, 'report_tool');
267
+ function* handleReturn(a, result, tc, terminalToolName, pruneOnReturn, events) {
268
+ a.setResult(stripDanglingToolCall(result), 'voluntary_return');
251
269
  a.transition('idle');
252
270
  a.incrementToolCalls();
253
- yield* events.send({ type: 'agent:tool_call', agentId: a.id, tool: terminalTool, args: tc.arguments });
254
- yield* events.send({ type: 'agent:report', agentId: a.id, result: a.result });
271
+ yield* events.send({ type: 'agent:tool_call', agentId: a.id, tool: terminalToolName, args: tc.arguments });
272
+ yield* events.send({ type: 'agent:return', agentId: a.id, result: a.result });
255
273
  yield* events.send({ type: 'agent:done', agentId: a.id });
256
- if (pruneOnReport && !a.branch.disposed)
274
+ if (pruneOnReturn && !a.branch.disposed)
257
275
  a.branch.pruneSync();
258
276
  }
259
277
  /**
@@ -264,19 +282,19 @@ function* handleReport(a, result, tc, terminalTool, pruneOnReport, events) {
264
282
  * automatically — the orphaned-branch leak is structurally impossible.
265
283
  */
266
284
  function* setupAgent(parent, task, ctx, enableThinking) {
267
- // Probe shared-root mode. When set, the queryRoot already has the
268
- // [system + tools] chat header prefilled and we MUST NOT re-emit them
269
- // in the agent's suffix — the bytes are already in attention via fork
270
- // prefix-share. The new agent inherits parser/grammar/format/triggers
271
- // from sharedFmt so tool dispatch keeps working.
285
+ // Probe shared-mode. When set, the spine already has the [system + tools]
286
+ // chat header prefilled and we MUST NOT re-emit them in the agent's
287
+ // suffix — the bytes are already in attention via fork prefix-share. The
288
+ // new agent inherits parser/grammar/format/triggers from sharedFmt so
289
+ // tool dispatch keeps working.
272
290
  let sharedFmt = null;
273
291
  try {
274
- sharedFmt = (yield* context_1.RootFmt.get()) ?? null;
292
+ sharedFmt = (yield* context_1.SpineFmt.get()) ?? null;
275
293
  }
276
294
  catch { /* not in shared mode */ }
277
295
  // Compose the messages to format into the suffix. In shared mode with
278
296
  // an empty per-spec systemPrompt, drop the system message — the role
279
- // lives at the root, the agent only contributes a user turn. With a
297
+ // lives at the spine, the agent only contributes a user turn. With a
280
298
  // non-empty per-spec systemPrompt, include it: the agent's KV will
281
299
  // contain TWO system messages in lineage, which Qwen3 handles (recovery
282
300
  // ships on the same multi-system pattern).
@@ -287,13 +305,13 @@ function* setupAgent(parent, task, ctx, enableThinking) {
287
305
  { role: 'user', content: task.content },
288
306
  ];
289
307
  const fmtOpts = { enableThinking };
290
- // Tools belong at the root in shared mode; emitting them again here
308
+ // Tools belong at the spine in shared mode; emitting them again here
291
309
  // would re-prefill the same schema bytes for nothing.
292
310
  if (task.tools && !sharedFmt)
293
311
  fmtOpts.tools = task.tools;
294
312
  const fmt = ctx.formatChatSync(JSON.stringify(messages), fmtOpts);
295
313
  // Tool-support guard runs only on the non-shared path. Shared mode's
296
- // root already passed the equivalent check at withSharedRoot setup.
314
+ // spine already passed the equivalent check at withSpine setup.
297
315
  if (task.tools && !sharedFmt
298
316
  && (fmt.format === sdk_1.CHAT_FORMAT_CONTENT_ONLY || fmt.format === sdk_1.CHAT_FORMAT_GENERIC)) {
299
317
  // Error before fork — no branch to clean up
@@ -312,9 +330,14 @@ function* setupAgent(parent, task, ctx, enableThinking) {
312
330
  callingAgent = a;
313
331
  }
314
332
  catch { /* top-level — no caller */ }
333
+ // The spawn's app membership is now a non-enforcing label:
334
+ // the authGuard gates tools by `Tool.protected` + session grants at the
335
+ // pool level, not by app-scoped allow-lists. The label is carried for
336
+ // trace attribution (`tool:authReject`) and harness UI only.
337
+ const assignedApp = task.assignedApp ?? null;
315
338
  // In shared mode the new agent's parser/grammar/format/triggers come
316
- // from the root's pre-computed fmt — those fields know about the tool
317
- // palette that's in attention via the inherited prefix. In non-shared
339
+ // from the spine's pre-computed fmt — those fields know about the tool
340
+ // set that's in attention via the inherited prefix. In non-shared
318
341
  // mode, fresh fmt drives those fields (existing behavior).
319
342
  const fmtConfig = sharedFmt
320
343
  ? {
@@ -344,6 +367,7 @@ function* setupAgent(parent, task, ctx, enableThinking) {
344
367
  parent: callingAgent,
345
368
  task: task.content,
346
369
  fmt: fmtConfig,
370
+ assignedApp,
347
371
  });
348
372
  return { agent, suffixTokens, formattedPrompt: fmt.prompt };
349
373
  }
@@ -373,17 +397,17 @@ function* setupAgent(parent, task, ctx, enableThinking) {
373
397
  * @param opts - Pool configuration: tasks, tools, sampling params, max turns
374
398
  * @returns Agent pool result with per-agent findings and aggregate statistics
375
399
  *
376
- * @example Shared root with agent pool
400
+ * @example Spine with agent pool
377
401
  * ```typescript
378
- * const pool = yield* withSharedRoot(
402
+ * const pool = yield* withSpine(
379
403
  * { systemPrompt: RESEARCH_PROMPT, tools: toolsJson },
380
- * function*(root) {
404
+ * function*(spine) {
381
405
  * return yield* useAgentPool({
382
406
  * tasks: questions.map(q => ({
383
407
  * systemPrompt: RESEARCH_PROMPT,
384
408
  * content: q,
385
409
  * tools: toolsJson,
386
- * parent: root,
410
+ * parent: spine,
387
411
  * })),
388
412
  * tools: toolMap,
389
413
  * maxTurns: 6,
@@ -409,7 +433,7 @@ function useAgentPool(opts) {
409
433
  }
410
434
  });
411
435
  const tw = yield* context_1.Trace.expect();
412
- const { root, orchestrate, toolsJson, tools, maxTurns = 100, terminalTool, trace = false, pruneOnReport = false, enableThinking = false } = opts;
436
+ const { spine, orchestrate, toolsJson, tools, maxTurns = 100, terminalToolName, trace = false, pruneOnReturn = false, enableThinking = false, eagerGrammar } = opts;
413
437
  // Tool index map for trace — position in toolkit array
414
438
  const toolIndexMap = new Map([...tools.keys()].map((name, i) => [name, i]));
415
439
  const toolkitSize = tools.size;
@@ -421,7 +445,7 @@ function useAgentPool(opts) {
421
445
  poolParentTraceId = p;
422
446
  }
423
447
  catch { /* top level */ }
424
- const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', { maxTurns, terminalTool });
448
+ const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', { maxTurns, terminalToolName });
425
449
  // Whether the pool's tool registry contains tools besides the terminal tool.
426
450
  // When false, agents are allowed to call the terminal tool as their first
427
451
  // action (e.g. reporter sub-agents that only have `report()`). When true,
@@ -432,7 +456,7 @@ function useAgentPool(opts) {
432
456
  // schemas (`task.tools`). A reporter pool must pass only the terminal tool
433
457
  // in its registry — passing the full tool map makes this flag true and
434
458
  // traps reporters in an infinite rejection loop.
435
- const hasNonTerminalTools = terminalTool ? [...tools.keys()].some(k => k !== terminalTool) : tools.size > 0;
459
+ const hasNonTerminalTools = terminalToolName ? [...tools.keys()].some(k => k !== terminalToolName) : tools.size > 0;
436
460
  const policy = opts.policy ?? new AgentPolicy_1.DefaultAgentPolicy();
437
461
  const pressureOpts = policy.pressureThresholds
438
462
  ?? { softLimit: ContextPressure.DEFAULT_SOFT_LIMIT, hardLimit: ContextPressure.DEFAULT_HARD_LIMIT };
@@ -450,7 +474,23 @@ function useAgentPool(opts) {
450
474
  `Recovery reserves hardLimit cells for its own decode; if smaller than nBatch, the next batch ` +
451
475
  `allocation will OOM. Increase policy.budget.context.hardLimit to at least ${nBatch}.`);
452
476
  }
453
- const policyConfig = { maxTurns, terminalTool, hasNonTerminalTools };
477
+ // authGuard inputs, resolved once per pool:
478
+ // • protectedTools — names this pool's registry flags `Tool.protected`.
479
+ // • grants — protected names the session is authorized to call, read
480
+ // from GrantStoreCtx. Absent store = fail-closed (no grants).
481
+ // When nothing is protected (the common case) the authGuard never fires.
482
+ const protectedTools = new Set([...tools].filter(([, t]) => t.protected).map(([name]) => name));
483
+ let grants = new Set();
484
+ if (protectedTools.size > 0) {
485
+ try {
486
+ const grantStore = yield* context_1.GrantStoreCtx.expect();
487
+ grants = new Set(yield* grantStore.granted());
488
+ }
489
+ catch { /* no grant store on context — fail-closed (no grants) */ }
490
+ }
491
+ const policyConfig = {
492
+ maxTurns, terminalToolName, hasNonTerminalTools, protectedTools, grants,
493
+ };
454
494
  // ── Orchestrator-driven setup ────────────────────────────
455
495
  // Agents are spawned lazily via `ctx.spawn` from the orchestrator.
456
496
  // The tick loop iterates over whatever agents are currently active.
@@ -469,7 +509,24 @@ function useAgentPool(opts) {
469
509
  });
470
510
  // Lazy grammar setup — applied inside ctx.spawn after prefill completes.
471
511
  const applyLazyGrammar = (a) => {
472
- if (a.fmt.grammar && a.fmt.grammarLazy && a.fmt.grammarTriggers.length > 0) {
512
+ // Eager grammar (schema-based agents like the planner) takes priority
513
+ // over lazy tool-call grammar. Qwen3.5's chat template emits a lazy
514
+ // tool-call grammar even when no tools are passed (a non-empty
515
+ // fmt.grammar with a `<tool_call>` trigger), which would otherwise
516
+ // overwrite a schema grammar set elsewhere — the planner would still
517
+ // be unconstrained. With eager set, we use the strict schema grammar
518
+ // and skip the (no-tools-anyway) lazy trigger.
519
+ if (eagerGrammar) {
520
+ a.branch.setGrammar(eagerGrammar);
521
+ }
522
+ else if (tools.size > 0 && a.fmt.grammar && a.fmt.grammarLazy && a.fmt.grammarTriggers.length > 0) {
523
+ // tools.size guard: with an empty toolkit there is nothing to
524
+ // dispatch, but the template still emits a tool-call grammar (see
525
+ // above). Installing it would not BLOCK the `<tool_call>` trigger —
526
+ // lazy grammars activate on the trigger, they don't prevent it —
527
+ // but once triggered it FORCES syntactic completion of a full call
528
+ // the model may have sampled into by accident. A no-tool agent
529
+ // (synth, eval) must be free to wander back to prose instead.
473
530
  const triggers = a.fmt.grammarTriggers.map(t => {
474
531
  if (t.type === sdk_1.GrammarTriggerType.WORD) {
475
532
  const nlIdx = t.value.indexOf('\n');
@@ -492,21 +549,29 @@ function useAgentPool(opts) {
492
549
  });
493
550
  // ── PoolContext — orchestrator's API surface ─────────────
494
551
  const poolContext = {
495
- root,
552
+ spine,
496
553
  *spawn(spec) {
497
- const parent = spec.parent ?? root;
554
+ const parent = spec.parent ?? spine;
498
555
  const task = {
499
556
  systemPrompt: spec.systemPrompt,
500
557
  content: spec.content,
501
558
  tools: toolsJson,
502
559
  seed: spec.seed,
503
560
  parent,
561
+ assignedApp: spec.assignedApp,
504
562
  };
505
563
  // Synchronous setup — fork, tokenize suffix, pressure check.
506
564
  // No native store call yet; that's the tick loop's SPAWN phase's job.
507
565
  const { agent, suffixTokens, formattedPrompt } = yield* setupAgent(parent, task, ctx, enableThinking);
508
566
  const pressure = new ContextPressure(ctx, pressureOpts);
509
- if (!pressure.canFit(suffixTokens.length)) {
567
+ // Reserve for batch-mates: spawns/extends admitted earlier this tick
568
+ // haven't prefilled yet, so raw pressure doesn't see them. Without
569
+ // the reservation, N individually-valid spawns cram N suffixes into
570
+ // one SPAWN-phase prefill and every agent dies pressure_softcut on
571
+ // turn 0 (trace-2026-06-11T06-21: 6 × 4,819-token suffixes vs 32k).
572
+ const reserved = pendingSpawns.reduce((acc, ps) => acc + ps.suffixTokens.length, 0) +
573
+ pendingExtends.reduce((acc, pe) => acc + (pe.discarded ? 0 : pe.tokens.length), 0);
574
+ if (!pressure.canFit(reserved + suffixTokens.length)) {
510
575
  agent.branch.pruneSync();
511
576
  agent.dispose();
512
577
  tw.write({
@@ -543,7 +608,7 @@ function useAgentPool(opts) {
543
608
  }
544
609
  return agent;
545
610
  },
546
- *extendRoot(userContent, assistantContent) {
611
+ *extendSpine(userContent, assistantContent) {
547
612
  if (!assistantContent)
548
613
  return 0;
549
614
  const turnTokens = (0, sdk_2.buildTurnDelta)(ctx, userContent, assistantContent);
@@ -661,10 +726,18 @@ function useAgentPool(opts) {
661
726
  }
662
727
  return deferred;
663
728
  }
729
+ /** Transient-failure parking: a ToolRetryError'd call waits here with its
730
+ * agent in `awaiting_tool` (PRODUCE skips it — no turns, no tokens, no
731
+ * KV) until `notBefore`, then re-enters DISPATCH. Whether to park and
732
+ * for how long is the POLICY's call (`onToolRetry`); this queue is
733
+ * pure mechanism, like SETTLE's deferral. Keep retry delays above the
734
+ * provider's own breaker cooldown or the retry lands on an open
735
+ * breaker. */
736
+ const pendingRetries = [];
664
737
  /** DISPATCH: execute tool calls sequentially, return settled items for next tick */
665
738
  function* dispatch(calls) {
666
739
  const results = [];
667
- for (const { agent, tc } of calls) {
740
+ for (const { agent, tc, retryAttempt, retryCallId } of calls) {
668
741
  let toolArgs;
669
742
  try {
670
743
  toolArgs = JSON.parse(tc.arguments);
@@ -672,11 +745,15 @@ function useAgentPool(opts) {
672
745
  catch {
673
746
  toolArgs = {};
674
747
  }
675
- const callId = tc.id || `call_${agent.toolCallCount}`;
676
- agent.incrementToolCalls();
677
- totalToolCalls++;
678
- agent.incrementTurns();
679
- yield* poolChannel.send({ type: 'agent:tool_call', agentId: agent.id, tool: tc.name, args: tc.arguments });
748
+ const callId = retryCallId ?? (tc.id || `call_${agent.toolCallCount}`);
749
+ // Retries re-execute the SAME call — turn/tool-call counters and the
750
+ // agent:tool_call event belong to the original attempt only.
751
+ if (retryAttempt === undefined) {
752
+ agent.incrementToolCalls();
753
+ totalToolCalls++;
754
+ agent.incrementTurns();
755
+ yield* poolChannel.send({ type: 'agent:tool_call', agentId: agent.id, tool: tc.name, args: tc.arguments });
756
+ }
680
757
  const tool = tools.get(tc.name);
681
758
  const dispatchPressure = new ContextPressure(ctx, pressureOpts);
682
759
  const explore = policy.shouldExplore?.(agent, dispatchPressure) ?? true;
@@ -704,8 +781,19 @@ function useAgentPool(opts) {
704
781
  try {
705
782
  yield* context_1.TraceParent.set(dispatchTraceId);
706
783
  yield* context_1.CallingAgent.set(agent);
784
+ // Unknown-tool messaging branches on toolkit emptiness: a no-tool
785
+ // agent emitting tool calls is imitating markup from its context
786
+ // (inherited spine KV or contaminated findings) — a generic
787
+ // "Unknown tool" error reads as transient and invites rephrased
788
+ // retries until maxTurns (observed: trace-2026-06-11T00-02 synth,
789
+ // 10 turns of mimicry). The directive form names the actual
790
+ // situation so the model can recover in one turn.
707
791
  const result = yield* (0, effection_1.scoped)(function* () {
708
- return yield* (0, effection_1.call)(() => tool ? tool.execute(toolArgs, toolContext) : Promise.resolve({ error: `Unknown tool: ${tc.name}` }));
792
+ return yield* (0, effection_1.call)(() => tool ? tool.execute(toolArgs, toolContext) : Promise.resolve({
793
+ error: tools.size === 0
794
+ ? 'No tools are available to this agent. Do not emit tool calls — write your answer directly as plain text.'
795
+ : `Unknown tool: ${tc.name}`,
796
+ }));
709
797
  });
710
798
  const postToolPressure = new ContextPressure(ctx, pressureOpts);
711
799
  const contextAvailablePercent = postToolPressure.percentAvailable;
@@ -730,8 +818,52 @@ function useAgentPool(opts) {
730
818
  durationMs: performance.now() - toolT0 });
731
819
  }
732
820
  catch (err) {
821
+ if (err instanceof Tool_1.ToolRetryError) {
822
+ const attempt = (retryAttempt ?? 0) + 1;
823
+ // Strategy is the policy's: park-and-retry (optionally overriding
824
+ // the tool's delay estimate) or fail the call so the model can
825
+ // pivot. Hook absent → one retry at the tool's estimate.
826
+ const retryAction = policy.onToolRetry?.(agent, tc.name, err, attempt)
827
+ ?? (attempt <= 1 ? { type: 'retry' } : { type: 'fail' });
828
+ if (retryAction.type === 'retry') {
829
+ // Park: no SettledTool, nothing prefilled — the agent's KV
830
+ // never sees transient infrastructure weather. Surfaced to
831
+ // the TUI + trace so a waiting agent reads as waiting, not hung.
832
+ const afterMs = retryAction.afterMs ?? err.retryAfterMs;
833
+ pendingRetries.push({
834
+ agent, tc, callId,
835
+ notBefore: performance.now() + afterMs,
836
+ attempt,
837
+ });
838
+ yield* poolChannel.send({
839
+ type: 'agent:tool_retry', agentId: agent.id, tool: tc.name,
840
+ retryAfterMs: afterMs, attempt,
841
+ });
842
+ tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
843
+ type: 'tool:retry', agentId: agent.id, tool: tc.name,
844
+ callId, retryAfterMs: afterMs, attempt });
845
+ continue;
846
+ }
847
+ // Policy chose fail — the outage is now a fact the model needs.
848
+ // Settle an honest, directive result through the normal path
849
+ // (NOT the tool_error path, which kills the agent's run).
850
+ const exhausted = {
851
+ error: retryAction.message
852
+ ?? `${tc.name} is currently unavailable (rate-limited; retry failed). ` +
853
+ `Do not call ${tc.name} again — use other sources or proceed with your current findings.`,
854
+ };
855
+ const resultStr = JSON.stringify(exhausted);
856
+ yield* poolChannel.send({ type: 'agent:tool_result', agentId: agent.id, tool: tc.name, result: resultStr });
857
+ const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId, { enableThinking: agent.fmt.enableThinking });
858
+ results.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId, args: tc.arguments, probe: undefined });
859
+ tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
860
+ type: 'tool:result', agentId: agent.id, tool: tc.name,
861
+ result: exhausted, prefillTokenCount: prefillTokens.length,
862
+ durationMs: performance.now() - toolT0 });
863
+ continue;
864
+ }
733
865
  agent.transition('idle');
734
- agent.reportResult(`Tool error: ${err.message}`, 'tool_error');
866
+ agent.setResult(`Tool error: ${err.message}`, 'tool_error');
735
867
  tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
736
868
  type: 'tool:error', agentId: agent.id, tool: tc.name,
737
869
  error: err.message });
@@ -757,7 +889,7 @@ function useAgentPool(opts) {
757
889
  continue;
758
890
  }
759
891
  // -- Phase 0: SPAWN+EXTEND -- drain pending spawns AND pending extends,
760
- // batching all fork-suffix prefills and extend-onto-root prefills into
892
+ // batching all fork-suffix prefills and extend-onto-spine prefills into
761
893
  // ONE native store.prefill call. All store-level native calls in this
762
894
  // pool are issued from this fiber (the tick loop), never concurrently
763
895
  // with the orchestrator's fiber. Piggybacking extend in this phase
@@ -770,7 +902,7 @@ function useAgentPool(opts) {
770
902
  .filter(e => !e.discarded);
771
903
  const prefillPairs = [
772
904
  ...drainedSpawns.map(s => [s.agent.branch, s.suffixTokens]),
773
- ...drainedExtends.map(e => [root, e.tokens]),
905
+ ...drainedExtends.map(e => [spine, e.tokens]),
774
906
  ];
775
907
  try {
776
908
  if (prefillPairs.length > 0) {
@@ -782,7 +914,7 @@ function useAgentPool(opts) {
782
914
  e.reject(err);
783
915
  throw err;
784
916
  }
785
- // Resolve extend requests with the delta token count. root.position
917
+ // Resolve extend requests with the delta token count. spine.position
786
918
  // has advanced by the sum of extend token counts at this point.
787
919
  for (const e of drainedExtends) {
788
920
  tw.write({
@@ -791,7 +923,7 @@ function useAgentPool(opts) {
791
923
  userContent: e.userContent,
792
924
  assistantContent: e.assistantContent,
793
925
  deltaTokens: e.tokens.length,
794
- positionAfter: root.position,
926
+ positionAfter: spine.position,
795
927
  });
796
928
  e.resolve(e.tokens.length);
797
929
  }
@@ -860,19 +992,32 @@ function useAgentPool(opts) {
860
992
  // Policy decides what to do with the parsed output
861
993
  const action = policy.onProduced(a, parsed, pressure, policyConfig);
862
994
  switch (action.type) {
863
- case 'free_text_report':
864
- yield* handleFreeTextReport(a, action.content, poolChannel);
995
+ case 'free_text_return':
996
+ yield* handleFreeTextReturn(a, action.content, poolChannel);
865
997
  continue;
866
998
  case 'idle':
867
999
  yield* handleIdleDrop(a, action.reason, poolChannel, tw, poolScope.traceId);
868
1000
  continue;
869
1001
  case 'nudge':
1002
+ // authGuard rejection: emit the structured
1003
+ // tool:authReject event BEFORE the generic agentNudge so a
1004
+ // single trace pass captures attribution + rejection context.
1005
+ if (action.guard === 'auth_reject') {
1006
+ tw.write({
1007
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
1008
+ type: 'tool:authReject',
1009
+ agentId: a.id,
1010
+ assignedApp: a.assignedApp,
1011
+ attemptedTool: parsed.toolCalls[0].name,
1012
+ lineageHistory: a.walkAncestors((x) => x.toolHistory),
1013
+ });
1014
+ }
870
1015
  nudges.push(yield* handleNudge(a, action.message, parsed.toolCalls[0], ctx, tools));
871
1016
  tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
872
1017
  type: 'pool:agentNudge', agentId: a.id, reason: 'nudge', message: action.message });
873
1018
  continue;
874
- case 'report':
875
- yield* handleReport(a, action.result, parsed.toolCalls[0], terminalTool, pruneOnReport, poolChannel);
1019
+ case 'return':
1020
+ yield* handleReturn(a, action.result, parsed.toolCalls[0], terminalToolName, pruneOnReturn, poolChannel);
876
1021
  totalToolCalls++;
877
1022
  continue;
878
1023
  case 'tool_call':
@@ -971,7 +1116,19 @@ function useAgentPool(opts) {
971
1116
  deferred.push(...resolved);
972
1117
  }
973
1118
  // -- Phase 4: DISPATCH
974
- const dispatched = yield* dispatch(toolCalls);
1119
+ // Due retries re-enter first — their agents have been parked since the
1120
+ // ToolRetryError and re-execute the same call (same callId, no counter
1121
+ // increments).
1122
+ const nowTs = performance.now();
1123
+ const dueRetries = [];
1124
+ for (let i = pendingRetries.length - 1; i >= 0; i--) {
1125
+ if (pendingRetries[i].notBefore <= nowTs)
1126
+ dueRetries.unshift(...pendingRetries.splice(i, 1));
1127
+ }
1128
+ const dispatched = yield* dispatch([
1129
+ ...dueRetries.map(r => ({ agent: r.agent, tc: r.tc, retryAttempt: r.attempt, retryCallId: r.callId })),
1130
+ ...toolCalls,
1131
+ ]);
975
1132
  // Deferred + new dispatch results → next tick's SETTLE
976
1133
  pendingSettled = [...deferred, ...dispatched];
977
1134
  // -- Termination + recovery
@@ -996,6 +1153,20 @@ function useAgentPool(opts) {
996
1153
  // All current agents done but orchestrator may spawn more.
997
1154
  yield* (0, effection_1.sleep)(1);
998
1155
  }
1156
+ // All-parked: nothing active, nothing to settle — only future retries.
1157
+ // Without this the loop busy-spins until the earliest notBefore (parked
1158
+ // agents are awaiting_tool, so the allIdle sleep above never fires).
1159
+ // Cap the nap at 50ms so orchestrator spawns/extends are picked up
1160
+ // promptly.
1161
+ if (pendingRetries.length > 0
1162
+ && pendingSettled.length === 0
1163
+ && pendingSpawns.length === 0
1164
+ && pendingExtends.length === 0
1165
+ && !agents.some(a => a.status === 'active')) {
1166
+ const nextDue = Math.min(...pendingRetries.map(r => r.notBefore));
1167
+ const nap = Math.max(1, Math.min(50, nextDue - performance.now()));
1168
+ yield* (0, effection_1.sleep)(nap);
1169
+ }
999
1170
  }
1000
1171
  // ── Close channel with result — consumers get AgentPoolResult as close value ───────
1001
1172
  // Branch cleanup is handled by each branch's ensure() from setupAgent —