@lloyal-labs/lloyal-agents 1.7.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +81 -97
  2. package/dist/Agent.d.ts +26 -0
  3. package/dist/Agent.d.ts.map +1 -1
  4. package/dist/Agent.js +22 -0
  5. package/dist/Agent.js.map +1 -1
  6. package/dist/AgentPolicy.d.ts +27 -10
  7. package/dist/AgentPolicy.d.ts.map +1 -1
  8. package/dist/AgentPolicy.js +78 -16
  9. package/dist/AgentPolicy.js.map +1 -1
  10. package/dist/agent-pool.d.ts +14 -2
  11. package/dist/agent-pool.d.ts.map +1 -1
  12. package/dist/agent-pool.js +415 -148
  13. package/dist/agent-pool.js.map +1 -1
  14. package/dist/context.d.ts +18 -1
  15. package/dist/context.d.ts.map +1 -1
  16. package/dist/context.js +18 -1
  17. package/dist/context.js.map +1 -1
  18. package/dist/create-agent-pool.d.ts +33 -15
  19. package/dist/create-agent-pool.d.ts.map +1 -1
  20. package/dist/create-agent-pool.js +34 -10
  21. package/dist/create-agent-pool.js.map +1 -1
  22. package/dist/index.d.ts +5 -1
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +10 -1
  25. package/dist/index.js.map +1 -1
  26. package/dist/orchestrators.d.ts +161 -0
  27. package/dist/orchestrators.d.ts.map +1 -0
  28. package/dist/orchestrators.js +173 -0
  29. package/dist/orchestrators.js.map +1 -0
  30. package/dist/replay.d.ts +96 -0
  31. package/dist/replay.d.ts.map +1 -0
  32. package/dist/replay.js +108 -0
  33. package/dist/replay.js.map +1 -0
  34. package/dist/shared-root.d.ts +56 -18
  35. package/dist/shared-root.d.ts.map +1 -1
  36. package/dist/shared-root.js +79 -52
  37. package/dist/shared-root.js.map +1 -1
  38. package/dist/trace-types.d.ts +22 -2
  39. package/dist/trace-types.d.ts.map +1 -1
  40. package/dist/types.d.ts +33 -5
  41. package/dist/types.d.ts.map +1 -1
  42. package/dist/use-agent.d.ts.map +1 -1
  43. package/dist/use-agent.js +13 -17
  44. package/dist/use-agent.js.map +1 -1
  45. package/package.json +5 -5
@@ -52,8 +52,20 @@ const AgentPolicy_1 = require("./AgentPolicy");
52
52
  class ContextPressure {
53
53
  /** Default softLimit: 1024 tokens reserved for downstream work */
54
54
  static DEFAULT_SOFT_LIMIT = 1024;
55
- /** Default hardLimit: 128 tokens crash-prevention floor */
56
- static DEFAULT_HARD_LIMIT = 128;
55
+ /**
56
+ * Default hardLimit: 512 tokens — matches llama.cpp's default `n_batch`.
57
+ * The pool validates at startup that `hardLimit >= nBatch`; the default
58
+ * is sized to satisfy the invariant for the default llama.cpp context.
59
+ * Recovery fits within the `hardLimit` reserve.
60
+ */
61
+ static DEFAULT_HARD_LIMIT = 512;
62
+ /**
63
+ * Assumed `nBatch` when the native binding doesn't expose it.
64
+ * Pool startup validates `pressureThresholds.hardLimit >= this`.
65
+ * TODO: once `SessionContext.nBatch` is exposed (lloyal.node
66
+ * follow-up), read from ctx.nBatch instead.
67
+ */
68
+ static ASSUMED_N_BATCH = 512;
57
69
  /** Total KV cache capacity (max positions). 0 when no context limit. */
58
70
  nCtx;
59
71
  /** KV cells currently in use (monotonic within a pool run). */
@@ -108,8 +120,10 @@ exports.ContextPressure = ContextPressure;
108
120
  *
109
121
  * Returns true if the agent reported findings.
110
122
  */
111
- function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
112
- const recovery = policy.onRecovery?.(agent);
123
+ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events, pressureOpts) {
124
+ // Fresh snapshot — the policy uses this to compute the recovery budget
125
+ // (reflected in the rendered prompt via `<%= it.budget %>`).
126
+ const recovery = policy.onRecovery?.(agent, new ContextPressure(ctx, pressureOpts));
113
127
  if (!recovery || recovery.type === 'skip') {
114
128
  if (!agent.branch.disposed)
115
129
  agent.branch.pruneSync();
@@ -132,8 +146,13 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
132
146
  required: ['result'],
133
147
  })));
134
148
  // Recovery runs in its own scope — if prefill or decode fails
135
- // (KV exhaustion), the scope tears down cleanly.
149
+ // (KV exhaustion), the scope tears down cleanly. Diagnostic trace
150
+ // events (pool:recoveryProduce + recoveryReport/recoveryFailed) make
151
+ // silent recovery failures observable in traces.
136
152
  let reported = false;
153
+ let output = '';
154
+ let producedTokens = 0;
155
+ let failureReason = null;
137
156
  try {
138
157
  yield* (0, effection_1.scoped)(function* () {
139
158
  yield* (0, effection_1.call)(() => store.prefill([[agent.branch, tokens]]));
@@ -144,27 +163,53 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
144
163
  tokenCount: tokens.length, role: 'recovery',
145
164
  });
146
165
  // Single-agent produce/commit loop
147
- let output = '';
148
- let tokenCount = 0;
149
166
  for (;;) {
150
167
  const { token, text, isStop } = agent.branch.produceSync();
151
168
  if (isStop)
152
169
  break;
153
170
  output += text;
154
- tokenCount++;
171
+ producedTokens++;
155
172
  yield* (0, effection_1.call)(() => store.commit([[agent.branch, token]]));
156
- yield* events.send({ type: 'agent:produce', agentId: agent.id, text, tokenCount });
173
+ yield* events.send({ type: 'agent:produce', agentId: agent.id, text, tokenCount: producedTokens });
157
174
  }
175
+ tw.write({
176
+ traceId: tw.nextId(), parentTraceId, ts: performance.now(),
177
+ type: 'pool:recoveryProduce', agentId: agent.id,
178
+ tokenCount: producedTokens, outputLength: output.length,
179
+ });
158
180
  // Parse + report
159
- const parsed = JSON.parse(output);
160
- if (parsed?.result) {
161
- agent.reportResult(parsed.result, 'scratchpad');
162
- yield* events.send({ type: 'agent:report', agentId: agent.id, result: agent.result });
163
- reported = true;
181
+ try {
182
+ const parsed = JSON.parse(output);
183
+ if (parsed?.result) {
184
+ agent.reportResult(parsed.result, 'scratchpad');
185
+ yield* events.send({ type: 'agent:report', agentId: agent.id, result: agent.result });
186
+ reported = true;
187
+ tw.write({
188
+ traceId: tw.nextId(), parentTraceId, ts: performance.now(),
189
+ type: 'pool:recoveryReport', agentId: agent.id,
190
+ resultLength: parsed.result.length,
191
+ });
192
+ }
193
+ else {
194
+ failureReason = 'no_result_field';
195
+ }
164
196
  }
197
+ catch (e) {
198
+ failureReason = `parse_error: ${e.message ?? 'unknown'}`;
199
+ }
200
+ });
201
+ }
202
+ catch (e) {
203
+ failureReason = `scope_error: ${e.message ?? 'unknown'}`;
204
+ }
205
+ if (!reported) {
206
+ tw.write({
207
+ traceId: tw.nextId(), parentTraceId, ts: performance.now(),
208
+ type: 'pool:recoveryFailed', agentId: agent.id,
209
+ reason: failureReason ?? 'unknown',
210
+ outputExcerpt: output.slice(0, 200),
165
211
  });
166
212
  }
167
- catch { /* prefill overflow, decode failure, or malformed JSON — non-fatal */ }
168
213
  // Always prune after scope exits (success or failure)
169
214
  if (!agent.branch.disposed)
170
215
  agent.branch.pruneSync();
@@ -196,7 +241,7 @@ function* handleNudge(a, message, tc, ctx, tools) {
196
241
  const nudgeResult = { error: message };
197
242
  a.incrementTurns();
198
243
  a.transition('awaiting_tool');
199
- const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), callId);
244
+ const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), callId, { enableThinking: a.fmt.enableThinking });
200
245
  const probe = tools?.get(tc?.name || '')?.probe(nudgeResult) ?? undefined;
201
246
  a.resetTurn();
202
247
  return { agentId: a.id, prefillTokens, toolName: tc?.name || '', callId, args: tc?.arguments || '', probe };
@@ -218,22 +263,43 @@ function* handleReport(a, result, tc, terminalTool, pruneOnReport, events) {
218
263
  * On scope exit (error, cancellation), `ensure()` prunes the branch
219
264
  * automatically — the orphaned-branch leak is structurally impossible.
220
265
  */
221
- function* setupAgent(parent, task, ctx) {
222
- const messages = [
223
- { role: 'system', content: task.systemPrompt },
224
- { role: 'user', content: task.content },
225
- ];
226
- const fmtOpts = { enableThinking: false };
227
- if (task.tools)
266
+ function* setupAgent(parent, task, ctx, enableThinking) {
267
+ // Probe shared-root mode. When set, the queryRoot already has the
268
+ // [system + tools] chat header prefilled and we MUST NOT re-emit them
269
+ // in the agent's suffix — the bytes are already in attention via fork
270
+ // prefix-share. The new agent inherits parser/grammar/format/triggers
271
+ // from sharedFmt so tool dispatch keeps working.
272
+ let sharedFmt = null;
273
+ try {
274
+ sharedFmt = (yield* context_1.RootFmt.get()) ?? null;
275
+ }
276
+ catch { /* not in shared mode */ }
277
+ // Compose the messages to format into the suffix. In shared mode with
278
+ // an empty per-spec systemPrompt, drop the system message — the role
279
+ // lives at the root, the agent only contributes a user turn. With a
280
+ // non-empty per-spec systemPrompt, include it: the agent's KV will
281
+ // contain TWO system messages in lineage, which Qwen3 handles (recovery
282
+ // ships on the same multi-system pattern).
283
+ const messages = sharedFmt && task.systemPrompt === ''
284
+ ? [{ role: 'user', content: task.content }]
285
+ : [
286
+ { role: 'system', content: task.systemPrompt },
287
+ { role: 'user', content: task.content },
288
+ ];
289
+ const fmtOpts = { enableThinking };
290
+ // Tools belong at the root in shared mode; emitting them again here
291
+ // would re-prefill the same schema bytes for nothing.
292
+ if (task.tools && !sharedFmt)
228
293
  fmtOpts.tools = task.tools;
229
294
  const fmt = ctx.formatChatSync(JSON.stringify(messages), fmtOpts);
230
- if (task.tools && (fmt.format === sdk_1.CHAT_FORMAT_CONTENT_ONLY || fmt.format === sdk_1.CHAT_FORMAT_GENERIC)) {
295
+ // Tool-support guard runs only on the non-shared path. Shared mode's
296
+ // root already passed the equivalent check at withSharedRoot setup.
297
+ if (task.tools && !sharedFmt
298
+ && (fmt.format === sdk_1.CHAT_FORMAT_CONTENT_ONLY || fmt.format === sdk_1.CHAT_FORMAT_GENERIC)) {
231
299
  // Error before fork — no branch to clean up
232
300
  throw new Error('Model does not support tool calling. Please use a model with native tool support (e.g. Qwen3, Llama 3.x, Mistral).');
233
301
  }
234
302
  const branch = parent.forkSync();
235
- yield* (0, effection_1.ensure)(() => { if (!branch.disposed)
236
- branch.pruneSync(); });
237
303
  const sep = ctx.getTurnSeparator();
238
304
  const suffixTokens = [...sep, ...ctx.tokenizeSync(fmt.prompt, false)];
239
305
  if (task.seed != null)
@@ -246,13 +312,22 @@ function* setupAgent(parent, task, ctx) {
246
312
  callingAgent = a;
247
313
  }
248
314
  catch { /* top-level — no caller */ }
249
- const agent = new Agent_1.Agent({
250
- id: branch.handle,
251
- parentId: parent.handle,
252
- branch,
253
- parent: callingAgent,
254
- task: task.content,
255
- fmt: {
315
+ // In shared mode the new agent's parser/grammar/format/triggers come
316
+ // from the root's pre-computed fmt — those fields know about the tool
317
+ // palette that's in attention via the inherited prefix. In non-shared
318
+ // mode, fresh fmt drives those fields (existing behavior).
319
+ const fmtConfig = sharedFmt
320
+ ? {
321
+ format: sharedFmt.format,
322
+ reasoningFormat: sharedFmt.reasoningFormat,
323
+ generationPrompt: sharedFmt.generationPrompt,
324
+ parser: sharedFmt.parser,
325
+ grammar: sharedFmt.grammar,
326
+ grammarLazy: sharedFmt.grammarLazy,
327
+ grammarTriggers: sharedFmt.grammarTriggers,
328
+ enableThinking,
329
+ }
330
+ : {
256
331
  format: fmt.format,
257
332
  reasoningFormat: fmt.reasoningFormat,
258
333
  generationPrompt: fmt.generationPrompt,
@@ -260,7 +335,15 @@ function* setupAgent(parent, task, ctx) {
260
335
  grammar: fmt.grammar,
261
336
  grammarLazy: fmt.grammarLazy,
262
337
  grammarTriggers: fmt.grammarTriggers,
263
- },
338
+ enableThinking,
339
+ };
340
+ const agent = new Agent_1.Agent({
341
+ id: branch.handle,
342
+ parentId: parent.handle,
343
+ branch,
344
+ parent: callingAgent,
345
+ task: task.content,
346
+ fmt: fmtConfig,
264
347
  });
265
348
  return { agent, suffixTokens, formattedPrompt: fmt.prompt };
266
349
  }
@@ -326,7 +409,7 @@ function useAgentPool(opts) {
326
409
  }
327
410
  });
328
411
  const tw = yield* context_1.Trace.expect();
329
- const { tasks, tools, maxTurns = 100, terminalTool, trace = false, pruneOnReport = false } = opts;
412
+ const { root, orchestrate, toolsJson, tools, maxTurns = 100, terminalTool, trace = false, pruneOnReport = false, enableThinking = false } = opts;
330
413
  // Tool index map for trace — position in toolkit array
331
414
  const toolIndexMap = new Map([...tools.keys()].map((name, i) => [name, i]));
332
415
  const toolkitSize = tools.size;
@@ -338,7 +421,7 @@ function useAgentPool(opts) {
338
421
  poolParentTraceId = p;
339
422
  }
340
423
  catch { /* top level */ }
341
- const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', { agentCount: tasks.length, maxTurns, terminalTool });
424
+ const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', { maxTurns, terminalTool });
342
425
  // Whether the pool's tool registry contains tools besides the terminal tool.
343
426
  // When false, agents are allowed to call the terminal tool as their first
344
427
  // action (e.g. reporter sub-agents that only have `report()`). When true,
@@ -353,68 +436,38 @@ function useAgentPool(opts) {
353
436
  const policy = opts.policy ?? new AgentPolicy_1.DefaultAgentPolicy();
354
437
  const pressureOpts = policy.pressureThresholds
355
438
  ?? { softLimit: ContextPressure.DEFAULT_SOFT_LIMIT, hardLimit: ContextPressure.DEFAULT_HARD_LIMIT };
439
+ // Invariant: hardLimit must be at least the native batch size (nBatch).
440
+ // When `pressure.critical` fires and the kill path runs recovery, the
441
+ // reserve cells (hardLimit count) must accommodate `recoverInline`'s
442
+ // next batch allocation — otherwise native decode will OOM with
443
+ // "failed to find a memory slot for batch of size N".
444
+ // Until `SessionContext.nBatch` is exposed natively, we validate against
445
+ // `ContextPressure.ASSUMED_N_BATCH` (512, matches llama.cpp default).
446
+ const nBatch = ContextPressure.ASSUMED_N_BATCH;
447
+ const hardLimitVal = pressureOpts.hardLimit ?? ContextPressure.DEFAULT_HARD_LIMIT;
448
+ if (hardLimitVal < nBatch) {
449
+ throw new Error(`useAgentPool: Invariant Violation — hardLimit (${hardLimitVal}) must be >= nBatch (${nBatch}). ` +
450
+ `Recovery reserves hardLimit cells for its own decode; if smaller than nBatch, the next batch ` +
451
+ `allocation will OOM. Increase policy.budget.context.hardLimit to at least ${nBatch}.`);
452
+ }
356
453
  const policyConfig = { maxTurns, terminalTool, hasNonTerminalTools };
357
- // ── Setup: fork branches, collect suffix tokens ──────────
358
- // setupAgent is now a generator — each branch registers its own ensure()
359
- // for cleanup. No manual try/finally needed here.
454
+ // ── Orchestrator-driven setup ────────────────────────────
455
+ // Agents are spawned lazily via `ctx.spawn` from the orchestrator.
456
+ // The tick loop iterates over whatever agents are currently active.
457
+ // decode_each batches across all active agents regardless of spawn order.
360
458
  const agents = [];
361
- const prefillSetup = [];
362
- for (const task of tasks) {
363
- const parent = task.parent;
364
- if (!parent)
365
- throw new Error('useAgentPool: each task must have a parent branch');
366
- const { agent, suffixTokens, formattedPrompt } = yield* setupAgent(parent, task, ctx);
367
- agents.push(agent);
368
- prefillSetup.push([agent.branch, suffixTokens]);
369
- tw.write({
370
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
371
- type: 'branch:create', branchHandle: agent.id, parentHandle: agent.parentId,
372
- position: 0, role: 'agentFork',
373
- });
374
- tw.write({
375
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
376
- type: 'prompt:format', promptText: formattedPrompt,
377
- taskContent: task.content,
378
- tokenCount: suffixTokens.length,
379
- messages: JSON.stringify([
380
- { role: 'system', content: task.systemPrompt },
381
- { role: 'user', content: task.content },
382
- ]),
383
- tools: task.tools, role: 'agentSuffix',
384
- });
385
- }
386
- // Batch prefill all agent suffixes — pressure-gated.
387
- // Each suffix is the full formatted chat (system prompt + tools JSON +
388
- // user message + generation prompt), tokenized via formatChatSync().
389
- // Suffix cost is model-dependent: ~250-400 tokens per agent depending
390
- // on chat template verbosity and tool schema size.
391
- const initPressure = new ContextPressure(ctx, pressureOpts);
392
- const totalSuffix = prefillSetup.reduce((s, [, t]) => s + t.length, 0);
393
- if (!initPressure.canFit(totalSuffix)) {
394
- // Not enough room — drop agents from the end until it fits
395
- while (prefillSetup.length > 0) {
396
- const needed = prefillSetup.reduce((s, [, t]) => s + t.length, 0);
397
- if (initPressure.canFit(needed))
398
- break;
399
- prefillSetup.pop();
400
- const dropped = agents.pop();
401
- dropped.dispose();
402
- tw.write({
403
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
404
- type: 'pool:agentDrop', agentId: dropped.id, reason: 'pressure_init',
405
- });
459
+ const agentById = new Map();
460
+ const pendingSpawns = [];
461
+ const pendingExtends = [];
462
+ // Pool-level branch cleanup — ensures orphan-branch cleanup even when
463
+ // spawns are lazy and the orchestrator's spawn scope exits early.
464
+ yield* (0, effection_1.ensure)(() => {
465
+ for (const a of agents) {
466
+ if (!a.branch.disposed)
467
+ a.branch.pruneSync();
406
468
  }
407
- }
408
- if (prefillSetup.length > 0) {
409
- yield* (0, effection_1.call)(() => store.prefill(prefillSetup));
410
- }
411
- tw.write({
412
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
413
- type: 'pool:open', agentCount: agents.length,
414
- taskSuffixTokens: prefillSetup.map(([, t]) => t.length),
415
- pressure: { remaining: initPressure.remaining, softLimit: initPressure.softLimit, headroom: initPressure.headroom },
416
469
  });
417
- // ── Lazy grammar setup ───────────────────────────────────
470
+ // Lazy grammar setup — applied inside ctx.spawn after prefill completes.
418
471
  const applyLazyGrammar = (a) => {
419
472
  if (a.fmt.grammar && a.fmt.grammarLazy && a.fmt.grammarTriggers.length > 0) {
420
473
  const triggers = a.fmt.grammarTriggers.map(t => {
@@ -429,11 +482,112 @@ function useAgentPool(opts) {
429
482
  a.branch.setGrammarLazy(a.fmt.grammar, triggers);
430
483
  }
431
484
  };
432
- for (const a of agents)
433
- applyLazyGrammar(a);
434
- const agentById = new Map(agents.map(a => [a.id, a]));
435
- // Subscribe BEFORE spawning tick loop — no events missed
485
+ tw.write({
486
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
487
+ type: 'pool:open', agentCount: 0, taskSuffixTokens: [],
488
+ pressure: (() => {
489
+ const p = new ContextPressure(ctx, pressureOpts);
490
+ return { remaining: p.remaining, softLimit: p.softLimit, headroom: p.headroom };
491
+ })(),
492
+ });
493
+ // ── PoolContext — orchestrator's API surface ─────────────
494
+ const poolContext = {
495
+ root,
496
+ *spawn(spec) {
497
+ const parent = spec.parent ?? root;
498
+ const task = {
499
+ systemPrompt: spec.systemPrompt,
500
+ content: spec.content,
501
+ tools: toolsJson,
502
+ seed: spec.seed,
503
+ parent,
504
+ };
505
+ // Synchronous setup — fork, tokenize suffix, pressure check.
506
+ // No native store call yet; that's the tick loop's SPAWN phase's job.
507
+ const { agent, suffixTokens, formattedPrompt } = yield* setupAgent(parent, task, ctx, enableThinking);
508
+ const pressure = new ContextPressure(ctx, pressureOpts);
509
+ if (!pressure.canFit(suffixTokens.length)) {
510
+ agent.branch.pruneSync();
511
+ agent.dispose();
512
+ tw.write({
513
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
514
+ type: 'pool:agentDrop', agentId: agent.id, reason: 'pressure_init',
515
+ });
516
+ throw new Error(`useAgentPool: cannot fit agent suffix (${suffixTokens.length} tokens) under current pressure`);
517
+ }
518
+ // Enqueue for SPAWN phase. The tick loop will batch this with any
519
+ // other pending spawns into ONE store.prefill, transition to active,
520
+ // write trace events, and emit agent:spawn. Return the agent
521
+ // immediately — waitFor() is keyed off a transition, not a status
522
+ // snapshot, so the pre-activation 'idle' status doesn't race with
523
+ // the real terminal-idle signal.
524
+ pendingSpawns.push({ agent, suffixTokens, formattedPrompt, task });
525
+ agents.push(agent);
526
+ agentById.set(agent.id, agent);
527
+ return agent;
528
+ },
529
+ *waitFor(agent) {
530
+ // Agent completion = terminal 'idle' OR 'disposed'. Pre-activation
531
+ // 'idle' (the constructor default) would be a false positive, so we
532
+ // wait for a TRANSITION signal rather than checking status.snapshot.
533
+ // The SPAWN phase transitions 'idle' → 'active' when it activates the
534
+ // agent; subsequent transitions lead to a terminal 'idle' or 'disposed'.
535
+ const stream = yield* (0, effection_1.each)(agent.statusSignal);
536
+ // Only short-circuit for already-disposed — no further signal is coming.
537
+ if (agent.status === 'disposed')
538
+ return agent;
539
+ for (const s of stream) {
540
+ if (s === 'idle' || s === 'disposed')
541
+ return agent;
542
+ yield* effection_1.each.next();
543
+ }
544
+ return agent;
545
+ },
546
+ *extendRoot(userContent, assistantContent) {
547
+ if (!assistantContent)
548
+ return 0;
549
+ const turnTokens = (0, sdk_2.buildTurnDelta)(ctx, userContent, assistantContent);
550
+ // Rendezvous with the tick loop's SPAWN phase — see pendingExtends.
551
+ // action() is the Effection-native one-shot suspend: orchestrator
552
+ // queues the request, suspends; tick loop drains + resolves; this
553
+ // operation returns the deltaTokens. The finally returned from the
554
+ // executor marks the request discarded if this fiber is cancelled
555
+ // before the drain runs, so the drain doesn't touch a dead action.
556
+ return yield* (0, effection_1.action)((resolve, reject) => {
557
+ const req = {
558
+ tokens: turnTokens,
559
+ userContent,
560
+ assistantContent,
561
+ resolve,
562
+ reject,
563
+ discarded: false,
564
+ };
565
+ pendingExtends.push(req);
566
+ return () => { req.discarded = true; };
567
+ });
568
+ },
569
+ canFit(estimatedSuffixTokens) {
570
+ return new ContextPressure(ctx, pressureOpts).canFit(estimatedSuffixTokens);
571
+ },
572
+ };
573
+ // Subscribe BEFORE spawning orchestrator or tick loop — no events missed
436
574
  const subscription = yield* poolChannel;
575
+ // Orchestrator runs concurrently with tick loop under the pool scope.
576
+ // Sets orchestratorDone when complete; tick loop terminates on
577
+ // (orchestratorDone && all agents idle/disposed).
578
+ let orchestratorDone = false;
579
+ let orchestratorError = null;
580
+ yield* (0, effection_1.spawn)(function* () {
581
+ try {
582
+ yield* orchestrate(poolContext);
583
+ }
584
+ catch (e) {
585
+ orchestratorError = e;
586
+ }
587
+ finally {
588
+ orchestratorDone = true;
589
+ }
590
+ });
437
591
  // Spawn tick loop — runs concurrently with Subscription consumption.
438
592
  // scoped() creates an error boundary: if llama_decode fails (KV exhaustion),
439
593
  // the scope tears down and the channel closes with whatever results exist.
@@ -442,42 +596,32 @@ function useAgentPool(opts) {
442
596
  let totalToolCalls = 0;
443
597
  const counters = { warmPrefillCalls: 0, warmPrefillBranches: 0 };
444
598
  try {
445
- // Emit spawn events and activate agents
446
- for (const a of agents) {
447
- a.transition('active');
448
- yield* poolChannel.send({ type: 'agent:spawn', agentId: a.id, parentAgentId: a.parentId });
449
- }
450
599
  // ── Phase operations (close over pool scope) ────────────
451
600
  /** SETTLE: prefill tool results that fit, defer oversized items for next tick */
452
601
  function* settle(items) {
453
602
  const settlePressure = new ContextPressure(ctx, pressureOpts);
454
603
  let headroom = settlePressure.headroom;
455
- if (trace) {
456
- const desc = items.map(s => `${s.toolName}:${s.prefillTokens.length}`).join(', ');
457
- try {
458
- process.stderr.write(`[SETTLE] remaining=${settlePressure.remaining} headroom=${headroom} cellsUsed=${settlePressure.cellsUsed} nCtx=${settlePressure.nCtx} items=[${desc}]\n`);
459
- }
460
- catch { }
461
- }
462
604
  const prefillPairs = [];
463
605
  const settledAgents = [];
606
+ const itemProbes = new Map();
464
607
  const deferred = [];
465
608
  for (const item of items) {
466
609
  const a = agentById.get(item.agentId);
467
610
  if (!a || a.status === 'idle')
468
611
  continue;
469
612
  if (item.prefillTokens.length > headroom) {
470
- if (trace) {
471
- try {
472
- process.stderr.write(`[SETTLE] DEFER ${item.toolName}:${item.prefillTokens.length} > headroom=${headroom}\n`);
473
- }
474
- catch { }
475
- }
613
+ // Defer — siblings may finish and free KV, letting this result
614
+ // settle next tick (staggered-exit for parallel orchestration).
615
+ // Policy is consulted at stall-break time, not here: invoking
616
+ // it eagerly would break "wait for a sibling to report and
617
+ // free cells" by nudging/dropping on first over-headroom.
476
618
  deferred.push(item);
477
619
  continue;
478
620
  }
479
621
  prefillPairs.push([a.branch, item.prefillTokens]);
480
622
  settledAgents.push(a);
623
+ if (item.probe)
624
+ itemProbes.set(a.id, item.probe);
481
625
  headroom -= item.prefillTokens.length;
482
626
  const postSettle = new ContextPressure(ctx, pressureOpts);
483
627
  a.recordToolResult({
@@ -491,20 +635,13 @@ function useAgentPool(opts) {
491
635
  tokenCount: item.prefillTokens.length, role: 'toolResult' });
492
636
  }
493
637
  if (prefillPairs.length > 0) {
494
- if (trace) {
495
- const total = prefillPairs.reduce((s, [, t]) => s + t.length, 0);
496
- try {
497
- process.stderr.write(`[SETTLE] PREFILL ${prefillPairs.length} branches, ${total} tokens, headroom_after=${headroom}\n`);
498
- }
499
- catch { }
500
- }
501
638
  yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
502
639
  counters.warmPrefillCalls++;
503
640
  counters.warmPrefillBranches += prefillPairs.length;
504
- // Probe prefill from DISPATCH
641
+ // Probe prefill from DISPATCH or nudge-replacement.
505
642
  const probePairs = [];
506
643
  for (const a of settledAgents) {
507
- const probe = items.find(s => s.agentId === a.id)?.probe;
644
+ const probe = itemProbes.get(a.id);
508
645
  if (probe) {
509
646
  const probeTokens = ctx.tokenizeSync(probe, false);
510
647
  probePairs.push([a.branch, probeTokens]);
@@ -584,7 +721,7 @@ function useAgentPool(opts) {
584
721
  }
585
722
  const resultStr = JSON.stringify(result);
586
723
  yield* poolChannel.send({ type: 'agent:tool_result', agentId: agent.id, tool: tc.name, result: resultStr, contextAvailablePercent });
587
- const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId);
724
+ const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId, { enableThinking: agent.fmt.enableThinking });
588
725
  const probe = tool?.probe(result) ?? undefined;
589
726
  results.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId, args: tc.arguments, probe });
590
727
  tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
@@ -607,15 +744,86 @@ function useAgentPool(opts) {
607
744
  // ── Four-phase tick loop ─────────────────────────────────
608
745
  let recoveryAttempted = false;
609
746
  for (;;) {
610
- // -- Phase 1: PRODUCE -- sample from active agents, collect tool calls
611
- policy.resetTick?.();
612
- const pressure = new ContextPressure(ctx, pressureOpts);
613
- if (trace && (pressure.critical || pressure.headroom < 0)) {
747
+ // Idle until orchestrator enqueues work (spawn or extend) or completes.
748
+ // Include pendingExtends: the final extend after the last task in chain
749
+ // mode must drain before the loop exits, otherwise the orchestrator fiber
750
+ // is left suspended on a dead action.
751
+ if (agents.length === 0
752
+ && pendingSpawns.length === 0
753
+ && pendingExtends.length === 0) {
754
+ if (orchestratorDone)
755
+ break;
756
+ yield* (0, effection_1.sleep)(1);
757
+ continue;
758
+ }
759
+ // -- Phase 0: SPAWN+EXTEND -- drain pending spawns AND pending extends,
760
+ // batching all fork-suffix prefills and extend-onto-root prefills into
761
+ // ONE native store.prefill call. All store-level native calls in this
762
+ // pool are issued from this fiber (the tick loop), never concurrently
763
+ // with the orchestrator's fiber. Piggybacking extend in this phase
764
+ // preserves the continuous-tree-batching invariant (one GPU round-trip
765
+ // per tick) and naturally atomic-orders both kinds of work.
766
+ if (pendingSpawns.length > 0 || pendingExtends.length > 0) {
767
+ const drainedSpawns = pendingSpawns.splice(0, pendingSpawns.length);
768
+ const drainedExtends = pendingExtends
769
+ .splice(0, pendingExtends.length)
770
+ .filter(e => !e.discarded);
771
+ const prefillPairs = [
772
+ ...drainedSpawns.map(s => [s.agent.branch, s.suffixTokens]),
773
+ ...drainedExtends.map(e => [root, e.tokens]),
774
+ ];
614
775
  try {
615
- process.stderr.write(`[PRODUCE] ${pressure.critical ? 'CRITICAL' : 'SOFT_LIMIT'} remaining=${pressure.remaining} headroom=${pressure.headroom} cellsUsed=${pressure.cellsUsed} nCtx=${pressure.nCtx}\n`);
776
+ if (prefillPairs.length > 0) {
777
+ yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
778
+ }
779
+ }
780
+ catch (err) {
781
+ for (const e of drainedExtends)
782
+ e.reject(err);
783
+ throw err;
784
+ }
785
+ // Resolve extend requests with the delta token count. root.position
786
+ // has advanced by the sum of extend token counts at this point.
787
+ for (const e of drainedExtends) {
788
+ tw.write({
789
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
790
+ type: 'spine:extend',
791
+ userContent: e.userContent,
792
+ assistantContent: e.assistantContent,
793
+ deltaTokens: e.tokens.length,
794
+ positionAfter: root.position,
795
+ });
796
+ e.resolve(e.tokens.length);
797
+ }
798
+ for (const s of drainedSpawns) {
799
+ tw.write({
800
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
801
+ type: 'branch:create', branchHandle: s.agent.id, parentHandle: s.agent.parentId,
802
+ position: 0, role: 'agentFork',
803
+ });
804
+ tw.write({
805
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
806
+ type: 'prompt:format', promptText: s.formattedPrompt,
807
+ taskContent: s.task.content, tokenCount: s.suffixTokens.length,
808
+ messages: JSON.stringify([
809
+ { role: 'system', content: s.task.systemPrompt },
810
+ { role: 'user', content: s.task.content },
811
+ ]),
812
+ tools: s.task.tools, role: 'agentSuffix',
813
+ });
814
+ applyLazyGrammar(s.agent);
815
+ // transition fires agent.statusSignal — ctx.spawn's subscriber is waiting on this.
816
+ s.agent.transition('active');
817
+ yield* poolChannel.send({ type: 'agent:spawn', agentId: s.agent.id, parentAgentId: s.agent.parentId });
616
818
  }
617
- catch { }
618
819
  }
820
+ // If all we had was pending spawns, and none of them activated (shouldn't happen
821
+ // normally — SPAWN always transitions to active), nothing to produce. Loop back.
822
+ if (agents.length === 0)
823
+ continue;
824
+ // -- Phase 1: PRODUCE -- sample from active agents, collect tool calls
825
+ policy.resetTick?.();
826
+ const pressure = new ContextPressure(ctx, pressureOpts);
619
827
  const entries = [];
620
828
  const toolCalls = [];
621
829
  const nudges = [];
@@ -624,15 +832,19 @@ function useAgentPool(opts) {
624
832
  continue;
625
833
  const policyExit = policy.shouldExit?.(a, pressure);
626
834
  if (policyExit ?? pressure.critical) {
627
- a.transition('idle');
628
835
  const exitReason = pressure.critical ? 'pressure_critical'
629
836
  : policyExit ? 'policy_exit'
630
837
  : 'pressure_critical';
631
838
  tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
632
839
  type: 'pool:agentDrop', agentId: a.id, reason: exitReason });
633
840
  yield* poolChannel.send({ type: 'agent:done', agentId: a.id });
634
- // Trailing stop: extract findings inline, free KV for remaining agents
635
- yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel);
841
+ // Run recovery BEFORE transitioning to idle — otherwise the statusSignal
842
+ // fires 'idle' mid-recovery, PoolContext.waitFor returns early, the
843
+ // orchestrator resumes and starts spawning/prefilling the next task
844
+ // while this agent is still being decoded by recoverInline. Concurrent
845
+ // native calls on the same llama_context → SEGV.
846
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
847
+ a.transition('idle');
636
848
  continue;
637
849
  }
638
850
  const { token, text, isStop } = a.branch.produceSync();
@@ -697,38 +909,93 @@ function useAgentPool(opts) {
697
909
  // -- Phase 3: SETTLE (settle what fits, defer what doesn't)
698
910
  const toSettle = [...pendingSettled, ...nudges];
699
911
  const deferred = toSettle.length > 0 ? yield* settle(toSettle) : [];
700
- // Stall-breaker: if items are deferred and no active agents remain,
701
- // sacrifice an awaiting_tool agent to free KV. Without this, agents
702
- // with oversized results stay awaiting_tool indefinitely — PRODUCE
703
- // skips them, headroom never recovers, the pool loops forever.
912
+ // Stall-breaker: `deferred` has items but no active siblings can free
913
+ // KV. Consult policy per deferred item — the policy is the "last
914
+ // resort" decision point (staggered-exit for parallel orchestration
915
+ // still works because defer-on-oversize above lets items wait while
916
+ // siblings are active; only when ALL siblings are awaiting_tool or
917
+ // idle do we reach here). Distinct drop reasons:
918
+ // - `pressure_settle_reject` — policy said idle, or nudge but the
919
+ // nudge payload itself doesn't fit (policy suggestion infeasible).
920
+ // - `settle_stall_break` — policy hook absent (legacy fallback).
704
921
  if (deferred.length > 0 && !agents.some(a => a.status === 'active')) {
705
- const victim = agents.find(a => a.status === 'awaiting_tool' && !a.branch.disposed);
706
- if (victim) {
707
- victim.transition('idle');
708
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
709
- type: 'pool:agentDrop', agentId: victim.id, reason: 'pressure_settle_reject' });
710
- yield* poolChannel.send({ type: 'agent:done', agentId: victim.id });
711
- yield* recoverInline(victim, policy, ctx, store, tw, poolScope.traceId, poolChannel);
922
+ const stallPressure = new ContextPressure(ctx, pressureOpts);
923
+ let stallHeadroom = stallPressure.headroom;
924
+ const resolved = [];
925
+ for (const item of deferred) {
926
+ const a = agentById.get(item.agentId);
927
+ if (!a || a.status !== 'awaiting_tool' || a.branch.disposed)
928
+ continue;
929
+ const action = policy.onSettleReject?.(a, item.prefillTokens.length, stallPressure, policyConfig);
930
+ if (action?.type === 'nudge') {
931
+ // Record the policy's decision regardless of whether the
932
+ // nudge itself fits — the event captures "policy consulted,
933
+ // returned nudge" which is separate from "nudge was actionable".
934
+ tw.write({
935
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
936
+ type: 'pool:agentNudge', agentId: a.id, reason: 'settle_reject', message: action.message,
937
+ });
938
+ const nudgeResult = { error: action.message };
939
+ const nudgeTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), item.callId, { enableThinking: a.fmt.enableThinking });
940
+ if (nudgeTokens.length <= stallHeadroom) {
941
+ const probe = tools.get(item.toolName)?.probe(nudgeResult) ?? undefined;
942
+ a.incrementTurns();
943
+ resolved.push({
944
+ agentId: a.id,
945
+ prefillTokens: nudgeTokens,
946
+ toolName: item.toolName,
947
+ callId: item.callId,
948
+ args: item.args,
949
+ probe,
950
+ });
951
+ stallHeadroom -= nudgeTokens.length;
952
+ continue;
953
+ }
954
+ // Nudge doesn't fit — policy's suggestion is infeasible, fall through to drop.
955
+ }
956
+ // Drop. Reason: policy-said-idle OR nudge-didn't-fit →
957
+ // `pressure_settle_reject` (policy path). Policy hook absent →
958
+ // `settle_stall_break` (legacy fallback).
959
+ const reason = action ? 'pressure_settle_reject' : 'settle_stall_break';
960
+ tw.write({
961
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
962
+ type: 'pool:agentDrop', agentId: a.id, reason,
963
+ });
964
+ yield* poolChannel.send({ type: 'agent:done', agentId: a.id });
965
+ // Recover BEFORE transition — single-fiber store discipline.
966
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
967
+ a.transition('idle');
712
968
  }
969
+ // Replace deferred with the surviving (nudged) items for next tick.
970
+ deferred.length = 0;
971
+ deferred.push(...resolved);
713
972
  }
714
973
  // -- Phase 4: DISPATCH
715
974
  const dispatched = yield* dispatch(toolCalls);
716
975
  // Deferred + new dispatch results → next tick's SETTLE
717
976
  pendingSettled = [...deferred, ...dispatched];
718
977
  // -- Termination + recovery
719
- if (agents.every(a => a.status === 'idle' || a.status === 'disposed')) {
978
+ // Wait for the orchestrator to finish before closing — it may spawn more agents.
979
+ const allIdle = agents.every(a => a.status === 'idle' || a.status === 'disposed');
980
+ if (allIdle && orchestratorDone) {
720
981
  if (!recoveryAttempted) {
721
982
  recoveryAttempted = true;
722
983
  // Recover any idle agents that weren't handled by inline recovery
723
984
  // (e.g., killed by max_turns, time budget, or free_text_stop)
724
985
  for (const a of agents) {
725
986
  if (a.status === 'idle' && !a.result && !a.branch.disposed) {
726
- yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel);
987
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
727
988
  }
728
989
  }
729
990
  }
991
+ if (orchestratorError)
992
+ throw orchestratorError;
730
993
  break;
731
994
  }
995
+ if (allIdle && !orchestratorDone) {
996
+ // All current agents done but orchestrator may spawn more.
997
+ yield* (0, effection_1.sleep)(1);
998
+ }
732
999
  }
733
1000
  // ── Close channel with result — consumers get AgentPoolResult as close value ───────
734
1001
  // Branch cleanup is handled by each branch's ensure() from setupAgent —