@lloyal-labs/lloyal-agents 1.5.8 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +81 -97
  2. package/dist/Agent.d.ts +60 -1
  3. package/dist/Agent.d.ts.map +1 -1
  4. package/dist/Agent.js +87 -1
  5. package/dist/Agent.js.map +1 -1
  6. package/dist/AgentPolicy.d.ts +52 -16
  7. package/dist/AgentPolicy.d.ts.map +1 -1
  8. package/dist/AgentPolicy.js +114 -43
  9. package/dist/AgentPolicy.js.map +1 -1
  10. package/dist/agent-pool.d.ts +17 -5
  11. package/dist/agent-pool.d.ts.map +1 -1
  12. package/dist/agent-pool.js +707 -417
  13. package/dist/agent-pool.js.map +1 -1
  14. package/dist/combinators.d.ts +29 -0
  15. package/dist/combinators.d.ts.map +1 -0
  16. package/dist/combinators.js +37 -0
  17. package/dist/combinators.js.map +1 -0
  18. package/dist/context.d.ts +18 -1
  19. package/dist/context.d.ts.map +1 -1
  20. package/dist/context.js +18 -1
  21. package/dist/context.js.map +1 -1
  22. package/dist/create-agent-pool.d.ts +96 -0
  23. package/dist/create-agent-pool.d.ts.map +1 -0
  24. package/dist/create-agent-pool.js +84 -0
  25. package/dist/create-agent-pool.js.map +1 -0
  26. package/dist/index.d.ts +10 -5
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +17 -8
  29. package/dist/index.js.map +1 -1
  30. package/dist/orchestrators.d.ts +161 -0
  31. package/dist/orchestrators.d.ts.map +1 -0
  32. package/dist/orchestrators.js +173 -0
  33. package/dist/orchestrators.js.map +1 -0
  34. package/dist/replay.d.ts +96 -0
  35. package/dist/replay.d.ts.map +1 -0
  36. package/dist/replay.js +108 -0
  37. package/dist/replay.js.map +1 -0
  38. package/dist/shared-root.d.ts +56 -18
  39. package/dist/shared-root.d.ts.map +1 -1
  40. package/dist/shared-root.js +79 -52
  41. package/dist/shared-root.js.map +1 -1
  42. package/dist/source.d.ts.map +1 -1
  43. package/dist/source.js.map +1 -1
  44. package/dist/trace-types.d.ts +23 -2
  45. package/dist/trace-types.d.ts.map +1 -1
  46. package/dist/trace-writer.d.ts +4 -1
  47. package/dist/trace-writer.d.ts.map +1 -1
  48. package/dist/trace-writer.js +6 -2
  49. package/dist/trace-writer.js.map +1 -1
  50. package/dist/types.d.ts +42 -5
  51. package/dist/types.d.ts.map +1 -1
  52. package/dist/use-agent.d.ts +92 -0
  53. package/dist/use-agent.d.ts.map +1 -0
  54. package/dist/use-agent.js +127 -0
  55. package/dist/use-agent.js.map +1 -0
  56. package/package.json +5 -5
  57. package/dist/generate.d.ts +0 -77
  58. package/dist/generate.d.ts.map +0 -1
  59. package/dist/generate.js +0 -166
  60. package/dist/generate.js.map +0 -1
  61. package/dist/run-agents.d.ts +0 -39
  62. package/dist/run-agents.d.ts.map +0 -1
  63. package/dist/run-agents.js +0 -46
  64. package/dist/run-agents.js.map +0 -1
  65. package/dist/spawn-agents.d.ts +0 -104
  66. package/dist/spawn-agents.d.ts.map +0 -1
  67. package/dist/spawn-agents.js +0 -255
  68. package/dist/spawn-agents.js.map +0 -1
@@ -52,8 +52,20 @@ const AgentPolicy_1 = require("./AgentPolicy");
52
52
  class ContextPressure {
53
53
  /** Default softLimit: 1024 tokens reserved for downstream work */
54
54
  static DEFAULT_SOFT_LIMIT = 1024;
55
- /** Default hardLimit: 128 tokens crash-prevention floor */
56
- static DEFAULT_HARD_LIMIT = 128;
55
+ /**
56
+ * Default hardLimit: 512 tokens — matches llama.cpp's default `n_batch`.
57
+ * The pool validates at startup that `hardLimit >= nBatch`; the default
58
+ * is sized to satisfy the invariant for the default llama.cpp context.
59
+ * Recovery fits within the `hardLimit` reserve.
60
+ */
61
+ static DEFAULT_HARD_LIMIT = 512;
62
+ /**
63
+ * Assumed `nBatch` when the native binding doesn't expose it.
64
+ * Pool startup validates `pressureThresholds.hardLimit >= this`.
65
+ * TODO: once `SessionContext.nBatch` is exposed (lloyal.node
66
+ * follow-up), read from ctx.nBatch instead.
67
+ */
68
+ static ASSUMED_N_BATCH = 512;
57
69
  /** Total KV cache capacity (max positions). 0 when no context limit. */
58
70
  nCtx;
59
71
  /** KV cells currently in use (monotonic within a pool run). */
@@ -108,13 +120,18 @@ exports.ContextPressure = ContextPressure;
108
120
  *
109
121
  * Returns true if the agent reported findings.
110
122
  */
111
- function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
112
- const recovery = policy.onRecovery?.(agent);
123
+ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events, pressureOpts) {
124
+ // Fresh snapshot — the policy uses this to compute the recovery budget
125
+ // (reflected in the rendered prompt via `<%= it.budget %>`).
126
+ const recovery = policy.onRecovery?.(agent, new ContextPressure(ctx, pressureOpts));
113
127
  if (!recovery || recovery.type === 'skip') {
114
128
  if (!agent.branch.disposed)
115
129
  agent.branch.pruneSync();
116
130
  return false;
117
131
  }
132
+ // Build the nudge prompt — a minimal turn injection that triggers
133
+ // report behavior. The agent's KV already contains the full
134
+ // conversation context; the prompt is just a nudge.
118
135
  const { prompt } = ctx.formatChatSync(JSON.stringify([
119
136
  { role: 'system', content: recovery.prompt.system },
120
137
  { role: 'user', content: recovery.prompt.user },
@@ -122,24 +139,20 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
122
139
  const sep = ctx.getTurnSeparator();
123
140
  const delta = ctx.tokenizeSync(prompt, false);
124
141
  const tokens = [...sep, ...delta];
125
- // Check if extraction prompt fits
126
- const pressure = new ContextPressure(ctx);
127
- if (pressure.remaining < tokens.length) {
128
- if (!agent.branch.disposed)
129
- agent.branch.pruneSync();
130
- return false;
131
- }
132
- // Eager report grammar
142
+ // Eager report grammar forces { result: string } output
133
143
  const reportGrammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify({
134
144
  type: 'object',
135
145
  properties: { result: { type: 'string' } },
136
146
  required: ['result'],
137
147
  })));
138
- // Recovery runs in its own scope — if decode fails (KV exhaustion),
139
- // the scope tears down cleanly without propagating to the pool.
140
- // Mirrors the old prepare()-based recovery which used try/catch around
141
- // a Resource with its own ensure().
148
+ // Recovery runs in its own scope — if prefill or decode fails
149
+ // (KV exhaustion), the scope tears down cleanly. Diagnostic trace
150
+ // events (pool:recoveryProduce + recoveryReport/recoveryFailed) make
151
+ // silent recovery failures observable in traces.
142
152
  let reported = false;
153
+ let output = '';
154
+ let producedTokens = 0;
155
+ let failureReason = null;
143
156
  try {
144
157
  yield* (0, effection_1.scoped)(function* () {
145
158
  yield* (0, effection_1.call)(() => store.prefill([[agent.branch, tokens]]));
@@ -149,30 +162,55 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
149
162
  type: 'branch:prefill', branchHandle: agent.id,
150
163
  tokenCount: tokens.length, role: 'recovery',
151
164
  });
152
- yield* events.send({ type: 'agent:spawn', agentId: agent.id, parentAgentId: agent.parentId });
153
165
  // Single-agent produce/commit loop
154
- let output = '';
155
- let tokenCount = 0;
156
166
  for (;;) {
157
167
  const { token, text, isStop } = agent.branch.produceSync();
158
168
  if (isStop)
159
169
  break;
160
170
  output += text;
161
- tokenCount++;
171
+ producedTokens++;
162
172
  yield* (0, effection_1.call)(() => store.commit([[agent.branch, token]]));
163
- yield* events.send({ type: 'agent:produce', agentId: agent.id, text, tokenCount });
173
+ yield* events.send({ type: 'agent:produce', agentId: agent.id, text, tokenCount: producedTokens });
164
174
  }
175
+ tw.write({
176
+ traceId: tw.nextId(), parentTraceId, ts: performance.now(),
177
+ type: 'pool:recoveryProduce', agentId: agent.id,
178
+ tokenCount: producedTokens, outputLength: output.length,
179
+ });
165
180
  // Parse + report
166
- const parsed = JSON.parse(output);
167
- if (parsed?.result) {
168
- agent.reportResult(parsed.result, 'scratchpad');
169
- yield* events.send({ type: 'agent:report', agentId: agent.id, result: agent.result });
170
- reported = true;
181
+ try {
182
+ const parsed = JSON.parse(output);
183
+ if (parsed?.result) {
184
+ agent.reportResult(parsed.result, 'scratchpad');
185
+ yield* events.send({ type: 'agent:report', agentId: agent.id, result: agent.result });
186
+ reported = true;
187
+ tw.write({
188
+ traceId: tw.nextId(), parentTraceId, ts: performance.now(),
189
+ type: 'pool:recoveryReport', agentId: agent.id,
190
+ resultLength: parsed.result.length,
191
+ });
192
+ }
193
+ else {
194
+ failureReason = 'no_result_field';
195
+ }
196
+ }
197
+ catch (e) {
198
+ failureReason = `parse_error: ${e.message ?? 'unknown'}`;
171
199
  }
172
200
  });
173
201
  }
174
- catch { /* decode failure or malformed JSON — non-fatal, prune below */ }
175
- // Always prune after scope exits (success or decode failure)
202
+ catch (e) {
203
+ failureReason = `scope_error: ${e.message ?? 'unknown'}`;
204
+ }
205
+ if (!reported) {
206
+ tw.write({
207
+ traceId: tw.nextId(), parentTraceId, ts: performance.now(),
208
+ type: 'pool:recoveryFailed', agentId: agent.id,
209
+ reason: failureReason ?? 'unknown',
210
+ outputExcerpt: output.slice(0, 200),
211
+ });
212
+ }
213
+ // Always prune after scope exits (success or failure)
176
214
  if (!agent.branch.disposed)
177
215
  agent.branch.pruneSync();
178
216
  // Emit tick so TUI updates pressure percentage after prune
@@ -203,10 +241,10 @@ function* handleNudge(a, message, tc, ctx, tools) {
203
241
  const nudgeResult = { error: message };
204
242
  a.incrementTurns();
205
243
  a.transition('awaiting_tool');
206
- const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), callId);
244
+ const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), callId, { enableThinking: a.fmt.enableThinking });
207
245
  const probe = tools?.get(tc?.name || '')?.probe(nudgeResult) ?? undefined;
208
246
  a.resetTurn();
209
- return { agentId: a.id, prefillTokens, toolName: tc?.name || '', callId, probe };
247
+ return { agentId: a.id, prefillTokens, toolName: tc?.name || '', callId, args: tc?.arguments || '', probe };
210
248
  }
211
249
  function* handleReport(a, result, tc, terminalTool, pruneOnReport, events) {
212
250
  a.reportResult(result, 'report_tool');
@@ -225,22 +263,43 @@ function* handleReport(a, result, tc, terminalTool, pruneOnReport, events) {
225
263
  * On scope exit (error, cancellation), `ensure()` prunes the branch
226
264
  * automatically — the orphaned-branch leak is structurally impossible.
227
265
  */
228
- function* setupAgent(parent, task, ctx) {
229
- const messages = [
230
- { role: 'system', content: task.systemPrompt },
231
- { role: 'user', content: task.content },
232
- ];
233
- const fmtOpts = { enableThinking: false };
234
- if (task.tools)
266
+ function* setupAgent(parent, task, ctx, enableThinking) {
267
+ // Probe shared-root mode. When set, the queryRoot already has the
268
+ // [system + tools] chat header prefilled and we MUST NOT re-emit them
269
+ // in the agent's suffix — the bytes are already in attention via fork
270
+ // prefix-share. The new agent inherits parser/grammar/format/triggers
271
+ // from sharedFmt so tool dispatch keeps working.
272
+ let sharedFmt = null;
273
+ try {
274
+ sharedFmt = (yield* context_1.RootFmt.get()) ?? null;
275
+ }
276
+ catch { /* not in shared mode */ }
277
+ // Compose the messages to format into the suffix. In shared mode with
278
+ // an empty per-spec systemPrompt, drop the system message — the role
279
+ // lives at the root, the agent only contributes a user turn. With a
280
+ // non-empty per-spec systemPrompt, include it: the agent's KV will
281
+ // contain TWO system messages in lineage, which Qwen3 handles (recovery
282
+ // ships on the same multi-system pattern).
283
+ const messages = sharedFmt && task.systemPrompt === ''
284
+ ? [{ role: 'user', content: task.content }]
285
+ : [
286
+ { role: 'system', content: task.systemPrompt },
287
+ { role: 'user', content: task.content },
288
+ ];
289
+ const fmtOpts = { enableThinking };
290
+ // Tools belong at the root in shared mode; emitting them again here
291
+ // would re-prefill the same schema bytes for nothing.
292
+ if (task.tools && !sharedFmt)
235
293
  fmtOpts.tools = task.tools;
236
294
  const fmt = ctx.formatChatSync(JSON.stringify(messages), fmtOpts);
237
- if (task.tools && (fmt.format === sdk_1.CHAT_FORMAT_CONTENT_ONLY || fmt.format === sdk_1.CHAT_FORMAT_GENERIC)) {
295
+ // Tool-support guard runs only on the non-shared path. Shared mode's
296
+ // root already passed the equivalent check at withSharedRoot setup.
297
+ if (task.tools && !sharedFmt
298
+ && (fmt.format === sdk_1.CHAT_FORMAT_CONTENT_ONLY || fmt.format === sdk_1.CHAT_FORMAT_GENERIC)) {
238
299
  // Error before fork — no branch to clean up
239
300
  throw new Error('Model does not support tool calling. Please use a model with native tool support (e.g. Qwen3, Llama 3.x, Mistral).');
240
301
  }
241
302
  const branch = parent.forkSync();
242
- yield* (0, effection_1.ensure)(() => { if (!branch.disposed)
243
- branch.pruneSync(); });
244
303
  const sep = ctx.getTurnSeparator();
245
304
  const suffixTokens = [...sep, ...ctx.tokenizeSync(fmt.prompt, false)];
246
305
  if (task.seed != null)
@@ -253,13 +312,22 @@ function* setupAgent(parent, task, ctx) {
253
312
  callingAgent = a;
254
313
  }
255
314
  catch { /* top-level — no caller */ }
256
- const agent = new Agent_1.Agent({
257
- id: branch.handle,
258
- parentId: parent.handle,
259
- branch,
260
- parent: callingAgent,
261
- task: task.content,
262
- fmt: {
315
+ // In shared mode the new agent's parser/grammar/format/triggers come
316
+ // from the root's pre-computed fmt — those fields know about the tool
317
+ // palette that's in attention via the inherited prefix. In non-shared
318
+ // mode, fresh fmt drives those fields (existing behavior).
319
+ const fmtConfig = sharedFmt
320
+ ? {
321
+ format: sharedFmt.format,
322
+ reasoningFormat: sharedFmt.reasoningFormat,
323
+ generationPrompt: sharedFmt.generationPrompt,
324
+ parser: sharedFmt.parser,
325
+ grammar: sharedFmt.grammar,
326
+ grammarLazy: sharedFmt.grammarLazy,
327
+ grammarTriggers: sharedFmt.grammarTriggers,
328
+ enableThinking,
329
+ }
330
+ : {
263
331
  format: fmt.format,
264
332
  reasoningFormat: fmt.reasoningFormat,
265
333
  generationPrompt: fmt.generationPrompt,
@@ -267,7 +335,15 @@ function* setupAgent(parent, task, ctx) {
267
335
  grammar: fmt.grammar,
268
336
  grammarLazy: fmt.grammarLazy,
269
337
  grammarTriggers: fmt.grammarTriggers,
270
- },
338
+ enableThinking,
339
+ };
340
+ const agent = new Agent_1.Agent({
341
+ id: branch.handle,
342
+ parentId: parent.handle,
343
+ branch,
344
+ parent: callingAgent,
345
+ task: task.content,
346
+ fmt: fmtConfig,
271
347
  });
272
348
  return { agent, suffixTokens, formattedPrompt: fmt.prompt };
273
349
  }
@@ -322,18 +398,18 @@ function useAgentPool(opts) {
322
398
  return (0, effection_1.resource)(function* (provide) {
323
399
  const ctx = yield* context_1.Ctx.expect();
324
400
  const store = yield* context_1.Store.expect();
325
- const events = yield* context_1.Events.expect();
401
+ const poolChannel = (0, effection_1.createChannel)();
326
402
  // Bridge for onProgress callbacks — Signal is correct here (external callback).
327
- // A spawned forwarder drains the bridge into the Channel with proper scope context.
403
+ // A spawned forwarder drains the bridge into the poolChannel with proper scope context.
328
404
  const progressBridge = (0, effection_1.createSignal)();
329
405
  yield* (0, effection_1.spawn)(function* () {
330
406
  for (const ev of yield* (0, effection_1.each)(progressBridge)) {
331
- yield* events.send(ev);
407
+ yield* poolChannel.send(ev);
332
408
  yield* effection_1.each.next();
333
409
  }
334
410
  });
335
411
  const tw = yield* context_1.Trace.expect();
336
- const { tasks, tools, maxTurns = 100, terminalTool, trace = false, pruneOnReport = false } = opts;
412
+ const { root, orchestrate, toolsJson, tools, maxTurns = 100, terminalTool, trace = false, pruneOnReport = false, enableThinking = false } = opts;
337
413
  // Tool index map for trace — position in toolkit array
338
414
  const toolIndexMap = new Map([...tools.keys()].map((name, i) => [name, i]));
339
415
  const toolkitSize = tools.size;
@@ -345,7 +421,7 @@ function useAgentPool(opts) {
345
421
  poolParentTraceId = p;
346
422
  }
347
423
  catch { /* top level */ }
348
- const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', { agentCount: tasks.length, maxTurns, terminalTool });
424
+ const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', { maxTurns, terminalTool });
349
425
  // Whether the pool's tool registry contains tools besides the terminal tool.
350
426
  // When false, agents are allowed to call the terminal tool as their first
351
427
  // action (e.g. reporter sub-agents that only have `report()`). When true,
@@ -360,73 +436,38 @@ function useAgentPool(opts) {
360
436
  const policy = opts.policy ?? new AgentPolicy_1.DefaultAgentPolicy();
361
437
  const pressureOpts = policy.pressureThresholds
362
438
  ?? { softLimit: ContextPressure.DEFAULT_SOFT_LIMIT, hardLimit: ContextPressure.DEFAULT_HARD_LIMIT };
439
+ // Invariant: hardLimit must be at least the native batch size (nBatch).
440
+ // When `pressure.critical` fires and the kill path runs recovery, the
441
+ // reserve cells (hardLimit count) must accommodate `recoverInline`'s
442
+ // next batch allocation — otherwise native decode will OOM with
443
+ // "failed to find a memory slot for batch of size N".
444
+ // Until `SessionContext.nBatch` is exposed natively, we validate against
445
+ // `ContextPressure.ASSUMED_N_BATCH` (512, matches llama.cpp default).
446
+ const nBatch = ContextPressure.ASSUMED_N_BATCH;
447
+ const hardLimitVal = pressureOpts.hardLimit ?? ContextPressure.DEFAULT_HARD_LIMIT;
448
+ if (hardLimitVal < nBatch) {
449
+ throw new Error(`useAgentPool: Invariant Violation — hardLimit (${hardLimitVal}) must be >= nBatch (${nBatch}). ` +
450
+ `Recovery reserves hardLimit cells for its own decode; if smaller than nBatch, the next batch ` +
451
+ `allocation will OOM. Increase policy.budget.context.hardLimit to at least ${nBatch}.`);
452
+ }
363
453
  const policyConfig = { maxTurns, terminalTool, hasNonTerminalTools };
364
- // ── Setup: fork branches, collect suffix tokens ──────────
365
- // setupAgent is now a generator — each branch registers its own ensure()
366
- // for cleanup. No manual try/finally needed here.
454
+ // ── Orchestrator-driven setup ────────────────────────────
455
+ // Agents are spawned lazily via `ctx.spawn` from the orchestrator.
456
+ // The tick loop iterates over whatever agents are currently active.
457
+ // decode_each batches across all active agents regardless of spawn order.
367
458
  const agents = [];
368
- const prefillSetup = [];
369
- for (const task of tasks) {
370
- const parent = task.parent;
371
- if (!parent)
372
- throw new Error('useAgentPool: each task must have a parent branch');
373
- const { agent, suffixTokens, formattedPrompt } = yield* setupAgent(parent, task, ctx);
374
- agents.push(agent);
375
- prefillSetup.push([agent.branch, suffixTokens]);
376
- tw.write({
377
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
378
- type: 'branch:create', branchHandle: agent.id, parentHandle: agent.parentId,
379
- position: 0, role: 'agentFork',
380
- });
381
- tw.write({
382
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
383
- type: 'prompt:format', promptText: formattedPrompt,
384
- taskContent: task.content,
385
- tokenCount: suffixTokens.length,
386
- messages: JSON.stringify([
387
- { role: 'system', content: task.systemPrompt },
388
- { role: 'user', content: task.content },
389
- ]),
390
- tools: task.tools, role: 'agentSuffix',
391
- });
392
- }
393
- // Batch prefill all agent suffixes — pressure-gated.
394
- // Each suffix is the full formatted chat (system prompt + tools JSON +
395
- // user message + generation prompt), tokenized via formatChatSync().
396
- // Suffix cost is model-dependent: ~250-400 tokens per agent depending
397
- // on chat template verbosity and tool schema size.
398
- const initPressure = new ContextPressure(ctx, pressureOpts);
399
- const totalSuffix = prefillSetup.reduce((s, [, t]) => s + t.length, 0);
400
- if (!initPressure.canFit(totalSuffix)) {
401
- // Not enough room — drop agents from the end until it fits
402
- while (prefillSetup.length > 0) {
403
- const needed = prefillSetup.reduce((s, [, t]) => s + t.length, 0);
404
- if (initPressure.canFit(needed))
405
- break;
406
- prefillSetup.pop();
407
- const dropped = agents.pop();
408
- dropped.dispose();
409
- tw.write({
410
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
411
- type: 'pool:agentDrop', agentId: dropped.id, reason: 'pressure_init',
412
- });
459
+ const agentById = new Map();
460
+ const pendingSpawns = [];
461
+ const pendingExtends = [];
462
+ // Pool-level branch cleanup — ensures orphan-branch cleanup even when
463
+ // spawns are lazy and the orchestrator's spawn scope exits early.
464
+ yield* (0, effection_1.ensure)(() => {
465
+ for (const a of agents) {
466
+ if (!a.branch.disposed)
467
+ a.branch.pruneSync();
413
468
  }
414
- }
415
- if (prefillSetup.length > 0) {
416
- yield* (0, effection_1.call)(() => store.prefill(prefillSetup));
417
- }
418
- tw.write({
419
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
420
- type: 'pool:open', agentCount: agents.length,
421
- taskSuffixTokens: prefillSetup.map(([, t]) => t.length),
422
- pressure: { remaining: initPressure.remaining, softLimit: initPressure.softLimit, headroom: initPressure.headroom },
423
469
  });
424
- // Emit spawn events and activate agents
425
- for (const a of agents) {
426
- a.transition('active');
427
- yield* events.send({ type: 'agent:spawn', agentId: a.id, parentAgentId: a.parentId });
428
- }
429
- // ── Lazy grammar setup ───────────────────────────────────
470
+ // Lazy grammar setup — applied inside ctx.spawn after prefill completes.
430
471
  const applyLazyGrammar = (a) => {
431
472
  if (a.fmt.grammar && a.fmt.grammarLazy && a.fmt.grammarTriggers.length > 0) {
432
473
  const triggers = a.fmt.grammarTriggers.map(t => {
@@ -441,327 +482,576 @@ function useAgentPool(opts) {
441
482
  a.branch.setGrammarLazy(a.fmt.grammar, triggers);
442
483
  }
443
484
  };
444
- for (const a of agents)
445
- applyLazyGrammar(a);
446
- const agentById = new Map(agents.map(a => [a.id, a]));
447
- let steps = 0;
448
- let totalToolCalls = 0;
449
- const counters = { warmPrefillCalls: 0, warmPrefillBranches: 0 };
450
- // ── Phase operations (close over pool scope) ────────────
451
- /** SETTLE: prefill tool results that fit, defer oversized items for next tick */
452
- function* settle(items) {
453
- const settlePressure = new ContextPressure(ctx, pressureOpts);
454
- let headroom = settlePressure.headroom;
455
- if (trace) {
456
- const desc = items.map(s => `${s.toolName}:${s.prefillTokens.length}`).join(', ');
457
- try {
458
- process.stderr.write(`[SETTLE] remaining=${settlePressure.remaining} headroom=${headroom} cellsUsed=${settlePressure.cellsUsed} nCtx=${settlePressure.nCtx} items=[${desc}]\n`);
485
+ tw.write({
486
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
487
+ type: 'pool:open', agentCount: 0, taskSuffixTokens: [],
488
+ pressure: (() => {
489
+ const p = new ContextPressure(ctx, pressureOpts);
490
+ return { remaining: p.remaining, softLimit: p.softLimit, headroom: p.headroom };
491
+ })(),
492
+ });
493
+ // ── PoolContext — orchestrator's API surface ─────────────
494
+ const poolContext = {
495
+ root,
496
+ *spawn(spec) {
497
+ const parent = spec.parent ?? root;
498
+ const task = {
499
+ systemPrompt: spec.systemPrompt,
500
+ content: spec.content,
501
+ tools: toolsJson,
502
+ seed: spec.seed,
503
+ parent,
504
+ };
505
+ // Synchronous setup — fork, tokenize suffix, pressure check.
506
+ // No native store call yet; that's the tick loop's SPAWN phase's job.
507
+ const { agent, suffixTokens, formattedPrompt } = yield* setupAgent(parent, task, ctx, enableThinking);
508
+ const pressure = new ContextPressure(ctx, pressureOpts);
509
+ if (!pressure.canFit(suffixTokens.length)) {
510
+ agent.branch.pruneSync();
511
+ agent.dispose();
512
+ tw.write({
513
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
514
+ type: 'pool:agentDrop', agentId: agent.id, reason: 'pressure_init',
515
+ });
516
+ throw new Error(`useAgentPool: cannot fit agent suffix (${suffixTokens.length} tokens) under current pressure`);
459
517
  }
460
- catch { }
461
- }
462
- const prefillPairs = [];
463
- const settledAgents = [];
464
- const deferred = [];
465
- for (const item of items) {
466
- const a = agentById.get(item.agentId);
467
- if (!a || a.status === 'idle')
468
- continue;
469
- if (item.prefillTokens.length > headroom) {
470
- if (trace) {
471
- try {
472
- process.stderr.write(`[SETTLE] DEFER ${item.toolName}:${item.prefillTokens.length} > headroom=${headroom}\n`);
473
- }
474
- catch { }
475
- }
476
- deferred.push(item);
477
- continue;
518
+ // Enqueue for SPAWN phase. The tick loop will batch this with any
519
+ // other pending spawns into ONE store.prefill, transition to active,
520
+ // write trace events, and emit agent:spawn. Return the agent
521
+ // immediately — waitFor() is keyed off a transition, not a status
522
+ // snapshot, so the pre-activation 'idle' status doesn't race with
523
+ // the real terminal-idle signal.
524
+ pendingSpawns.push({ agent, suffixTokens, formattedPrompt, task });
525
+ agents.push(agent);
526
+ agentById.set(agent.id, agent);
527
+ return agent;
528
+ },
529
+ *waitFor(agent) {
530
+ // Agent completion = terminal 'idle' OR 'disposed'. Pre-activation
531
+ // 'idle' (the constructor default) would be a false positive, so we
532
+ // wait for a TRANSITION signal rather than checking status.snapshot.
533
+ // The SPAWN phase transitions 'idle' → 'active' when it activates the
534
+ // agent; subsequent transitions lead to a terminal 'idle' or 'disposed'.
535
+ const stream = yield* (0, effection_1.each)(agent.statusSignal);
536
+ // Only short-circuit for already-disposed — no further signal is coming.
537
+ if (agent.status === 'disposed')
538
+ return agent;
539
+ for (const s of stream) {
540
+ if (s === 'idle' || s === 'disposed')
541
+ return agent;
542
+ yield* effection_1.each.next();
478
543
  }
479
- prefillPairs.push([a.branch, item.prefillTokens]);
480
- settledAgents.push(a);
481
- headroom -= item.prefillTokens.length;
482
- const postSettle = new ContextPressure(ctx, pressureOpts);
483
- a.recordToolResult({
484
- name: item.toolName, args: item.callId,
485
- resultTokenCount: item.prefillTokens.length,
486
- contextAfterPercent: postSettle.percentAvailable,
487
- timestamp: performance.now(),
544
+ return agent;
545
+ },
546
+ *extendRoot(userContent, assistantContent) {
547
+ if (!assistantContent)
548
+ return 0;
549
+ const turnTokens = (0, sdk_2.buildTurnDelta)(ctx, userContent, assistantContent);
550
+ // Rendezvous with the tick loop's SPAWN phase — see pendingExtends.
551
+ // action() is the Effection-native one-shot suspend: orchestrator
552
+ // queues the request, suspends; tick loop drains + resolves; this
553
+ // operation returns the deltaTokens. The finally returned from the
554
+ // executor marks the request discarded if this fiber is cancelled
555
+ // before the drain runs, so the drain doesn't touch a dead action.
556
+ return yield* (0, effection_1.action)((resolve, reject) => {
557
+ const req = {
558
+ tokens: turnTokens,
559
+ userContent,
560
+ assistantContent,
561
+ resolve,
562
+ reject,
563
+ discarded: false,
564
+ };
565
+ pendingExtends.push(req);
566
+ return () => { req.discarded = true; };
488
567
  });
489
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
490
- type: 'branch:prefill', branchHandle: a.id,
491
- tokenCount: item.prefillTokens.length, role: 'toolResult' });
568
+ },
569
+ canFit(estimatedSuffixTokens) {
570
+ return new ContextPressure(ctx, pressureOpts).canFit(estimatedSuffixTokens);
571
+ },
572
+ };
573
+ // Subscribe BEFORE spawning orchestrator or tick loop — no events missed
574
+ const subscription = yield* poolChannel;
575
+ // Orchestrator runs concurrently with tick loop under the pool scope.
576
+ // Sets orchestratorDone when complete; tick loop terminates on
577
+ // (orchestratorDone && all agents idle/disposed).
578
+ let orchestratorDone = false;
579
+ let orchestratorError = null;
580
+ yield* (0, effection_1.spawn)(function* () {
581
+ try {
582
+ yield* orchestrate(poolContext);
492
583
  }
493
- if (prefillPairs.length > 0) {
494
- if (trace) {
495
- const total = prefillPairs.reduce((s, [, t]) => s + t.length, 0);
496
- try {
497
- process.stderr.write(`[SETTLE] PREFILL ${prefillPairs.length} branches, ${total} tokens, headroom_after=${headroom}\n`);
498
- }
499
- catch { }
500
- }
501
- yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
502
- counters.warmPrefillCalls++;
503
- counters.warmPrefillBranches += prefillPairs.length;
504
- // Probe prefill from DISPATCH
505
- const probePairs = [];
506
- for (const a of settledAgents) {
507
- const probe = items.find(s => s.agentId === a.id)?.probe;
508
- if (probe) {
509
- const probeTokens = ctx.tokenizeSync(probe, false);
510
- probePairs.push([a.branch, probeTokens]);
584
+ catch (e) {
585
+ orchestratorError = e;
586
+ }
587
+ finally {
588
+ orchestratorDone = true;
589
+ }
590
+ });
591
+ // Spawn tick loop — runs concurrently with Subscription consumption.
592
+ // scoped() creates an error boundary: if llama_decode fails (KV exhaustion),
593
+ // the scope tears down and the channel closes with whatever results exist.
594
+ yield* (0, effection_1.spawn)(function* () {
595
+ let steps = 0;
596
+ let totalToolCalls = 0;
597
+ const counters = { warmPrefillCalls: 0, warmPrefillBranches: 0 };
598
+ try {
599
+ // ── Phase operations (close over pool scope) ────────────
600
+ /** SETTLE: prefill tool results that fit, defer oversized items for next tick */
601
+ function* settle(items) {
602
+ const settlePressure = new ContextPressure(ctx, pressureOpts);
603
+ let headroom = settlePressure.headroom;
604
+ const prefillPairs = [];
605
+ const settledAgents = [];
606
+ const itemProbes = new Map();
607
+ const deferred = [];
608
+ for (const item of items) {
609
+ const a = agentById.get(item.agentId);
610
+ if (!a || a.status === 'idle')
611
+ continue;
612
+ if (item.prefillTokens.length > headroom) {
613
+ // Defer — siblings may finish and free KV, letting this result
614
+ // settle next tick (staggered-exit for parallel orchestration).
615
+ // Policy is consulted at stall-break time, not here: invoking
616
+ // it eagerly would break "wait for a sibling to report and
617
+ // free cells" by nudging/dropping on first over-headroom.
618
+ deferred.push(item);
619
+ continue;
620
+ }
621
+ prefillPairs.push([a.branch, item.prefillTokens]);
622
+ settledAgents.push(a);
623
+ if (item.probe)
624
+ itemProbes.set(a.id, item.probe);
625
+ headroom -= item.prefillTokens.length;
626
+ const postSettle = new ContextPressure(ctx, pressureOpts);
627
+ a.recordToolResult({
628
+ name: item.toolName, args: item.args,
629
+ resultTokenCount: item.prefillTokens.length,
630
+ contextAfterPercent: postSettle.percentAvailable,
631
+ timestamp: performance.now(),
632
+ });
511
633
  tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
512
634
  type: 'branch:prefill', branchHandle: a.id,
513
- tokenCount: probeTokens.length, role: 'probe', probeText: probe });
635
+ tokenCount: item.prefillTokens.length, role: 'toolResult' });
514
636
  }
515
- }
516
- if (probePairs.length > 0) {
517
- yield* (0, effection_1.call)(() => store.prefill(probePairs));
518
- }
519
- for (const a of settledAgents) {
520
- a.transition('active');
521
- a.resetTurn();
522
- applyLazyGrammar(a);
523
- }
524
- }
525
- return deferred;
526
- }
527
- /** DISPATCH: execute tool calls sequentially, return settled items for next tick */
528
- function* dispatch(calls) {
529
- const results = [];
530
- for (const { agent, tc } of calls) {
531
- let toolArgs;
532
- try {
533
- toolArgs = JSON.parse(tc.arguments);
534
- }
535
- catch {
536
- toolArgs = {};
537
- }
538
- const callId = tc.id || `call_${agent.toolCallCount}`;
539
- agent.incrementToolCalls();
540
- totalToolCalls++;
541
- agent.incrementTurns();
542
- yield* events.send({ type: 'agent:tool_call', agentId: agent.id, tool: tc.name, args: tc.arguments });
543
- const tool = tools.get(tc.name);
544
- const dispatchPressure = new ContextPressure(ctx, pressureOpts);
545
- const explore = policy.shouldExplore?.(agent, dispatchPressure) ?? true;
546
- const dispatchTraceId = tw.nextId();
547
- const toolT0 = performance.now();
548
- tw.write({
549
- traceId: dispatchTraceId, parentTraceId: poolScope.traceId, ts: toolT0,
550
- type: 'tool:dispatch', agentId: agent.id, tool: tc.name,
551
- toolIndex: toolIndexMap.get(tc.name) ?? -1, toolkitSize,
552
- args: toolArgs, callId,
553
- explore, percentAvailable: dispatchPressure.percentAvailable,
554
- });
555
- const toolContext = {
556
- agentId: agent.id, branch: agent.branch,
557
- onProgress: (p) => {
558
- progressBridge.send({ type: 'agent:tool_progress', agentId: agent.id, tool: tc.name, filled: p.filled, total: p.total });
559
- },
560
- scorer: opts.scorer, explore,
561
- pressurePercentAvailable: dispatchPressure.percentAvailable,
562
- };
563
- try {
564
- yield* context_1.TraceParent.set(dispatchTraceId);
565
- yield* context_1.CallingAgent.set(agent);
566
- const result = yield* (0, effection_1.scoped)(function* () {
567
- return yield* (0, effection_1.call)(() => tool ? tool.execute(toolArgs, toolContext) : Promise.resolve({ error: `Unknown tool: ${tc.name}` }));
568
- });
569
- const postToolPressure = new ContextPressure(ctx, pressureOpts);
570
- const contextAvailablePercent = postToolPressure.percentAvailable;
571
- if (result && typeof result === 'object' && !Array.isArray(result)) {
572
- result._contextAvailablePercent = contextAvailablePercent;
573
- const resultObj = result;
574
- if (Array.isArray(resultObj.results)) {
575
- agent.addNestedResults(resultObj.results.filter((f) => typeof f === 'string'));
637
+ if (prefillPairs.length > 0) {
638
+ yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
639
+ counters.warmPrefillCalls++;
640
+ counters.warmPrefillBranches += prefillPairs.length;
641
+ // Probe prefill from DISPATCH or nudge-replacement.
642
+ const probePairs = [];
643
+ for (const a of settledAgents) {
644
+ const probe = itemProbes.get(a.id);
645
+ if (probe) {
646
+ const probeTokens = ctx.tokenizeSync(probe, false);
647
+ probePairs.push([a.branch, probeTokens]);
648
+ tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
649
+ type: 'branch:prefill', branchHandle: a.id,
650
+ tokenCount: probeTokens.length, role: 'probe', probeText: probe });
651
+ }
576
652
  }
577
- if (Array.isArray(resultObj.nestedResults)) {
578
- agent.addNestedResults(resultObj.nestedResults.filter((f) => typeof f === 'string'));
653
+ if (probePairs.length > 0) {
654
+ yield* (0, effection_1.call)(() => store.prefill(probePairs));
655
+ }
656
+ for (const a of settledAgents) {
657
+ a.transition('active');
658
+ a.resetTurn();
659
+ applyLazyGrammar(a);
579
660
  }
580
661
  }
581
- const resultStr = JSON.stringify(result);
582
- yield* events.send({ type: 'agent:tool_result', agentId: agent.id, tool: tc.name, result: resultStr, contextAvailablePercent });
583
- const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId);
584
- const probe = tool?.probe(result) ?? undefined;
585
- results.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId, probe });
586
- tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
587
- type: 'tool:result', agentId: agent.id, tool: tc.name,
588
- result, prefillTokenCount: prefillTokens.length,
589
- durationMs: performance.now() - toolT0 });
590
- }
591
- catch (err) {
592
- agent.transition('idle');
593
- agent.reportResult(`Tool error: ${err.message}`, 'tool_error');
594
- tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
595
- type: 'tool:error', agentId: agent.id, tool: tc.name,
596
- error: err.message });
597
- }
598
- }
599
- return results;
600
- }
601
- // ── Four-phase tick loop ─────────────────────────────────
602
- let pendingSettled = [];
603
- // ── Four-phase tick loop ─────────────────────────────────
604
- let recoveryAttempted = false;
605
- for (;;) {
606
- // -- Phase 1: PRODUCE -- sample from active agents, collect tool calls
607
- policy.resetTick?.();
608
- const pressure = new ContextPressure(ctx, pressureOpts);
609
- if (trace && (pressure.critical || pressure.headroom < 0)) {
610
- try {
611
- process.stderr.write(`[PRODUCE] ${pressure.critical ? 'CRITICAL' : 'SOFT_LIMIT'} remaining=${pressure.remaining} headroom=${pressure.headroom} cellsUsed=${pressure.cellsUsed} nCtx=${pressure.nCtx}\n`);
662
+ return deferred;
612
663
  }
613
- catch { }
614
- }
615
- const entries = [];
616
- const toolCalls = [];
617
- const nudges = [];
618
- for (const a of agents) {
619
- if (a.status !== 'active')
620
- continue;
621
- const policyExit = policy.shouldExit?.(a, pressure);
622
- if (policyExit ?? pressure.critical) {
623
- a.transition('idle');
624
- const exitReason = pressure.critical ? 'pressure_critical'
625
- : policyExit ? 'policy_exit'
626
- : 'pressure_critical';
627
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
628
- type: 'pool:agentDrop', agentId: a.id, reason: exitReason });
629
- yield* events.send({ type: 'agent:done', agentId: a.id });
630
- // Trailing stop: extract findings inline, free KV for remaining agents
631
- yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, events);
632
- continue;
664
+ /** DISPATCH: execute tool calls sequentially, return settled items for next tick */
665
+ function* dispatch(calls) {
666
+ const results = [];
667
+ for (const { agent, tc } of calls) {
668
+ let toolArgs;
669
+ try {
670
+ toolArgs = JSON.parse(tc.arguments);
671
+ }
672
+ catch {
673
+ toolArgs = {};
674
+ }
675
+ const callId = tc.id || `call_${agent.toolCallCount}`;
676
+ agent.incrementToolCalls();
677
+ totalToolCalls++;
678
+ agent.incrementTurns();
679
+ yield* poolChannel.send({ type: 'agent:tool_call', agentId: agent.id, tool: tc.name, args: tc.arguments });
680
+ const tool = tools.get(tc.name);
681
+ const dispatchPressure = new ContextPressure(ctx, pressureOpts);
682
+ const explore = policy.shouldExplore?.(agent, dispatchPressure) ?? true;
683
+ const dispatchTraceId = tw.nextId();
684
+ const toolT0 = performance.now();
685
+ tw.write({
686
+ traceId: dispatchTraceId, parentTraceId: poolScope.traceId, ts: toolT0,
687
+ type: 'tool:dispatch', agentId: agent.id, tool: tc.name,
688
+ toolIndex: toolIndexMap.get(tc.name) ?? -1, toolkitSize,
689
+ args: toolArgs, callId,
690
+ explore, percentAvailable: dispatchPressure.percentAvailable,
691
+ });
692
+ const peerHistory = agents
693
+ .filter(a => a.id !== agent.id)
694
+ .flatMap(a => a.toolHistory);
695
+ const toolContext = {
696
+ agentId: agent.id, branch: agent.branch,
697
+ onProgress: (p) => {
698
+ progressBridge.send({ type: 'agent:tool_progress', agentId: agent.id, tool: tc.name, filled: p.filled, total: p.total });
699
+ },
700
+ scorer: opts.scorer, explore,
701
+ pressurePercentAvailable: dispatchPressure.percentAvailable,
702
+ peerHistory,
703
+ };
704
+ try {
705
+ yield* context_1.TraceParent.set(dispatchTraceId);
706
+ yield* context_1.CallingAgent.set(agent);
707
+ const result = yield* (0, effection_1.scoped)(function* () {
708
+ return yield* (0, effection_1.call)(() => tool ? tool.execute(toolArgs, toolContext) : Promise.resolve({ error: `Unknown tool: ${tc.name}` }));
709
+ });
710
+ const postToolPressure = new ContextPressure(ctx, pressureOpts);
711
+ const contextAvailablePercent = postToolPressure.percentAvailable;
712
+ if (result && typeof result === 'object' && !Array.isArray(result)) {
713
+ result._contextAvailablePercent = contextAvailablePercent;
714
+ const resultObj = result;
715
+ if (Array.isArray(resultObj.results)) {
716
+ agent.addNestedResults(resultObj.results.filter((f) => typeof f === 'string'));
717
+ }
718
+ if (Array.isArray(resultObj.nestedResults)) {
719
+ agent.addNestedResults(resultObj.nestedResults.filter((f) => typeof f === 'string'));
720
+ }
721
+ }
722
+ const resultStr = JSON.stringify(result);
723
+ yield* poolChannel.send({ type: 'agent:tool_result', agentId: agent.id, tool: tc.name, result: resultStr, contextAvailablePercent });
724
+ const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId, { enableThinking: agent.fmt.enableThinking });
725
+ const probe = tool?.probe(result) ?? undefined;
726
+ results.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId, args: tc.arguments, probe });
727
+ tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
728
+ type: 'tool:result', agentId: agent.id, tool: tc.name,
729
+ result, prefillTokenCount: prefillTokens.length,
730
+ durationMs: performance.now() - toolT0 });
731
+ }
732
+ catch (err) {
733
+ agent.transition('idle');
734
+ agent.reportResult(`Tool error: ${err.message}`, 'tool_error');
735
+ tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
736
+ type: 'tool:error', agentId: agent.id, tool: tc.name,
737
+ error: err.message });
738
+ }
739
+ }
740
+ return results;
633
741
  }
634
- const { token, text, isStop } = a.branch.produceSync();
635
- if (isStop) {
636
- const parsed = ctx.parseChatOutput(a.rawOutput, a.fmt.format, {
637
- reasoningFormat: a.fmt.reasoningFormat,
638
- generationPrompt: a.fmt.generationPrompt,
639
- parser: a.fmt.parser,
640
- });
641
- tw.write({
642
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
643
- type: 'agent:turn', agentId: a.id, turn: a.turns,
644
- rawOutput: a.rawOutput,
645
- parsedContent: parsed.content || null,
646
- parsedToolCalls: parsed.toolCalls.map(tc => ({ name: tc.name, arguments: tc.arguments })),
647
- });
648
- // Policy decides what to do with the parsed output
649
- const action = policy.onProduced(a, parsed, pressure, policyConfig);
650
- switch (action.type) {
651
- case 'free_text_report':
652
- yield* handleFreeTextReport(a, action.content, events);
653
- continue;
654
- case 'idle':
655
- yield* handleIdleDrop(a, action.reason, events, tw, poolScope.traceId);
742
+ // ── Four-phase tick loop ─────────────────────────────────
743
+ let pendingSettled = [];
744
+ // ── Four-phase tick loop ─────────────────────────────────
745
+ let recoveryAttempted = false;
746
+ for (;;) {
747
+ // Idle until orchestrator enqueues work (spawn or extend) or completes.
748
+ // Include pendingExtends: the final extend after the last task in chain
749
+ // mode must drain before the loop exits, otherwise the orchestrator fiber
750
+ // is left suspended on a dead action.
751
+ if (agents.length === 0
752
+ && pendingSpawns.length === 0
753
+ && pendingExtends.length === 0) {
754
+ if (orchestratorDone)
755
+ break;
756
+ yield* (0, effection_1.sleep)(1);
757
+ continue;
758
+ }
759
+ // -- Phase 0: SPAWN+EXTEND -- drain pending spawns AND pending extends,
760
+ // batching all fork-suffix prefills and extend-onto-root prefills into
761
+ // ONE native store.prefill call. All store-level native calls in this
762
+ // pool are issued from this fiber (the tick loop), never concurrently
763
+ // with the orchestrator's fiber. Piggybacking extend in this phase
764
+ // preserves the continuous-tree-batching invariant (one GPU round-trip
765
+ // per tick) and naturally atomic-orders both kinds of work.
766
+ if (pendingSpawns.length > 0 || pendingExtends.length > 0) {
767
+ const drainedSpawns = pendingSpawns.splice(0, pendingSpawns.length);
768
+ const drainedExtends = pendingExtends
769
+ .splice(0, pendingExtends.length)
770
+ .filter(e => !e.discarded);
771
+ const prefillPairs = [
772
+ ...drainedSpawns.map(s => [s.agent.branch, s.suffixTokens]),
773
+ ...drainedExtends.map(e => [root, e.tokens]),
774
+ ];
775
+ try {
776
+ if (prefillPairs.length > 0) {
777
+ yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
778
+ }
779
+ }
780
+ catch (err) {
781
+ for (const e of drainedExtends)
782
+ e.reject(err);
783
+ throw err;
784
+ }
785
+ // Resolve extend requests with the delta token count. root.position
786
+ // has advanced by the sum of extend token counts at this point.
787
+ for (const e of drainedExtends) {
788
+ tw.write({
789
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
790
+ type: 'spine:extend',
791
+ userContent: e.userContent,
792
+ assistantContent: e.assistantContent,
793
+ deltaTokens: e.tokens.length,
794
+ positionAfter: root.position,
795
+ });
796
+ e.resolve(e.tokens.length);
797
+ }
798
+ for (const s of drainedSpawns) {
799
+ tw.write({
800
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
801
+ type: 'branch:create', branchHandle: s.agent.id, parentHandle: s.agent.parentId,
802
+ position: 0, role: 'agentFork',
803
+ });
804
+ tw.write({
805
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
806
+ type: 'prompt:format', promptText: s.formattedPrompt,
807
+ taskContent: s.task.content, tokenCount: s.suffixTokens.length,
808
+ messages: JSON.stringify([
809
+ { role: 'system', content: s.task.systemPrompt },
810
+ { role: 'user', content: s.task.content },
811
+ ]),
812
+ tools: s.task.tools, role: 'agentSuffix',
813
+ });
814
+ applyLazyGrammar(s.agent);
815
+ // transition fires agent.statusSignal — ctx.spawn's subscriber is waiting on this.
816
+ s.agent.transition('active');
817
+ yield* poolChannel.send({ type: 'agent:spawn', agentId: s.agent.id, parentAgentId: s.agent.parentId });
818
+ }
819
+ }
820
+ // If all we had was pending spawns, and none of them activated (shouldn't happen
821
+ // normally — SPAWN always transitions to active), nothing to produce. Loop back.
822
+ if (agents.length === 0)
823
+ continue;
824
+ // -- Phase 1: PRODUCE -- sample from active agents, collect tool calls
825
+ policy.resetTick?.();
826
+ const pressure = new ContextPressure(ctx, pressureOpts);
827
+ const entries = [];
828
+ const toolCalls = [];
829
+ const nudges = [];
830
+ for (const a of agents) {
831
+ if (a.status !== 'active')
656
832
  continue;
657
- case 'nudge':
658
- nudges.push(yield* handleNudge(a, action.message, parsed.toolCalls[0], ctx, tools));
833
+ const policyExit = policy.shouldExit?.(a, pressure);
834
+ if (policyExit ?? pressure.critical) {
835
+ const exitReason = pressure.critical ? 'pressure_critical'
836
+ : policyExit ? 'policy_exit'
837
+ : 'pressure_critical';
659
838
  tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
660
- type: 'pool:agentNudge', agentId: a.id, reason: 'pressure_softcut' });
661
- continue;
662
- case 'report':
663
- yield* handleReport(a, action.result, parsed.toolCalls[0], terminalTool, pruneOnReport, events);
664
- totalToolCalls++;
665
- continue;
666
- case 'tool_call':
667
- a.transition('awaiting_tool');
668
- toolCalls.push({ agent: a, tc: action.tc });
669
- a.resetTurn();
839
+ type: 'pool:agentDrop', agentId: a.id, reason: exitReason });
840
+ yield* poolChannel.send({ type: 'agent:done', agentId: a.id });
841
+ // Run recovery BEFORE transitioning to idle — otherwise the statusSignal
842
+ // fires 'idle' mid-recovery, PoolContext.waitFor returns early, the
843
+ // orchestrator resumes and starts spawning/prefilling the next task
844
+ // while this agent is still being decoded by recoverInline. Concurrent
845
+ // native calls on the same llama_context → SEGV.
846
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
847
+ a.transition('idle');
670
848
  continue;
849
+ }
850
+ const { token, text, isStop } = a.branch.produceSync();
851
+ if (isStop) {
852
+ const parsed = a.finalize(ctx);
853
+ tw.write({
854
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
855
+ type: 'agent:turn', agentId: a.id, turn: a.turns,
856
+ rawOutput: a.rawOutput,
857
+ parsedContent: parsed.content || null,
858
+ parsedToolCalls: parsed.toolCalls.map(tc => ({ name: tc.name, arguments: tc.arguments })),
859
+ });
860
+ // Policy decides what to do with the parsed output
861
+ const action = policy.onProduced(a, parsed, pressure, policyConfig);
862
+ switch (action.type) {
863
+ case 'free_text_report':
864
+ yield* handleFreeTextReport(a, action.content, poolChannel);
865
+ continue;
866
+ case 'idle':
867
+ yield* handleIdleDrop(a, action.reason, poolChannel, tw, poolScope.traceId);
868
+ continue;
869
+ case 'nudge':
870
+ nudges.push(yield* handleNudge(a, action.message, parsed.toolCalls[0], ctx, tools));
871
+ tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
872
+ type: 'pool:agentNudge', agentId: a.id, reason: 'nudge', message: action.message });
873
+ continue;
874
+ case 'report':
875
+ yield* handleReport(a, action.result, parsed.toolCalls[0], terminalTool, pruneOnReport, poolChannel);
876
+ totalToolCalls++;
877
+ continue;
878
+ case 'tool_call':
879
+ a.transition('awaiting_tool');
880
+ toolCalls.push({ agent: a, tc: action.tc });
881
+ a.resetTurn();
882
+ continue;
883
+ }
884
+ }
885
+ entries.push([a.branch, token]);
886
+ if (trace) {
887
+ const entropy = a.branch.modelEntropy();
888
+ const surprisal = a.branch.modelSurprisal(token);
889
+ a.accumulateTokenWithTrace(text, entropy, surprisal);
890
+ a.observe(ctx);
891
+ yield* poolChannel.send({
892
+ type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount,
893
+ entropy, surprisal,
894
+ });
895
+ }
896
+ else {
897
+ a.accumulateToken(text);
898
+ a.observe(ctx);
899
+ yield* poolChannel.send({ type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount });
900
+ }
671
901
  }
672
- }
673
- entries.push([a.branch, token]);
674
- if (trace) {
675
- const entropy = a.branch.modelEntropy();
676
- const surprisal = a.branch.modelSurprisal(token);
677
- a.accumulateTokenWithTrace(text, entropy, surprisal);
678
- yield* events.send({
679
- type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount,
680
- entropy, surprisal,
681
- });
682
- }
683
- else {
684
- a.accumulateToken(text);
685
- yield* events.send({ type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount });
686
- }
687
- }
688
- // -- Phase 2: COMMIT -- batch-decode produced tokens
689
- if (entries.length > 0) {
690
- yield* (0, effection_1.call)(() => store.commit(entries));
691
- steps++;
692
- const commitPressure = new ContextPressure(ctx, pressureOpts);
693
- yield* events.send({ type: 'agent:tick', cellsUsed: commitPressure.cellsUsed, nCtx: commitPressure.nCtx });
694
- }
695
- // -- Phase 3: SETTLE (settle what fits, defer what doesn't)
696
- const toSettle = [...pendingSettled, ...nudges];
697
- const deferred = toSettle.length > 0 ? yield* settle(toSettle) : [];
698
- // Stall-breaker: if items are deferred and no active agents remain,
699
- // sacrifice an awaiting_tool agent to free KV. Without this, agents
700
- // with oversized results stay awaiting_tool indefinitely — PRODUCE
701
- // skips them, headroom never recovers, the pool loops forever.
702
- if (deferred.length > 0 && !agents.some(a => a.status === 'active')) {
703
- const victim = agents.find(a => a.status === 'awaiting_tool' && !a.branch.disposed);
704
- if (victim) {
705
- victim.transition('idle');
706
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
707
- type: 'pool:agentDrop', agentId: victim.id, reason: 'pressure_settle_reject' });
708
- yield* events.send({ type: 'agent:done', agentId: victim.id });
709
- yield* recoverInline(victim, policy, ctx, store, tw, poolScope.traceId, events);
710
- }
711
- }
712
- // -- Phase 4: DISPATCH
713
- const dispatched = yield* dispatch(toolCalls);
714
- // Deferred + new dispatch results → next tick's SETTLE
715
- pendingSettled = [...deferred, ...dispatched];
716
- // -- Termination + recovery
717
- if (agents.every(a => a.status === 'idle' || a.status === 'disposed')) {
718
- if (!recoveryAttempted) {
719
- recoveryAttempted = true;
720
- // Recover any idle agents that weren't handled by inline recovery
721
- // (e.g., killed by max_turns, time budget, or free_text_stop)
722
- for (const a of agents) {
723
- if (a.status === 'idle' && !a.result && !a.branch.disposed) {
724
- yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, events);
902
+ // -- Phase 2: COMMIT -- batch-decode produced tokens
903
+ if (entries.length > 0) {
904
+ yield* (0, effection_1.call)(() => store.commit(entries));
905
+ steps++;
906
+ const commitPressure = new ContextPressure(ctx, pressureOpts);
907
+ yield* poolChannel.send({ type: 'agent:tick', cellsUsed: commitPressure.cellsUsed, nCtx: commitPressure.nCtx });
908
+ }
909
+ // -- Phase 3: SETTLE (settle what fits, defer what doesn't)
910
+ const toSettle = [...pendingSettled, ...nudges];
911
+ const deferred = toSettle.length > 0 ? yield* settle(toSettle) : [];
912
+ // Stall-breaker: `deferred` has items but no active siblings can free
913
+ // KV. Consult policy per deferred item — the policy is the "last
914
+ // resort" decision point (staggered-exit for parallel orchestration
915
+ // still works because defer-on-oversize above lets items wait while
916
+ // siblings are active; only when ALL siblings are awaiting_tool or
917
+ // idle do we reach here). Distinct drop reasons:
918
+ // - `pressure_settle_reject` — policy said idle, or nudge but the
919
+ // nudge payload itself doesn't fit (policy suggestion infeasible).
920
+ // - `settle_stall_break` — policy hook absent (legacy fallback).
921
+ if (deferred.length > 0 && !agents.some(a => a.status === 'active')) {
922
+ const stallPressure = new ContextPressure(ctx, pressureOpts);
923
+ let stallHeadroom = stallPressure.headroom;
924
+ const resolved = [];
925
+ for (const item of deferred) {
926
+ const a = agentById.get(item.agentId);
927
+ if (!a || a.status !== 'awaiting_tool' || a.branch.disposed)
928
+ continue;
929
+ const action = policy.onSettleReject?.(a, item.prefillTokens.length, stallPressure, policyConfig);
930
+ if (action?.type === 'nudge') {
931
+ // Record the policy's decision regardless of whether the
932
+ // nudge itself fits — the event captures "policy consulted,
933
+ // returned nudge" which is separate from "nudge was actionable".
934
+ tw.write({
935
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
936
+ type: 'pool:agentNudge', agentId: a.id, reason: 'settle_reject', message: action.message,
937
+ });
938
+ const nudgeResult = { error: action.message };
939
+ const nudgeTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), item.callId, { enableThinking: a.fmt.enableThinking });
940
+ if (nudgeTokens.length <= stallHeadroom) {
941
+ const probe = tools.get(item.toolName)?.probe(nudgeResult) ?? undefined;
942
+ a.incrementTurns();
943
+ resolved.push({
944
+ agentId: a.id,
945
+ prefillTokens: nudgeTokens,
946
+ toolName: item.toolName,
947
+ callId: item.callId,
948
+ args: item.args,
949
+ probe,
950
+ });
951
+ stallHeadroom -= nudgeTokens.length;
952
+ continue;
953
+ }
954
+ // Nudge doesn't fit — policy's suggestion is infeasible, fall through to drop.
955
+ }
956
+ // Drop. Reason: policy-said-idle OR nudge-didn't-fit →
957
+ // `pressure_settle_reject` (policy path). Policy hook absent →
958
+ // `settle_stall_break` (legacy fallback).
959
+ const reason = action ? 'pressure_settle_reject' : 'settle_stall_break';
960
+ tw.write({
961
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
962
+ type: 'pool:agentDrop', agentId: a.id, reason,
963
+ });
964
+ yield* poolChannel.send({ type: 'agent:done', agentId: a.id });
965
+ // Recover BEFORE transition — single-fiber store discipline.
966
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
967
+ a.transition('idle');
725
968
  }
969
+ // Replace deferred with the surviving (nudged) items for next tick.
970
+ deferred.length = 0;
971
+ deferred.push(...resolved);
972
+ }
973
+ // -- Phase 4: DISPATCH
974
+ const dispatched = yield* dispatch(toolCalls);
975
+ // Deferred + new dispatch results → next tick's SETTLE
976
+ pendingSettled = [...deferred, ...dispatched];
977
+ // -- Termination + recovery
978
+ // Wait for the orchestrator to finish before closing — it may spawn more agents.
979
+ const allIdle = agents.every(a => a.status === 'idle' || a.status === 'disposed');
980
+ if (allIdle && orchestratorDone) {
981
+ if (!recoveryAttempted) {
982
+ recoveryAttempted = true;
983
+ // Recover any idle agents that weren't handled by inline recovery
984
+ // (e.g., killed by max_turns, time budget, or free_text_stop)
985
+ for (const a of agents) {
986
+ if (a.status === 'idle' && !a.result && !a.branch.disposed) {
987
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
988
+ }
989
+ }
990
+ }
991
+ if (orchestratorError)
992
+ throw orchestratorError;
993
+ break;
994
+ }
995
+ if (allIdle && !orchestratorDone) {
996
+ // All current agents done but orchestrator may spawn more.
997
+ yield* (0, effection_1.sleep)(1);
726
998
  }
727
999
  }
728
- break;
1000
+ // ── Close channel with result — consumers get AgentPoolResult as close value ───────
1001
+ // Branch cleanup is handled by each branch's ensure() from setupAgent —
1002
+ // when this resource's scope exits, all ensure() callbacks fire.
1003
+ tw.write({
1004
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
1005
+ type: 'pool:close',
1006
+ agents: agents.map(a => ({
1007
+ agentId: a.id, tokenCount: a.tokenCount,
1008
+ toolCallCount: a.toolCallCount, result: a.result,
1009
+ ppl: a.branch.disposed ? 0 : a.branch.perplexity,
1010
+ })),
1011
+ totalTokens: agents.reduce((s, a) => s + a.tokenCount, 0),
1012
+ steps, durationMs: performance.now() - poolT0,
1013
+ });
1014
+ poolScope.close();
1015
+ const result = {
1016
+ agents: agents.map(a => ({
1017
+ agentId: a.id,
1018
+ parentAgentId: a.parentId,
1019
+ branch: a.branch,
1020
+ agent: a,
1021
+ result: a.result,
1022
+ toolCallCount: a.toolCallCount,
1023
+ tokenCount: a.tokenCount,
1024
+ ppl: a.branch.disposed ? 0 : a.branch.perplexity,
1025
+ samplingPpl: a.branch.disposed ? 0 : a.branch.samplingPerplexity,
1026
+ trace: trace ? a.traceBuffer : undefined,
1027
+ nestedResults: [...a.nestedResults],
1028
+ })),
1029
+ totalTokens: agents.reduce((s, a) => s + a.tokenCount, 0),
1030
+ totalToolCalls,
1031
+ steps,
1032
+ counters,
1033
+ };
1034
+ yield* poolChannel.close(result);
729
1035
  }
730
- }
731
- // ── Provide result — suspends, branches stay alive ───────
732
- // Branch cleanup is handled by each branch's ensure() from setupAgent —
733
- // when this resource's scope exits, all ensure() callbacks fire.
734
- tw.write({
735
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
736
- type: 'pool:close',
737
- agents: agents.map(a => ({
738
- agentId: a.id, tokenCount: a.tokenCount,
739
- toolCallCount: a.toolCallCount, result: a.result,
740
- ppl: a.branch.disposed ? 0 : a.branch.perplexity,
741
- })),
742
- totalTokens: agents.reduce((s, a) => s + a.tokenCount, 0),
743
- steps, durationMs: performance.now() - poolT0,
744
- });
745
- poolScope.close();
746
- const result = {
747
- agents: agents.map(a => ({
748
- agentId: a.id,
749
- parentAgentId: a.parentId,
750
- branch: a.branch,
751
- result: a.result,
752
- toolCallCount: a.toolCallCount,
753
- tokenCount: a.tokenCount,
754
- ppl: a.branch.disposed ? 0 : a.branch.perplexity,
755
- samplingPpl: a.branch.disposed ? 0 : a.branch.samplingPerplexity,
756
- trace: trace ? a.traceBuffer : undefined,
757
- nestedResults: [...a.nestedResults],
758
- })),
759
- totalTokens: agents.reduce((s, a) => s + a.tokenCount, 0),
760
- totalToolCalls,
761
- steps,
762
- counters,
763
- };
764
- yield* provide(result);
1036
+ catch {
1037
+ // KV exhaustion or other decode failure — close with partial results
1038
+ poolScope.close();
1039
+ const partial = {
1040
+ agents: agents.map(a => ({
1041
+ agentId: a.id, parentAgentId: a.parentId, branch: a.branch, agent: a,
1042
+ result: a.result, toolCallCount: a.toolCallCount, tokenCount: a.tokenCount,
1043
+ ppl: a.branch.disposed ? 0 : a.branch.perplexity,
1044
+ samplingPpl: a.branch.disposed ? 0 : a.branch.samplingPerplexity,
1045
+ trace: trace ? a.traceBuffer : undefined,
1046
+ nestedResults: [...a.nestedResults],
1047
+ })),
1048
+ totalTokens: agents.reduce((s, a) => s + a.tokenCount, 0),
1049
+ totalToolCalls, steps, counters,
1050
+ };
1051
+ yield* poolChannel.close(partial);
1052
+ }
1053
+ }); // end spawn — tick loop
1054
+ yield* provide(subscription);
765
1055
  });
766
1056
  }
767
1057
  //# sourceMappingURL=agent-pool.js.map