@lloyal-labs/lloyal-agents 1.5.6 → 1.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,12 +7,17 @@ const sdk_1 = require("@lloyal-labs/sdk");
7
7
  const context_1 = require("./context");
8
8
  const sdk_2 = require("@lloyal-labs/sdk");
9
9
  const trace_scope_1 = require("./trace-scope");
10
- const generate_1 = require("./generate");
11
10
  const Agent_1 = require("./Agent");
12
11
  const AgentPolicy_1 = require("./AgentPolicy");
13
12
  /**
14
13
  * Immutable KV budget snapshot for one tick of the agent loop
15
14
  *
15
+ * Frozen at phase boundaries (PRODUCE, SETTLE, DISPATCH) so that all
16
+ * decisions within a phase are evaluated against the same baseline.
17
+ * Without this, items processed earlier in a loop would see different
18
+ * pressure than items processed later — making reject/nudge/kill
19
+ * decisions order-dependent and nondeterministic.
20
+ *
16
21
  * Created from `SessionContext._storeKvPressure()` which returns
17
22
  * `{ nCtx, cellsUsed, remaining }` where `remaining = nCtx - cellsUsed`.
18
23
  * `cellsUsed` tracks unique KV cells per branch — incremented on
@@ -93,6 +98,126 @@ class ContextPressure {
93
98
  }
94
99
  }
95
100
  exports.ContextPressure = ContextPressure;
/**
 * Inline recovery for a single killed agent (trailing stop).
 *
 * Prefills the extraction prompt into the agent's own branch, sets eager
 * report grammar, generates to stop token, parses JSON, reports result,
 * and prunes the branch — all before the tick loop continues. The freed
 * KV lets remaining agents keep researching.
 *
 * The branch is pruned on every exit path: policy skip, prompt too large,
 * decode failure, malformed JSON, an exception while building the prompt
 * or grammar, and halting of this operation (generator `return()`), which
 * would otherwise bypass any statement placed after the try block.
 *
 * @param agent - Killed agent; must expose `id`, `parentId`, `branch`,
 *   `result` and `reportResult(result, source)`.
 * @param policy - Agent policy; `onRecovery?.(agent)` decides whether to
 *   extract (returns `{ type, prompt: { system, user } }`) or skip.
 * @param ctx - Session context used for chat formatting, tokenization,
 *   grammar compilation and KV pressure snapshots.
 * @param store - Branch store providing `prefill(pairs)` and `commit(pairs)`.
 * @param tw - Trace writer (`nextId()`, `write(record)`).
 * @param parentTraceId - Trace id the recovery prefill record is parented to.
 * @param events - Event channel; `send(event)` is an operation.
 * @returns true if the agent reported findings.
 */
function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
    let reported = false;
    try {
        const recovery = policy.onRecovery?.(agent);
        if (!recovery || recovery.type === 'skip') {
            return false; // branch is pruned by the finally below
        }
        const { prompt } = ctx.formatChatSync(JSON.stringify([
            { role: 'system', content: recovery.prompt.system },
            { role: 'user', content: recovery.prompt.user },
        ]), { enableThinking: false });
        const sep = ctx.getTurnSeparator();
        const delta = ctx.tokenizeSync(prompt, false);
        const tokens = [...sep, ...delta];
        // Check if extraction prompt fits (raw remaining cells, no policy margins)
        const pressure = new ContextPressure(ctx);
        if (pressure.remaining < tokens.length) {
            return false; // branch is pruned by the finally below
        }
        // Eager report grammar: constrains output to `{ "result": "<string>" }`
        const reportGrammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify({
            type: 'object',
            properties: { result: { type: 'string' } },
            required: ['result'],
        })));
        // Recovery runs in its own scope — if decode fails (KV exhaustion),
        // the scope tears down cleanly without propagating to the pool.
        // Mirrors the old prepare()-based recovery which used try/catch around
        // a Resource with its own ensure().
        try {
            yield* (0, effection_1.scoped)(function* () {
                yield* (0, effection_1.call)(() => store.prefill([[agent.branch, tokens]]));
                agent.branch.setGrammar(reportGrammar);
                tw.write({
                    traceId: tw.nextId(), parentTraceId, ts: performance.now(),
                    type: 'branch:prefill', branchHandle: agent.id,
                    tokenCount: tokens.length, role: 'recovery',
                });
                yield* events.send({ type: 'agent:spawn', agentId: agent.id, parentAgentId: agent.parentId });
                // Single-agent produce/commit loop
                let output = '';
                let tokenCount = 0;
                for (;;) {
                    const { token, text, isStop } = agent.branch.produceSync();
                    if (isStop)
                        break;
                    output += text;
                    tokenCount++;
                    yield* (0, effection_1.call)(() => store.commit([[agent.branch, token]]));
                    yield* events.send({ type: 'agent:produce', agentId: agent.id, text, tokenCount });
                }
                // Parse + report (an empty `result` string counts as "nothing found")
                const parsed = JSON.parse(output);
                if (parsed?.result) {
                    agent.reportResult(parsed.result, 'scratchpad');
                    yield* events.send({ type: 'agent:report', agentId: agent.id, result: agent.result });
                    reported = true;
                }
            });
        }
        catch { /* decode failure or malformed JSON — non-fatal, prune below */ }
    }
    finally {
        // Always prune: runs on success, early return, thrown error and halt.
        if (!agent.branch.disposed)
            agent.branch.pruneSync();
    }
    // Emit tick so TUI updates pressure percentage after prune
    const postPressure = new ContextPressure(ctx);
    yield* events.send({ type: 'agent:tick', cellsUsed: postPressure.cellsUsed, nCtx: postPressure.nCtx });
    return reported;
}
183
+ // ── PRODUCE action handlers ─────────────────────────────────────
184
+ // Each handler encapsulates state transitions, events, and trace for one
185
+ // policy action outcome. The PRODUCE switch dispatches to these.
/**
 * PRODUCE handler for the `free_text_report` policy action.
 *
 * Stores `content` as the agent's result (source `'free_text'`), moves the
 * agent to `idle`, then announces the report followed by completion.
 *
 * @param a - Agent that produced the free-text answer.
 * @param content - Text to record as the agent's result.
 * @param events - Event channel; `send(event)` is an operation.
 */
function* handleFreeTextReport(a, content, events) {
    a.reportResult(content, 'free_text');
    a.transition('idle');
    const agentId = a.id;
    const announcements = [
        { type: 'agent:report', agentId, result: a.result },
        { type: 'agent:done', agentId },
    ];
    for (const announcement of announcements) {
        yield* events.send(announcement);
    }
}
/**
 * PRODUCE handler for the `idle` policy action.
 *
 * Moves the agent to `idle` and emits `agent:done`. Unless the agent simply
 * stopped after free text (`reason === 'free_text_stop'`), a
 * `pool:agentDrop` trace record is written first. `'max_turns'` is recorded
 * as `'maxTurns'`; every other reason is recorded as `'pressure_softcut'`.
 *
 * @param a - Agent being dropped.
 * @param reason - Policy-supplied idle reason.
 * @param events - Event channel; `send(event)` is an operation.
 * @param tw - Trace writer (`nextId()`, `write(record)`).
 * @param parentTraceId - Trace id the drop record is parented to.
 */
function* handleIdleDrop(a, reason, events, tw, parentTraceId) {
    a.transition('idle');
    const isPlainStop = reason === 'free_text_stop';
    if (!isPlainStop) {
        const dropReason = reason === 'max_turns' ? 'maxTurns' : 'pressure_softcut';
        tw.write({
            traceId: tw.nextId(),
            parentTraceId,
            ts: performance.now(),
            type: 'pool:agentDrop',
            agentId: a.id,
            reason: dropReason,
        });
    }
    yield* events.send({ type: 'agent:done', agentId: a.id });
}
/**
 * PRODUCE handler for the `nudge` policy action.
 *
 * Instead of running the requested tool, builds a synthetic tool result
 * `{ error: message }` for the agent and returns it as a settle item so the
 * caller can prefill it in the SETTLE phase. The agent's turn counter is
 * incremented, it is moved to `awaiting_tool`, and its turn state is reset.
 *
 * This is a generator with no yields so that call sites can use `yield*`
 * uniformly with the other PRODUCE handlers.
 *
 * @param a - Agent being nudged.
 * @param message - Nudge text, delivered as the `error` field of the result.
 * @param tc - First parsed tool call, if any; supplies `id` and `name`.
 *   When absent, the call id falls back to `call_<toolCallCount>` and the
 *   tool name to `''`.
 * @param ctx - Session context passed through to `buildToolResultDelta`.
 * @param tools - Optional tool registry (`get(name)`). A tool may expose a
 *   `probe(result)` method; tools without one are treated as "no probe"
 *   rather than raising a TypeError after the agent has already been moved
 *   to `awaiting_tool`.
 * @returns `{ agentId, prefillTokens, toolName, callId, probe }`, where
 *   `probe` is `undefined` when the tool provides none (or returns null).
 */
function* handleNudge(a, message, tc, ctx, tools) {
    const toolName = tc?.name || '';
    const callId = tc?.id || `call_${a.toolCallCount}`;
    const nudgeResult = { error: message };
    a.incrementTurns();
    a.transition('awaiting_tool');
    const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), callId);
    const tool = tools?.get(toolName);
    // Called as a method so `this` stays bound to the tool.
    const probe = typeof tool?.probe === 'function'
        ? (tool.probe(nudgeResult) ?? undefined)
        : undefined;
    a.resetTurn();
    return { agentId: a.id, prefillTokens, toolName, callId, probe };
}
/**
 * PRODUCE handler for the `report` policy action (terminal tool call).
 *
 * Records `result` on the agent (source `'report_tool'`), moves it to
 * `idle`, bumps its tool-call counter, then emits `agent:tool_call`,
 * `agent:report` and `agent:done` in that order. When `pruneOnReport` is
 * set and the branch is still alive, the branch is pruned afterwards.
 *
 * @param a - Reporting agent.
 * @param result - Findings to record as the agent's result.
 * @param tc - Parsed terminal tool call; only `arguments` is read. May be
 *   undefined (e.g. a policy that reports without a parsed call), in which
 *   case the event carries `args: undefined` instead of throwing after the
 *   agent state has already been mutated.
 * @param terminalTool - Name of the terminal tool, used in the event.
 * @param pruneOnReport - Whether to prune the agent's branch after reporting.
 * @param events - Event channel; `send(event)` is an operation.
 */
function* handleReport(a, result, tc, terminalTool, pruneOnReport, events) {
    a.reportResult(result, 'report_tool');
    a.transition('idle');
    a.incrementToolCalls();
    yield* events.send({ type: 'agent:tool_call', agentId: a.id, tool: terminalTool, args: tc?.arguments });
    yield* events.send({ type: 'agent:report', agentId: a.id, result: a.result });
    yield* events.send({ type: 'agent:done', agentId: a.id });
    if (pruneOnReport && !a.branch.disposed)
        a.branch.pruneSync();
}
96
221
  /**
97
222
  * Fork an agent from a parent branch with its own system prompt and task.
98
223
  *
@@ -318,232 +443,91 @@ function useAgentPool(opts) {
318
443
  };
319
444
  for (const a of agents)
320
445
  applyLazyGrammar(a);
321
- // ── Tool dispatch coordination ───────────────────────────
322
- // Tool results land in settledBuffer during DISPATCH, drained by SETTLE
323
- // in the next tick. DISPATCH awaits each tool to completion via
324
- // scoped() + call() — no concurrent llama_decode possible.
325
- const settledBuffer = [];
326
- const dispatchedProbes = new Map();
327
446
  const agentById = new Map(agents.map(a => [a.id, a]));
328
447
  let steps = 0;
329
448
  let totalToolCalls = 0;
330
- const counters = {
331
- warmPrefillCalls: 0,
332
- warmPrefillBranches: 0,
333
- };
334
- // ── Four-phase tick loop ─────────────────────────────────
335
- for (;;) {
336
- // -- Phase 1: PRODUCE -- sample from active agents, collect tool calls
337
- const pressure = new ContextPressure(ctx, pressureOpts);
338
- if (trace && (pressure.critical || pressure.headroom < 0)) {
449
+ const counters = { warmPrefillCalls: 0, warmPrefillBranches: 0 };
450
+ // ── Phase operations (close over pool scope) ────────────
451
+ /** SETTLE: prefill tool results that fit, defer oversized items for next tick */
452
+ function* settle(items) {
453
+ const settlePressure = new ContextPressure(ctx, pressureOpts);
454
+ let headroom = settlePressure.headroom;
455
+ if (trace) {
456
+ const desc = items.map(s => `${s.toolName}:${s.prefillTokens.length}`).join(', ');
339
457
  try {
340
- process.stderr.write(`[PRODUCE] ${pressure.critical ? 'CRITICAL' : 'SOFT_LIMIT'} remaining=${pressure.remaining} headroom=${pressure.headroom} cellsUsed=${pressure.cellsUsed} nCtx=${pressure.nCtx}\n`);
458
+ process.stderr.write(`[SETTLE] remaining=${settlePressure.remaining} headroom=${headroom} cellsUsed=${settlePressure.cellsUsed} nCtx=${settlePressure.nCtx} items=[${desc}]\n`);
341
459
  }
342
460
  catch { }
343
461
  }
344
- const entries = [];
345
- const toolCalls = [];
346
- for (const a of agents) {
347
- if (a.status !== 'active')
348
- continue;
349
- const policyExit = policy.shouldExit?.(a, pressure);
350
- if (policyExit ?? pressure.critical) {
351
- a.transition('idle');
352
- const exitReason = pressure.critical ? 'pressure_critical'
353
- : policyExit ? 'policy_exit'
354
- : 'pressure_critical';
355
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
356
- type: 'pool:agentDrop', agentId: a.id, reason: exitReason });
357
- yield* events.send({ type: 'agent:done', agentId: a.id });
462
+ const prefillPairs = [];
463
+ const settledAgents = [];
464
+ const deferred = [];
465
+ for (const item of items) {
466
+ const a = agentById.get(item.agentId);
467
+ if (!a || a.status === 'idle')
358
468
  continue;
359
- }
360
- const { token, text, isStop } = a.branch.produceSync();
361
- if (isStop) {
362
- const parsed = ctx.parseChatOutput(a.rawOutput, a.fmt.format, {
363
- reasoningFormat: a.fmt.reasoningFormat,
364
- generationPrompt: a.fmt.generationPrompt,
365
- parser: a.fmt.parser,
366
- });
367
- tw.write({
368
- traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
369
- type: 'agent:turn', agentId: a.id, turn: a.turns,
370
- rawOutput: a.rawOutput,
371
- parsedContent: parsed.content || null,
372
- parsedToolCalls: parsed.toolCalls.map(tc => ({ name: tc.name, arguments: tc.arguments })),
373
- });
374
- // Policy decides what to do with the parsed output
375
- const action = policy.onProduced(a, parsed, pressure, policyConfig);
376
- switch (action.type) {
377
- case 'free_text_report':
378
- a.reportResult(action.content, 'free_text');
379
- a.transition('idle');
380
- yield* events.send({ type: 'agent:report', agentId: a.id, result: a.result });
381
- yield* events.send({ type: 'agent:done', agentId: a.id });
382
- continue;
383
- case 'idle':
384
- a.transition('idle');
385
- if (action.reason !== 'free_text_stop') {
386
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
387
- type: 'pool:agentDrop', agentId: a.id,
388
- reason: action.reason === 'max_turns' ? 'maxTurns' : 'pressure_softcut' });
389
- }
390
- yield* events.send({ type: 'agent:done', agentId: a.id });
391
- continue;
392
- case 'nudge': {
393
- const tc = parsed.toolCalls[0];
394
- const callId = tc?.id || `call_${a.toolCallCount}`;
395
- const nudgeMsg = JSON.stringify({ error: action.message });
396
- a.incrementTurns();
397
- a.transition('awaiting_tool');
398
- const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, nudgeMsg, callId);
399
- settledBuffer.push({ agentId: a.id, prefillTokens, toolName: tc?.name || '', callId });
400
- a.resetTurn();
401
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
402
- type: 'pool:agentNudge', agentId: a.id, reason: 'pressure_softcut' });
403
- continue;
469
+ if (item.prefillTokens.length > headroom) {
470
+ if (trace) {
471
+ try {
472
+ process.stderr.write(`[SETTLE] DEFER ${item.toolName}:${item.prefillTokens.length} > headroom=${headroom}\n`);
404
473
  }
405
- case 'report':
406
- a.reportResult(action.result, 'report_tool');
407
- a.transition('idle');
408
- a.incrementToolCalls();
409
- totalToolCalls++;
410
- yield* events.send({ type: 'agent:tool_call', agentId: a.id, tool: terminalTool, args: parsed.toolCalls[0].arguments });
411
- yield* events.send({ type: 'agent:report', agentId: a.id, result: a.result });
412
- yield* events.send({ type: 'agent:done', agentId: a.id });
413
- if (pruneOnReport && !a.branch.disposed) {
414
- a.branch.pruneSync();
415
- }
416
- continue;
417
- case 'tool_call':
418
- a.transition('awaiting_tool');
419
- toolCalls.push({ agent: a, tc: action.tc });
420
- a.resetTurn();
421
- continue;
474
+ catch { }
422
475
  }
476
+ deferred.push(item);
477
+ continue;
423
478
  }
424
- entries.push([a.branch, token]);
425
- if (trace) {
426
- const entropy = a.branch.modelEntropy();
427
- const surprisal = a.branch.modelSurprisal(token);
428
- a.accumulateTokenWithTrace(text, entropy, surprisal);
429
- yield* events.send({
430
- type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount,
431
- entropy, surprisal,
432
- });
433
- }
434
- else {
435
- a.accumulateToken(text);
436
- yield* events.send({ type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount });
437
- }
438
- }
439
- // -- Phase 2: COMMIT -- batch-decode produced tokens
440
- if (entries.length > 0) {
441
- yield* (0, effection_1.call)(() => store.commit(entries));
442
- steps++;
443
- const commitPressure = new ContextPressure(ctx, pressureOpts);
444
- yield* events.send({ type: 'agent:tick', cellsUsed: commitPressure.cellsUsed, nCtx: commitPressure.nCtx });
479
+ prefillPairs.push([a.branch, item.prefillTokens]);
480
+ settledAgents.push(a);
481
+ headroom -= item.prefillTokens.length;
482
+ const postSettle = new ContextPressure(ctx, pressureOpts);
483
+ a.recordToolResult({
484
+ name: item.toolName, args: item.callId,
485
+ resultTokenCount: item.prefillTokens.length,
486
+ contextAfterPercent: postSettle.percentAvailable,
487
+ timestamp: performance.now(),
488
+ });
489
+ tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
490
+ type: 'branch:prefill', branchHandle: a.id,
491
+ tokenCount: item.prefillTokens.length, role: 'toolResult' });
445
492
  }
446
- // -- Phase 3: SETTLE -- drain settled tool buffer, batch prefill
447
- const settled = settledBuffer.splice(0);
448
- if (settled.length > 0) {
449
- // Fresh snapshot — Phase 2 commits may have advanced positions
450
- const settlePressure = new ContextPressure(ctx, pressureOpts);
451
- let headroom = settlePressure.headroom;
493
+ if (prefillPairs.length > 0) {
452
494
  if (trace) {
453
- const items = settled.map(s => `${s.toolName}:${s.prefillTokens.length}`).join(', ');
495
+ const total = prefillPairs.reduce((s, [, t]) => s + t.length, 0);
454
496
  try {
455
- process.stderr.write(`[SETTLE] remaining=${settlePressure.remaining} headroom=${headroom} cellsUsed=${settlePressure.cellsUsed} nCtx=${settlePressure.nCtx} items=[${items}]\n`);
497
+ process.stderr.write(`[SETTLE] PREFILL ${prefillPairs.length} branches, ${total} tokens, headroom_after=${headroom}\n`);
456
498
  }
457
499
  catch { }
458
500
  }
459
- const prefillPairs = [];
460
- const settledAgents = [];
461
- for (const item of settled) {
462
- const a = agentById.get(item.agentId);
463
- if (!a || a.status === 'idle')
464
- continue;
465
- if (item.prefillTokens.length > headroom) {
466
- if (trace) {
467
- try {
468
- process.stderr.write(`[SETTLE] REJECT ${item.toolName}:${item.prefillTokens.length} > headroom=${headroom}\n`);
469
- }
470
- catch { }
471
- }
472
- const settleAction = policy.onSettleReject(a, item.prefillTokens.length, settlePressure, policyConfig);
473
- if (settleAction.type === 'nudge') {
474
- const nudgeMsg = JSON.stringify({ error: settleAction.message });
475
- const nudgeTokens = (0, sdk_2.buildToolResultDelta)(ctx, nudgeMsg, item.callId);
476
- if (nudgeTokens.length <= headroom) {
477
- prefillPairs.push([a.branch, nudgeTokens]);
478
- settledAgents.push(a);
479
- headroom -= nudgeTokens.length;
480
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
481
- type: 'pool:agentNudge', agentId: a.id, reason: 'pressure_settle_reject' });
482
- continue;
483
- }
484
- }
485
- // Nudge failed (tokens don't fit) or policy said kill
501
+ yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
502
+ counters.warmPrefillCalls++;
503
+ counters.warmPrefillBranches += prefillPairs.length;
504
+ // Probe prefill from DISPATCH
505
+ const probePairs = [];
506
+ for (const a of settledAgents) {
507
+ const probe = items.find(s => s.agentId === a.id)?.probe;
508
+ if (probe) {
509
+ const probeTokens = ctx.tokenizeSync(probe, false);
510
+ probePairs.push([a.branch, probeTokens]);
486
511
  tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
487
- type: 'pool:agentDrop', agentId: a.id, reason: 'pressure_settle_reject' });
488
- a.transition('idle');
489
- yield* events.send({ type: 'agent:done', agentId: a.id });
490
- continue;
512
+ type: 'branch:prefill', branchHandle: a.id,
513
+ tokenCount: probeTokens.length, role: 'probe', probeText: probe });
491
514
  }
492
- prefillPairs.push([a.branch, item.prefillTokens]);
493
- settledAgents.push(a);
494
- headroom -= item.prefillTokens.length;
495
- // Record tool history for policy decisions
496
- const postSettle = new ContextPressure(ctx, pressureOpts);
497
- a.recordToolResult({
498
- name: item.toolName,
499
- args: item.callId,
500
- resultTokenCount: item.prefillTokens.length,
501
- contextAfterPercent: postSettle.percentAvailable,
502
- timestamp: performance.now(),
503
- });
504
- tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
505
- type: 'branch:prefill', branchHandle: a.id,
506
- tokenCount: item.prefillTokens.length, role: 'toolResult' });
507
515
  }
508
- if (prefillPairs.length > 0) {
509
- if (trace) {
510
- const totalPrefill = prefillPairs.reduce((s, [, t]) => s + t.length, 0);
511
- try {
512
- process.stderr.write(`[SETTLE] PREFILL ${prefillPairs.length} branches, ${totalPrefill} tokens, headroom_after=${headroom}\n`);
513
- }
514
- catch { }
515
- }
516
- yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
517
- counters.warmPrefillCalls++;
518
- counters.warmPrefillBranches += prefillPairs.length;
519
- // Prefill per-tool reasoning probes for agents that just got real
520
- // tool results. Each tool can optionally return a probe string via
521
- // its `probe` getter — prefilled after the tool result to nudge the
522
- // model into prose reasoning before the next tool call.
523
- const probePairs = [];
524
- for (const a of settledAgents) {
525
- const probe = dispatchedProbes.get(a.id);
526
- if (probe)
527
- probePairs.push([a.branch, ctx.tokenizeSync(probe, false)]);
528
- }
529
- if (probePairs.length > 0) {
530
- yield* (0, effection_1.call)(() => store.prefill(probePairs));
531
- }
532
- dispatchedProbes.clear();
533
- // Only NOW transition state + reset grammar
534
- for (const a of settledAgents) {
535
- a.transition('active');
536
- a.resetTurn();
537
- applyLazyGrammar(a);
538
- }
516
+ if (probePairs.length > 0) {
517
+ yield* (0, effection_1.call)(() => store.prefill(probePairs));
518
+ }
519
+ for (const a of settledAgents) {
520
+ a.transition('active');
521
+ a.resetTurn();
522
+ applyLazyGrammar(a);
539
523
  }
540
524
  }
541
- // -- Phase 4: DISPATCH -- execute collected tool calls sequentially
542
- // scoped() creates an error boundary — inner pool errors are caught
543
- // here instead of crashing the outer pool. call() yields the Operation
544
- // directly, ensuring exclusive llama_context access (no concurrent
545
- // AsyncWorkers). See docs/agents/concurrency.md.
546
- for (const { agent, tc } of toolCalls) {
525
+ return deferred;
526
+ }
527
+ /** DISPATCH: execute tool calls sequentially, return settled items for next tick */
528
+ function* dispatch(calls) {
529
+ const results = [];
530
+ for (const { agent, tc } of calls) {
547
531
  let toolArgs;
548
532
  try {
549
533
  toolArgs = JSON.parse(tc.arguments);
@@ -557,11 +541,6 @@ function useAgentPool(opts) {
557
541
  agent.incrementTurns();
558
542
  yield* events.send({ type: 'agent:tool_call', agentId: agent.id, tool: tc.name, args: tc.arguments });
559
543
  const tool = tools.get(tc.name);
560
- // Fresh pressure snapshot — SETTLE may have consumed significant KV
561
- // since the PRODUCE-phase snapshot at tick-top. On 16K context, a
562
- // single SETTLE pass can drain 12-18% of capacity (3 agents' tool
563
- // results). Using stale PRODUCE pressure here would keep agents in
564
- // explore mode past the threshold.
565
544
  const dispatchPressure = new ContextPressure(ctx, pressureOpts);
566
545
  const explore = policy.shouldExplore?.(agent, dispatchPressure) ?? true;
567
546
  const dispatchTraceId = tw.nextId();
@@ -574,28 +553,23 @@ function useAgentPool(opts) {
574
553
  explore, percentAvailable: dispatchPressure.percentAvailable,
575
554
  });
576
555
  const toolContext = {
577
- agentId: agent.id,
578
- branch: agent.branch,
556
+ agentId: agent.id, branch: agent.branch,
579
557
  onProgress: (p) => {
580
558
  progressBridge.send({ type: 'agent:tool_progress', agentId: agent.id, tool: tc.name, filled: p.filled, total: p.total });
581
559
  },
582
- scorer: opts.scorer,
583
- explore,
560
+ scorer: opts.scorer, explore,
584
561
  pressurePercentAvailable: dispatchPressure.percentAvailable,
585
562
  };
586
563
  try {
587
- // Set TraceParent + CallingAgent so inner pools inherit lineage
588
564
  yield* context_1.TraceParent.set(dispatchTraceId);
589
565
  yield* context_1.CallingAgent.set(agent);
590
566
  const result = yield* (0, effection_1.scoped)(function* () {
591
567
  return yield* (0, effection_1.call)(() => tool ? tool.execute(toolArgs, toolContext) : Promise.resolve({ error: `Unknown tool: ${tc.name}` }));
592
568
  });
593
- // Inject context availability into tool result so agent can make pressure-aware decisions
594
569
  const postToolPressure = new ContextPressure(ctx, pressureOpts);
595
570
  const contextAvailablePercent = postToolPressure.percentAvailable;
596
571
  if (result && typeof result === 'object' && !Array.isArray(result)) {
597
572
  result._contextAvailablePercent = contextAvailablePercent;
598
- // Collect nested results from recursive tool returns
599
573
  const resultObj = result;
600
574
  if (Array.isArray(resultObj.results)) {
601
575
  agent.addNestedResults(resultObj.results.filter((f) => typeof f === 'string'));
@@ -607,109 +581,151 @@ function useAgentPool(opts) {
607
581
  const resultStr = JSON.stringify(result);
608
582
  yield* events.send({ type: 'agent:tool_result', agentId: agent.id, tool: tc.name, result: resultStr, contextAvailablePercent });
609
583
  const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId);
610
- settledBuffer.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId });
611
- const probe = tool?.probe;
612
- if (probe)
613
- dispatchedProbes.set(agent.id, probe);
614
- tw.write({
615
- traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
584
+ const probe = tool?.probe(result) ?? undefined;
585
+ results.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId, probe });
586
+ tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
616
587
  type: 'tool:result', agentId: agent.id, tool: tc.name,
617
588
  result, prefillTokenCount: prefillTokens.length,
618
- durationMs: performance.now() - toolT0,
619
- });
589
+ durationMs: performance.now() - toolT0 });
620
590
  }
621
591
  catch (err) {
622
592
  agent.transition('idle');
623
593
  agent.reportResult(`Tool error: ${err.message}`, 'tool_error');
624
- tw.write({
625
- traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
594
+ tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
626
595
  type: 'tool:error', agentId: agent.id, tool: tc.name,
627
- error: err.message,
628
- });
596
+ error: err.message });
629
597
  }
630
598
  }
631
- // -- Termination
632
- if (agents.every(a => a.status === 'idle' || a.status === 'disposed'))
633
- break;
599
+ return results;
634
600
  }
635
- // ── Idle processing: scratchpad recovery ─────────────────
636
- // Policy decides per-agent whether to extract findings from killed agents.
637
- // The pool owns the grammar and fork/generate/parse mechanics.
638
- // Free KV from agents that already reported — gives room for extraction.
639
- for (const a of agents) {
640
- if (a.result && !a.branch.disposed) {
641
- a.branch.pruneSync();
601
+ // ── Tick-loop state: settle items carried over to the next tick ──
602
+ let pendingSettled = [];
603
+ // ── Four-phase tick loop ─────────────────────────────────
604
+ let recoveryAttempted = false;
605
+ for (;;) {
606
+ // -- Phase 1: PRODUCE -- sample from active agents, collect tool calls
607
+ policy.resetTick?.();
608
+ const pressure = new ContextPressure(ctx, pressureOpts);
609
+ if (trace && (pressure.critical || pressure.headroom < 0)) {
610
+ try {
611
+ process.stderr.write(`[PRODUCE] ${pressure.critical ? 'CRITICAL' : 'SOFT_LIMIT'} remaining=${pressure.remaining} headroom=${pressure.headroom} cellsUsed=${pressure.cellsUsed} nCtx=${pressure.nCtx}\n`);
612
+ }
613
+ catch { }
642
614
  }
643
- }
644
- // Check if any agent needs recovery before setting up grammar
645
- const needsRecovery = agents.some(a => a.status === 'idle' && !a.result && !a.branch.disposed &&
646
- policy.onRecovery?.(a)?.type === 'extract');
647
- if (needsRecovery) {
648
- const reportSchema = {
649
- type: 'object',
650
- properties: { result: { type: 'string' } },
651
- required: ['result'],
652
- };
653
- const reportGrammar = yield* (0, effection_1.call)(() => ctx.jsonSchemaToGrammar(JSON.stringify(reportSchema)));
654
- // Cache formatted prompts per unique prompt object
655
- const promptCache = new Map();
615
+ const entries = [];
616
+ const toolCalls = [];
617
+ const nudges = [];
656
618
  for (const a of agents) {
657
- if (a.status !== 'idle' || a.result || a.branch.disposed)
619
+ if (a.status !== 'active')
658
620
  continue;
659
- const recovery = policy.onRecovery?.(a);
660
- if (!recovery || recovery.type === 'skip') {
661
- if (!a.branch.disposed)
662
- a.branch.pruneSync();
621
+ const policyExit = policy.shouldExit?.(a, pressure);
622
+ if (policyExit ?? pressure.critical) {
623
+ a.transition('idle');
624
+ const exitReason = pressure.critical ? 'pressure_critical'
625
+ : policyExit ? 'policy_exit'
626
+ : 'pressure_critical';
627
+ tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
628
+ type: 'pool:agentDrop', agentId: a.id, reason: exitReason });
629
+ yield* events.send({ type: 'agent:done', agentId: a.id });
630
+ // Trailing stop: extract findings inline, free KV for remaining agents
631
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, events);
663
632
  continue;
664
633
  }
665
- // Format extraction prompt (cache by system+user key)
666
- const cacheKey = recovery.prompt.system + '\0' + recovery.prompt.user;
667
- let extractionPromptStr = promptCache.get(cacheKey);
668
- if (!extractionPromptStr) {
669
- const reportMessages = [
670
- { role: 'system', content: recovery.prompt.system },
671
- { role: 'user', content: recovery.prompt.user },
672
- ];
673
- const { prompt } = ctx.formatChatSync(JSON.stringify(reportMessages), { enableThinking: false });
674
- extractionPromptStr = prompt;
675
- promptCache.set(cacheKey, prompt);
634
+ const { token, text, isStop } = a.branch.produceSync();
635
+ if (isStop) {
636
+ const parsed = ctx.parseChatOutput(a.rawOutput, a.fmt.format, {
637
+ reasoningFormat: a.fmt.reasoningFormat,
638
+ generationPrompt: a.fmt.generationPrompt,
639
+ parser: a.fmt.parser,
640
+ });
641
+ tw.write({
642
+ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
643
+ type: 'agent:turn', agentId: a.id, turn: a.turns,
644
+ rawOutput: a.rawOutput,
645
+ parsedContent: parsed.content || null,
646
+ parsedToolCalls: parsed.toolCalls.map(tc => ({ name: tc.name, arguments: tc.arguments })),
647
+ });
648
+ // Policy decides what to do with the parsed output
649
+ const action = policy.onProduced(a, parsed, pressure, policyConfig);
650
+ switch (action.type) {
651
+ case 'free_text_report':
652
+ yield* handleFreeTextReport(a, action.content, events);
653
+ continue;
654
+ case 'idle':
655
+ yield* handleIdleDrop(a, action.reason, events, tw, poolScope.traceId);
656
+ continue;
657
+ case 'nudge':
658
+ nudges.push(yield* handleNudge(a, action.message, parsed.toolCalls[0], ctx, tools));
659
+ tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
660
+ type: 'pool:agentNudge', agentId: a.id, reason: 'pressure_softcut' });
661
+ continue;
662
+ case 'report':
663
+ yield* handleReport(a, action.result, parsed.toolCalls[0], terminalTool, pruneOnReport, events);
664
+ totalToolCalls++;
665
+ continue;
666
+ case 'tool_call':
667
+ a.transition('awaiting_tool');
668
+ toolCalls.push({ agent: a, tc: action.tc });
669
+ a.resetTurn();
670
+ continue;
671
+ }
676
672
  }
677
- try {
678
- yield* events.send({ type: 'agent:spawn', agentId: a.id, parentAgentId: a.parentId });
679
- const branch = yield* (0, generate_1.prepare)({
680
- prompt: extractionPromptStr,
681
- grammar: reportGrammar,
682
- parent: a.branch,
673
+ entries.push([a.branch, token]);
674
+ if (trace) {
675
+ const entropy = a.branch.modelEntropy();
676
+ const surprisal = a.branch.modelSurprisal(token);
677
+ a.accumulateTokenWithTrace(text, entropy, surprisal);
678
+ yield* events.send({
679
+ type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount,
680
+ entropy, surprisal,
683
681
  });
684
- try {
685
- let output = '';
686
- let tokenCount = 0;
687
- yield* (0, effection_1.call)(async () => {
688
- for await (const { text } of branch) {
689
- output += text;
690
- tokenCount++;
691
- }
692
- });
693
- const tickPressure = new ContextPressure(ctx, pressureOpts);
694
- yield* events.send({
695
- type: 'agent:tick', cellsUsed: tickPressure.cellsUsed, nCtx: tickPressure.nCtx,
696
- });
697
- const parsed = JSON.parse(output);
698
- if (parsed?.result) {
699
- a.reportResult(parsed.result, 'scratchpad');
700
- yield* events.send({ type: 'agent:report', agentId: a.id, result: a.result });
682
+ }
683
+ else {
684
+ a.accumulateToken(text);
685
+ yield* events.send({ type: 'agent:produce', agentId: a.id, text, tokenCount: a.tokenCount });
686
+ }
687
+ }
688
+ // -- Phase 2: COMMIT -- batch-decode produced tokens
689
+ if (entries.length > 0) {
690
+ yield* (0, effection_1.call)(() => store.commit(entries));
691
+ steps++;
692
+ const commitPressure = new ContextPressure(ctx, pressureOpts);
693
+ yield* events.send({ type: 'agent:tick', cellsUsed: commitPressure.cellsUsed, nCtx: commitPressure.nCtx });
694
+ }
695
+ // -- Phase 3: SETTLE (settle what fits, defer what doesn't)
696
+ const toSettle = [...pendingSettled, ...nudges];
697
+ const deferred = toSettle.length > 0 ? yield* settle(toSettle) : [];
698
+ // Stall-breaker: if items are deferred and no active agents remain,
699
+ // sacrifice an awaiting_tool agent to free KV. Without this, agents
700
+ // with oversized results stay awaiting_tool indefinitely — PRODUCE
701
+ // skips them, headroom never recovers, the pool loops forever.
702
+ if (deferred.length > 0 && !agents.some(a => a.status === 'active')) {
703
+ const victim = agents.find(a => a.status === 'awaiting_tool' && !a.branch.disposed);
704
+ if (victim) {
705
+ victim.transition('idle');
706
+ tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
707
+ type: 'pool:agentDrop', agentId: victim.id, reason: 'pressure_settle_reject' });
708
+ yield* events.send({ type: 'agent:done', agentId: victim.id });
709
+ yield* recoverInline(victim, policy, ctx, store, tw, poolScope.traceId, events);
710
+ }
711
+ }
712
+ // -- Phase 4: DISPATCH
713
+ const dispatched = yield* dispatch(toolCalls);
714
+ // Deferred + new dispatch results → next tick's SETTLE
715
+ pendingSettled = [...deferred, ...dispatched];
716
+ // -- Termination + recovery
717
+ if (agents.every(a => a.status === 'idle' || a.status === 'disposed')) {
718
+ if (!recoveryAttempted) {
719
+ recoveryAttempted = true;
720
+ // Recover any idle agents that weren't handled by inline recovery
721
+ // (e.g., killed by max_turns, time budget, or free_text_stop)
722
+ for (const a of agents) {
723
+ if (a.status === 'idle' && !a.result && !a.branch.disposed) {
724
+ yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, events);
701
725
  }
702
726
  }
703
- finally {
704
- if (!branch.disposed)
705
- branch.pruneSync();
706
- }
707
727
  }
708
- catch {
709
- /* extraction failure non-fatal */
710
- }
711
- if (!a.branch.disposed)
712
- a.branch.pruneSync();
728
+ break;
713
729
  }
714
730
  }
715
731
  // ── Provide result — suspends, branches stay alive ───────