@lloyal-labs/lloyal-agents 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -97
- package/dist/Agent.d.ts +26 -0
- package/dist/Agent.d.ts.map +1 -1
- package/dist/Agent.js +22 -0
- package/dist/Agent.js.map +1 -1
- package/dist/AgentPolicy.d.ts +27 -10
- package/dist/AgentPolicy.d.ts.map +1 -1
- package/dist/AgentPolicy.js +78 -16
- package/dist/AgentPolicy.js.map +1 -1
- package/dist/agent-pool.d.ts +14 -2
- package/dist/agent-pool.d.ts.map +1 -1
- package/dist/agent-pool.js +415 -148
- package/dist/agent-pool.js.map +1 -1
- package/dist/context.d.ts +18 -1
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +18 -1
- package/dist/context.js.map +1 -1
- package/dist/create-agent-pool.d.ts +33 -15
- package/dist/create-agent-pool.d.ts.map +1 -1
- package/dist/create-agent-pool.js +34 -10
- package/dist/create-agent-pool.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -1
- package/dist/index.js.map +1 -1
- package/dist/orchestrators.d.ts +161 -0
- package/dist/orchestrators.d.ts.map +1 -0
- package/dist/orchestrators.js +173 -0
- package/dist/orchestrators.js.map +1 -0
- package/dist/replay.d.ts +96 -0
- package/dist/replay.d.ts.map +1 -0
- package/dist/replay.js +108 -0
- package/dist/replay.js.map +1 -0
- package/dist/shared-root.d.ts +56 -18
- package/dist/shared-root.d.ts.map +1 -1
- package/dist/shared-root.js +79 -52
- package/dist/shared-root.js.map +1 -1
- package/dist/trace-types.d.ts +22 -2
- package/dist/trace-types.d.ts.map +1 -1
- package/dist/types.d.ts +33 -5
- package/dist/types.d.ts.map +1 -1
- package/dist/use-agent.d.ts.map +1 -1
- package/dist/use-agent.js +13 -17
- package/dist/use-agent.js.map +1 -1
- package/package.json +5 -5
package/dist/agent-pool.js
CHANGED
|
@@ -52,8 +52,20 @@ const AgentPolicy_1 = require("./AgentPolicy");
|
|
|
52
52
|
class ContextPressure {
|
|
53
53
|
/** Default softLimit: 1024 tokens reserved for downstream work */
|
|
54
54
|
static DEFAULT_SOFT_LIMIT = 1024;
|
|
55
|
-
/**
|
|
56
|
-
|
|
55
|
+
/**
|
|
56
|
+
* Default hardLimit: 512 tokens — matches llama.cpp's default `n_batch`.
|
|
57
|
+
* The pool validates at startup that `hardLimit >= nBatch`; the default
|
|
58
|
+
* is sized to satisfy the invariant for the default llama.cpp context.
|
|
59
|
+
* Recovery fits within the `hardLimit` reserve.
|
|
60
|
+
*/
|
|
61
|
+
static DEFAULT_HARD_LIMIT = 512;
|
|
62
|
+
/**
|
|
63
|
+
* Assumed `nBatch` when the native binding doesn't expose it.
|
|
64
|
+
* Pool startup validates `pressureThresholds.hardLimit >= this`.
|
|
65
|
+
* TODO: once `SessionContext.nBatch` is exposed (lloyal.node
|
|
66
|
+
* follow-up), read from ctx.nBatch instead.
|
|
67
|
+
*/
|
|
68
|
+
static ASSUMED_N_BATCH = 512;
|
|
57
69
|
/** Total KV cache capacity (max positions). 0 when no context limit. */
|
|
58
70
|
nCtx;
|
|
59
71
|
/** KV cells currently in use (monotonic within a pool run). */
|
|
@@ -108,8 +120,10 @@ exports.ContextPressure = ContextPressure;
|
|
|
108
120
|
*
|
|
109
121
|
* Returns true if the agent reported findings.
|
|
110
122
|
*/
|
|
111
|
-
function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
|
|
112
|
-
|
|
123
|
+
function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events, pressureOpts) {
|
|
124
|
+
// Fresh snapshot — the policy uses this to compute the recovery budget
|
|
125
|
+
// (reflected in the rendered prompt via `<%= it.budget %>`).
|
|
126
|
+
const recovery = policy.onRecovery?.(agent, new ContextPressure(ctx, pressureOpts));
|
|
113
127
|
if (!recovery || recovery.type === 'skip') {
|
|
114
128
|
if (!agent.branch.disposed)
|
|
115
129
|
agent.branch.pruneSync();
|
|
@@ -132,8 +146,13 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
|
|
|
132
146
|
required: ['result'],
|
|
133
147
|
})));
|
|
134
148
|
// Recovery runs in its own scope — if prefill or decode fails
|
|
135
|
-
// (KV exhaustion), the scope tears down cleanly.
|
|
149
|
+
// (KV exhaustion), the scope tears down cleanly. Diagnostic trace
|
|
150
|
+
// events (pool:recoveryProduce + recoveryReport/recoveryFailed) make
|
|
151
|
+
// silent recovery failures observable in traces.
|
|
136
152
|
let reported = false;
|
|
153
|
+
let output = '';
|
|
154
|
+
let producedTokens = 0;
|
|
155
|
+
let failureReason = null;
|
|
137
156
|
try {
|
|
138
157
|
yield* (0, effection_1.scoped)(function* () {
|
|
139
158
|
yield* (0, effection_1.call)(() => store.prefill([[agent.branch, tokens]]));
|
|
@@ -144,27 +163,53 @@ function* recoverInline(agent, policy, ctx, store, tw, parentTraceId, events) {
|
|
|
144
163
|
tokenCount: tokens.length, role: 'recovery',
|
|
145
164
|
});
|
|
146
165
|
// Single-agent produce/commit loop
|
|
147
|
-
let output = '';
|
|
148
|
-
let tokenCount = 0;
|
|
149
166
|
for (;;) {
|
|
150
167
|
const { token, text, isStop } = agent.branch.produceSync();
|
|
151
168
|
if (isStop)
|
|
152
169
|
break;
|
|
153
170
|
output += text;
|
|
154
|
-
|
|
171
|
+
producedTokens++;
|
|
155
172
|
yield* (0, effection_1.call)(() => store.commit([[agent.branch, token]]));
|
|
156
|
-
yield* events.send({ type: 'agent:produce', agentId: agent.id, text, tokenCount });
|
|
173
|
+
yield* events.send({ type: 'agent:produce', agentId: agent.id, text, tokenCount: producedTokens });
|
|
157
174
|
}
|
|
175
|
+
tw.write({
|
|
176
|
+
traceId: tw.nextId(), parentTraceId, ts: performance.now(),
|
|
177
|
+
type: 'pool:recoveryProduce', agentId: agent.id,
|
|
178
|
+
tokenCount: producedTokens, outputLength: output.length,
|
|
179
|
+
});
|
|
158
180
|
// Parse + report
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
181
|
+
try {
|
|
182
|
+
const parsed = JSON.parse(output);
|
|
183
|
+
if (parsed?.result) {
|
|
184
|
+
agent.reportResult(parsed.result, 'scratchpad');
|
|
185
|
+
yield* events.send({ type: 'agent:report', agentId: agent.id, result: agent.result });
|
|
186
|
+
reported = true;
|
|
187
|
+
tw.write({
|
|
188
|
+
traceId: tw.nextId(), parentTraceId, ts: performance.now(),
|
|
189
|
+
type: 'pool:recoveryReport', agentId: agent.id,
|
|
190
|
+
resultLength: parsed.result.length,
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
else {
|
|
194
|
+
failureReason = 'no_result_field';
|
|
195
|
+
}
|
|
164
196
|
}
|
|
197
|
+
catch (e) {
|
|
198
|
+
failureReason = `parse_error: ${e.message ?? 'unknown'}`;
|
|
199
|
+
}
|
|
200
|
+
});
|
|
201
|
+
}
|
|
202
|
+
catch (e) {
|
|
203
|
+
failureReason = `scope_error: ${e.message ?? 'unknown'}`;
|
|
204
|
+
}
|
|
205
|
+
if (!reported) {
|
|
206
|
+
tw.write({
|
|
207
|
+
traceId: tw.nextId(), parentTraceId, ts: performance.now(),
|
|
208
|
+
type: 'pool:recoveryFailed', agentId: agent.id,
|
|
209
|
+
reason: failureReason ?? 'unknown',
|
|
210
|
+
outputExcerpt: output.slice(0, 200),
|
|
165
211
|
});
|
|
166
212
|
}
|
|
167
|
-
catch { /* prefill overflow, decode failure, or malformed JSON — non-fatal */ }
|
|
168
213
|
// Always prune after scope exits (success or failure)
|
|
169
214
|
if (!agent.branch.disposed)
|
|
170
215
|
agent.branch.pruneSync();
|
|
@@ -196,7 +241,7 @@ function* handleNudge(a, message, tc, ctx, tools) {
|
|
|
196
241
|
const nudgeResult = { error: message };
|
|
197
242
|
a.incrementTurns();
|
|
198
243
|
a.transition('awaiting_tool');
|
|
199
|
-
const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), callId);
|
|
244
|
+
const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), callId, { enableThinking: a.fmt.enableThinking });
|
|
200
245
|
const probe = tools?.get(tc?.name || '')?.probe(nudgeResult) ?? undefined;
|
|
201
246
|
a.resetTurn();
|
|
202
247
|
return { agentId: a.id, prefillTokens, toolName: tc?.name || '', callId, args: tc?.arguments || '', probe };
|
|
@@ -218,22 +263,43 @@ function* handleReport(a, result, tc, terminalTool, pruneOnReport, events) {
|
|
|
218
263
|
* On scope exit (error, cancellation), `ensure()` prunes the branch
|
|
219
264
|
* automatically — the orphaned-branch leak is structurally impossible.
|
|
220
265
|
*/
|
|
221
|
-
function* setupAgent(parent, task, ctx) {
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
266
|
+
function* setupAgent(parent, task, ctx, enableThinking) {
|
|
267
|
+
// Probe shared-root mode. When set, the queryRoot already has the
|
|
268
|
+
// [system + tools] chat header prefilled and we MUST NOT re-emit them
|
|
269
|
+
// in the agent's suffix — the bytes are already in attention via fork
|
|
270
|
+
// prefix-share. The new agent inherits parser/grammar/format/triggers
|
|
271
|
+
// from sharedFmt so tool dispatch keeps working.
|
|
272
|
+
let sharedFmt = null;
|
|
273
|
+
try {
|
|
274
|
+
sharedFmt = (yield* context_1.RootFmt.get()) ?? null;
|
|
275
|
+
}
|
|
276
|
+
catch { /* not in shared mode */ }
|
|
277
|
+
// Compose the messages to format into the suffix. In shared mode with
|
|
278
|
+
// an empty per-spec systemPrompt, drop the system message — the role
|
|
279
|
+
// lives at the root, the agent only contributes a user turn. With a
|
|
280
|
+
// non-empty per-spec systemPrompt, include it: the agent's KV will
|
|
281
|
+
// contain TWO system messages in lineage, which Qwen3 handles (recovery
|
|
282
|
+
// ships on the same multi-system pattern).
|
|
283
|
+
const messages = sharedFmt && task.systemPrompt === ''
|
|
284
|
+
? [{ role: 'user', content: task.content }]
|
|
285
|
+
: [
|
|
286
|
+
{ role: 'system', content: task.systemPrompt },
|
|
287
|
+
{ role: 'user', content: task.content },
|
|
288
|
+
];
|
|
289
|
+
const fmtOpts = { enableThinking };
|
|
290
|
+
// Tools belong at the root in shared mode; emitting them again here
|
|
291
|
+
// would re-prefill the same schema bytes for nothing.
|
|
292
|
+
if (task.tools && !sharedFmt)
|
|
228
293
|
fmtOpts.tools = task.tools;
|
|
229
294
|
const fmt = ctx.formatChatSync(JSON.stringify(messages), fmtOpts);
|
|
230
|
-
|
|
295
|
+
// Tool-support guard runs only on the non-shared path. Shared mode's
|
|
296
|
+
// root already passed the equivalent check at withSharedRoot setup.
|
|
297
|
+
if (task.tools && !sharedFmt
|
|
298
|
+
&& (fmt.format === sdk_1.CHAT_FORMAT_CONTENT_ONLY || fmt.format === sdk_1.CHAT_FORMAT_GENERIC)) {
|
|
231
299
|
// Error before fork — no branch to clean up
|
|
232
300
|
throw new Error('Model does not support tool calling. Please use a model with native tool support (e.g. Qwen3, Llama 3.x, Mistral).');
|
|
233
301
|
}
|
|
234
302
|
const branch = parent.forkSync();
|
|
235
|
-
yield* (0, effection_1.ensure)(() => { if (!branch.disposed)
|
|
236
|
-
branch.pruneSync(); });
|
|
237
303
|
const sep = ctx.getTurnSeparator();
|
|
238
304
|
const suffixTokens = [...sep, ...ctx.tokenizeSync(fmt.prompt, false)];
|
|
239
305
|
if (task.seed != null)
|
|
@@ -246,13 +312,22 @@ function* setupAgent(parent, task, ctx) {
|
|
|
246
312
|
callingAgent = a;
|
|
247
313
|
}
|
|
248
314
|
catch { /* top-level — no caller */ }
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
315
|
+
// In shared mode the new agent's parser/grammar/format/triggers come
|
|
316
|
+
// from the root's pre-computed fmt — those fields know about the tool
|
|
317
|
+
// palette that's in attention via the inherited prefix. In non-shared
|
|
318
|
+
// mode, fresh fmt drives those fields (existing behavior).
|
|
319
|
+
const fmtConfig = sharedFmt
|
|
320
|
+
? {
|
|
321
|
+
format: sharedFmt.format,
|
|
322
|
+
reasoningFormat: sharedFmt.reasoningFormat,
|
|
323
|
+
generationPrompt: sharedFmt.generationPrompt,
|
|
324
|
+
parser: sharedFmt.parser,
|
|
325
|
+
grammar: sharedFmt.grammar,
|
|
326
|
+
grammarLazy: sharedFmt.grammarLazy,
|
|
327
|
+
grammarTriggers: sharedFmt.grammarTriggers,
|
|
328
|
+
enableThinking,
|
|
329
|
+
}
|
|
330
|
+
: {
|
|
256
331
|
format: fmt.format,
|
|
257
332
|
reasoningFormat: fmt.reasoningFormat,
|
|
258
333
|
generationPrompt: fmt.generationPrompt,
|
|
@@ -260,7 +335,15 @@ function* setupAgent(parent, task, ctx) {
|
|
|
260
335
|
grammar: fmt.grammar,
|
|
261
336
|
grammarLazy: fmt.grammarLazy,
|
|
262
337
|
grammarTriggers: fmt.grammarTriggers,
|
|
263
|
-
|
|
338
|
+
enableThinking,
|
|
339
|
+
};
|
|
340
|
+
const agent = new Agent_1.Agent({
|
|
341
|
+
id: branch.handle,
|
|
342
|
+
parentId: parent.handle,
|
|
343
|
+
branch,
|
|
344
|
+
parent: callingAgent,
|
|
345
|
+
task: task.content,
|
|
346
|
+
fmt: fmtConfig,
|
|
264
347
|
});
|
|
265
348
|
return { agent, suffixTokens, formattedPrompt: fmt.prompt };
|
|
266
349
|
}
|
|
@@ -326,7 +409,7 @@ function useAgentPool(opts) {
|
|
|
326
409
|
}
|
|
327
410
|
});
|
|
328
411
|
const tw = yield* context_1.Trace.expect();
|
|
329
|
-
const {
|
|
412
|
+
const { root, orchestrate, toolsJson, tools, maxTurns = 100, terminalTool, trace = false, pruneOnReport = false, enableThinking = false } = opts;
|
|
330
413
|
// Tool index map for trace — position in toolkit array
|
|
331
414
|
const toolIndexMap = new Map([...tools.keys()].map((name, i) => [name, i]));
|
|
332
415
|
const toolkitSize = tools.size;
|
|
@@ -338,7 +421,7 @@ function useAgentPool(opts) {
|
|
|
338
421
|
poolParentTraceId = p;
|
|
339
422
|
}
|
|
340
423
|
catch { /* top level */ }
|
|
341
|
-
const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', {
|
|
424
|
+
const poolScope = (0, trace_scope_1.traceScope)(tw, poolParentTraceId, 'pool', { maxTurns, terminalTool });
|
|
342
425
|
// Whether the pool's tool registry contains tools besides the terminal tool.
|
|
343
426
|
// When false, agents are allowed to call the terminal tool as their first
|
|
344
427
|
// action (e.g. reporter sub-agents that only have `report()`). When true,
|
|
@@ -353,68 +436,38 @@ function useAgentPool(opts) {
|
|
|
353
436
|
const policy = opts.policy ?? new AgentPolicy_1.DefaultAgentPolicy();
|
|
354
437
|
const pressureOpts = policy.pressureThresholds
|
|
355
438
|
?? { softLimit: ContextPressure.DEFAULT_SOFT_LIMIT, hardLimit: ContextPressure.DEFAULT_HARD_LIMIT };
|
|
439
|
+
// Invariant: hardLimit must be at least the native batch size (nBatch).
|
|
440
|
+
// When `pressure.critical` fires and the kill path runs recovery, the
|
|
441
|
+
// reserve cells (hardLimit count) must accommodate `recoverInline`'s
|
|
442
|
+
// next batch allocation — otherwise native decode will OOM with
|
|
443
|
+
// "failed to find a memory slot for batch of size N".
|
|
444
|
+
// Until `SessionContext.nBatch` is exposed natively, we validate against
|
|
445
|
+
// `ContextPressure.ASSUMED_N_BATCH` (512, matches llama.cpp default).
|
|
446
|
+
const nBatch = ContextPressure.ASSUMED_N_BATCH;
|
|
447
|
+
const hardLimitVal = pressureOpts.hardLimit ?? ContextPressure.DEFAULT_HARD_LIMIT;
|
|
448
|
+
if (hardLimitVal < nBatch) {
|
|
449
|
+
throw new Error(`useAgentPool: Invariant Violation — hardLimit (${hardLimitVal}) must be >= nBatch (${nBatch}). ` +
|
|
450
|
+
`Recovery reserves hardLimit cells for its own decode; if smaller than nBatch, the next batch ` +
|
|
451
|
+
`allocation will OOM. Increase policy.budget.context.hardLimit to at least ${nBatch}.`);
|
|
452
|
+
}
|
|
356
453
|
const policyConfig = { maxTurns, terminalTool, hasNonTerminalTools };
|
|
357
|
-
// ──
|
|
358
|
-
//
|
|
359
|
-
//
|
|
454
|
+
// ── Orchestrator-driven setup ────────────────────────────
|
|
455
|
+
// Agents are spawned lazily via `ctx.spawn` from the orchestrator.
|
|
456
|
+
// The tick loop iterates over whatever agents are currently active.
|
|
457
|
+
// decode_each batches across all active agents regardless of spawn order.
|
|
360
458
|
const agents = [];
|
|
361
|
-
const
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
agents
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
371
|
-
type: 'branch:create', branchHandle: agent.id, parentHandle: agent.parentId,
|
|
372
|
-
position: 0, role: 'agentFork',
|
|
373
|
-
});
|
|
374
|
-
tw.write({
|
|
375
|
-
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
376
|
-
type: 'prompt:format', promptText: formattedPrompt,
|
|
377
|
-
taskContent: task.content,
|
|
378
|
-
tokenCount: suffixTokens.length,
|
|
379
|
-
messages: JSON.stringify([
|
|
380
|
-
{ role: 'system', content: task.systemPrompt },
|
|
381
|
-
{ role: 'user', content: task.content },
|
|
382
|
-
]),
|
|
383
|
-
tools: task.tools, role: 'agentSuffix',
|
|
384
|
-
});
|
|
385
|
-
}
|
|
386
|
-
// Batch prefill all agent suffixes — pressure-gated.
|
|
387
|
-
// Each suffix is the full formatted chat (system prompt + tools JSON +
|
|
388
|
-
// user message + generation prompt), tokenized via formatChatSync().
|
|
389
|
-
// Suffix cost is model-dependent: ~250-400 tokens per agent depending
|
|
390
|
-
// on chat template verbosity and tool schema size.
|
|
391
|
-
const initPressure = new ContextPressure(ctx, pressureOpts);
|
|
392
|
-
const totalSuffix = prefillSetup.reduce((s, [, t]) => s + t.length, 0);
|
|
393
|
-
if (!initPressure.canFit(totalSuffix)) {
|
|
394
|
-
// Not enough room — drop agents from the end until it fits
|
|
395
|
-
while (prefillSetup.length > 0) {
|
|
396
|
-
const needed = prefillSetup.reduce((s, [, t]) => s + t.length, 0);
|
|
397
|
-
if (initPressure.canFit(needed))
|
|
398
|
-
break;
|
|
399
|
-
prefillSetup.pop();
|
|
400
|
-
const dropped = agents.pop();
|
|
401
|
-
dropped.dispose();
|
|
402
|
-
tw.write({
|
|
403
|
-
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
404
|
-
type: 'pool:agentDrop', agentId: dropped.id, reason: 'pressure_init',
|
|
405
|
-
});
|
|
459
|
+
const agentById = new Map();
|
|
460
|
+
const pendingSpawns = [];
|
|
461
|
+
const pendingExtends = [];
|
|
462
|
+
// Pool-level branch cleanup — ensures orphan-branch cleanup even when
|
|
463
|
+
// spawns are lazy and the orchestrator's spawn scope exits early.
|
|
464
|
+
yield* (0, effection_1.ensure)(() => {
|
|
465
|
+
for (const a of agents) {
|
|
466
|
+
if (!a.branch.disposed)
|
|
467
|
+
a.branch.pruneSync();
|
|
406
468
|
}
|
|
407
|
-
}
|
|
408
|
-
if (prefillSetup.length > 0) {
|
|
409
|
-
yield* (0, effection_1.call)(() => store.prefill(prefillSetup));
|
|
410
|
-
}
|
|
411
|
-
tw.write({
|
|
412
|
-
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
413
|
-
type: 'pool:open', agentCount: agents.length,
|
|
414
|
-
taskSuffixTokens: prefillSetup.map(([, t]) => t.length),
|
|
415
|
-
pressure: { remaining: initPressure.remaining, softLimit: initPressure.softLimit, headroom: initPressure.headroom },
|
|
416
469
|
});
|
|
417
|
-
//
|
|
470
|
+
// Lazy grammar setup — applied inside ctx.spawn after prefill completes.
|
|
418
471
|
const applyLazyGrammar = (a) => {
|
|
419
472
|
if (a.fmt.grammar && a.fmt.grammarLazy && a.fmt.grammarTriggers.length > 0) {
|
|
420
473
|
const triggers = a.fmt.grammarTriggers.map(t => {
|
|
@@ -429,11 +482,112 @@ function useAgentPool(opts) {
|
|
|
429
482
|
a.branch.setGrammarLazy(a.fmt.grammar, triggers);
|
|
430
483
|
}
|
|
431
484
|
};
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
485
|
+
tw.write({
|
|
486
|
+
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
487
|
+
type: 'pool:open', agentCount: 0, taskSuffixTokens: [],
|
|
488
|
+
pressure: (() => {
|
|
489
|
+
const p = new ContextPressure(ctx, pressureOpts);
|
|
490
|
+
return { remaining: p.remaining, softLimit: p.softLimit, headroom: p.headroom };
|
|
491
|
+
})(),
|
|
492
|
+
});
|
|
493
|
+
// ── PoolContext — orchestrator's API surface ─────────────
|
|
494
|
+
const poolContext = {
|
|
495
|
+
root,
|
|
496
|
+
*spawn(spec) {
|
|
497
|
+
const parent = spec.parent ?? root;
|
|
498
|
+
const task = {
|
|
499
|
+
systemPrompt: spec.systemPrompt,
|
|
500
|
+
content: spec.content,
|
|
501
|
+
tools: toolsJson,
|
|
502
|
+
seed: spec.seed,
|
|
503
|
+
parent,
|
|
504
|
+
};
|
|
505
|
+
// Synchronous setup — fork, tokenize suffix, pressure check.
|
|
506
|
+
// No native store call yet; that's the tick loop's SPAWN phase's job.
|
|
507
|
+
const { agent, suffixTokens, formattedPrompt } = yield* setupAgent(parent, task, ctx, enableThinking);
|
|
508
|
+
const pressure = new ContextPressure(ctx, pressureOpts);
|
|
509
|
+
if (!pressure.canFit(suffixTokens.length)) {
|
|
510
|
+
agent.branch.pruneSync();
|
|
511
|
+
agent.dispose();
|
|
512
|
+
tw.write({
|
|
513
|
+
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
514
|
+
type: 'pool:agentDrop', agentId: agent.id, reason: 'pressure_init',
|
|
515
|
+
});
|
|
516
|
+
throw new Error(`useAgentPool: cannot fit agent suffix (${suffixTokens.length} tokens) under current pressure`);
|
|
517
|
+
}
|
|
518
|
+
// Enqueue for SPAWN phase. The tick loop will batch this with any
|
|
519
|
+
// other pending spawns into ONE store.prefill, transition to active,
|
|
520
|
+
// write trace events, and emit agent:spawn. Return the agent
|
|
521
|
+
// immediately — waitFor() is keyed off a transition, not a status
|
|
522
|
+
// snapshot, so the pre-activation 'idle' status doesn't race with
|
|
523
|
+
// the real terminal-idle signal.
|
|
524
|
+
pendingSpawns.push({ agent, suffixTokens, formattedPrompt, task });
|
|
525
|
+
agents.push(agent);
|
|
526
|
+
agentById.set(agent.id, agent);
|
|
527
|
+
return agent;
|
|
528
|
+
},
|
|
529
|
+
*waitFor(agent) {
|
|
530
|
+
// Agent completion = terminal 'idle' OR 'disposed'. Pre-activation
|
|
531
|
+
// 'idle' (the constructor default) would be a false positive, so we
|
|
532
|
+
// wait for a TRANSITION signal rather than checking status.snapshot.
|
|
533
|
+
// The SPAWN phase transitions 'idle' → 'active' when it activates the
|
|
534
|
+
// agent; subsequent transitions lead to a terminal 'idle' or 'disposed'.
|
|
535
|
+
const stream = yield* (0, effection_1.each)(agent.statusSignal);
|
|
536
|
+
// Only short-circuit for already-disposed — no further signal is coming.
|
|
537
|
+
if (agent.status === 'disposed')
|
|
538
|
+
return agent;
|
|
539
|
+
for (const s of stream) {
|
|
540
|
+
if (s === 'idle' || s === 'disposed')
|
|
541
|
+
return agent;
|
|
542
|
+
yield* effection_1.each.next();
|
|
543
|
+
}
|
|
544
|
+
return agent;
|
|
545
|
+
},
|
|
546
|
+
*extendRoot(userContent, assistantContent) {
|
|
547
|
+
if (!assistantContent)
|
|
548
|
+
return 0;
|
|
549
|
+
const turnTokens = (0, sdk_2.buildTurnDelta)(ctx, userContent, assistantContent);
|
|
550
|
+
// Rendezvous with the tick loop's SPAWN phase — see pendingExtends.
|
|
551
|
+
// action() is the Effection-native one-shot suspend: orchestrator
|
|
552
|
+
// queues the request, suspends; tick loop drains + resolves; this
|
|
553
|
+
// operation returns the deltaTokens. The finally returned from the
|
|
554
|
+
// executor marks the request discarded if this fiber is cancelled
|
|
555
|
+
// before the drain runs, so the drain doesn't touch a dead action.
|
|
556
|
+
return yield* (0, effection_1.action)((resolve, reject) => {
|
|
557
|
+
const req = {
|
|
558
|
+
tokens: turnTokens,
|
|
559
|
+
userContent,
|
|
560
|
+
assistantContent,
|
|
561
|
+
resolve,
|
|
562
|
+
reject,
|
|
563
|
+
discarded: false,
|
|
564
|
+
};
|
|
565
|
+
pendingExtends.push(req);
|
|
566
|
+
return () => { req.discarded = true; };
|
|
567
|
+
});
|
|
568
|
+
},
|
|
569
|
+
canFit(estimatedSuffixTokens) {
|
|
570
|
+
return new ContextPressure(ctx, pressureOpts).canFit(estimatedSuffixTokens);
|
|
571
|
+
},
|
|
572
|
+
};
|
|
573
|
+
// Subscribe BEFORE spawning orchestrator or tick loop — no events missed
|
|
436
574
|
const subscription = yield* poolChannel;
|
|
575
|
+
// Orchestrator runs concurrently with tick loop under the pool scope.
|
|
576
|
+
// Sets orchestratorDone when complete; tick loop terminates on
|
|
577
|
+
// (orchestratorDone && all agents idle/disposed).
|
|
578
|
+
let orchestratorDone = false;
|
|
579
|
+
let orchestratorError = null;
|
|
580
|
+
yield* (0, effection_1.spawn)(function* () {
|
|
581
|
+
try {
|
|
582
|
+
yield* orchestrate(poolContext);
|
|
583
|
+
}
|
|
584
|
+
catch (e) {
|
|
585
|
+
orchestratorError = e;
|
|
586
|
+
}
|
|
587
|
+
finally {
|
|
588
|
+
orchestratorDone = true;
|
|
589
|
+
}
|
|
590
|
+
});
|
|
437
591
|
// Spawn tick loop — runs concurrently with Subscription consumption.
|
|
438
592
|
// scoped() creates an error boundary: if llama_decode fails (KV exhaustion),
|
|
439
593
|
// the scope tears down and the channel closes with whatever results exist.
|
|
@@ -442,42 +596,32 @@ function useAgentPool(opts) {
|
|
|
442
596
|
let totalToolCalls = 0;
|
|
443
597
|
const counters = { warmPrefillCalls: 0, warmPrefillBranches: 0 };
|
|
444
598
|
try {
|
|
445
|
-
// Emit spawn events and activate agents
|
|
446
|
-
for (const a of agents) {
|
|
447
|
-
a.transition('active');
|
|
448
|
-
yield* poolChannel.send({ type: 'agent:spawn', agentId: a.id, parentAgentId: a.parentId });
|
|
449
|
-
}
|
|
450
599
|
// ── Phase operations (close over pool scope) ────────────
|
|
451
600
|
/** SETTLE: prefill tool results that fit, defer oversized items for next tick */
|
|
452
601
|
function* settle(items) {
|
|
453
602
|
const settlePressure = new ContextPressure(ctx, pressureOpts);
|
|
454
603
|
let headroom = settlePressure.headroom;
|
|
455
|
-
if (trace) {
|
|
456
|
-
const desc = items.map(s => `${s.toolName}:${s.prefillTokens.length}`).join(', ');
|
|
457
|
-
try {
|
|
458
|
-
process.stderr.write(`[SETTLE] remaining=${settlePressure.remaining} headroom=${headroom} cellsUsed=${settlePressure.cellsUsed} nCtx=${settlePressure.nCtx} items=[${desc}]\n`);
|
|
459
|
-
}
|
|
460
|
-
catch { }
|
|
461
|
-
}
|
|
462
604
|
const prefillPairs = [];
|
|
463
605
|
const settledAgents = [];
|
|
606
|
+
const itemProbes = new Map();
|
|
464
607
|
const deferred = [];
|
|
465
608
|
for (const item of items) {
|
|
466
609
|
const a = agentById.get(item.agentId);
|
|
467
610
|
if (!a || a.status === 'idle')
|
|
468
611
|
continue;
|
|
469
612
|
if (item.prefillTokens.length > headroom) {
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
}
|
|
613
|
+
// Defer — siblings may finish and free KV, letting this result
|
|
614
|
+
// settle next tick (staggered-exit for parallel orchestration).
|
|
615
|
+
// Policy is consulted at stall-break time, not here: invoking
|
|
616
|
+
// it eagerly would break "wait for a sibling to report and
|
|
617
|
+
// free cells" by nudging/dropping on first over-headroom.
|
|
476
618
|
deferred.push(item);
|
|
477
619
|
continue;
|
|
478
620
|
}
|
|
479
621
|
prefillPairs.push([a.branch, item.prefillTokens]);
|
|
480
622
|
settledAgents.push(a);
|
|
623
|
+
if (item.probe)
|
|
624
|
+
itemProbes.set(a.id, item.probe);
|
|
481
625
|
headroom -= item.prefillTokens.length;
|
|
482
626
|
const postSettle = new ContextPressure(ctx, pressureOpts);
|
|
483
627
|
a.recordToolResult({
|
|
@@ -491,20 +635,13 @@ function useAgentPool(opts) {
|
|
|
491
635
|
tokenCount: item.prefillTokens.length, role: 'toolResult' });
|
|
492
636
|
}
|
|
493
637
|
if (prefillPairs.length > 0) {
|
|
494
|
-
if (trace) {
|
|
495
|
-
const total = prefillPairs.reduce((s, [, t]) => s + t.length, 0);
|
|
496
|
-
try {
|
|
497
|
-
process.stderr.write(`[SETTLE] PREFILL ${prefillPairs.length} branches, ${total} tokens, headroom_after=${headroom}\n`);
|
|
498
|
-
}
|
|
499
|
-
catch { }
|
|
500
|
-
}
|
|
501
638
|
yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
|
|
502
639
|
counters.warmPrefillCalls++;
|
|
503
640
|
counters.warmPrefillBranches += prefillPairs.length;
|
|
504
|
-
// Probe prefill from DISPATCH
|
|
641
|
+
// Probe prefill from DISPATCH or nudge-replacement.
|
|
505
642
|
const probePairs = [];
|
|
506
643
|
for (const a of settledAgents) {
|
|
507
|
-
const probe =
|
|
644
|
+
const probe = itemProbes.get(a.id);
|
|
508
645
|
if (probe) {
|
|
509
646
|
const probeTokens = ctx.tokenizeSync(probe, false);
|
|
510
647
|
probePairs.push([a.branch, probeTokens]);
|
|
@@ -584,7 +721,7 @@ function useAgentPool(opts) {
|
|
|
584
721
|
}
|
|
585
722
|
const resultStr = JSON.stringify(result);
|
|
586
723
|
yield* poolChannel.send({ type: 'agent:tool_result', agentId: agent.id, tool: tc.name, result: resultStr, contextAvailablePercent });
|
|
587
|
-
const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId);
|
|
724
|
+
const prefillTokens = (0, sdk_2.buildToolResultDelta)(ctx, resultStr, callId, { enableThinking: agent.fmt.enableThinking });
|
|
588
725
|
const probe = tool?.probe(result) ?? undefined;
|
|
589
726
|
results.push({ agentId: agent.id, prefillTokens, toolName: tc.name, callId, args: tc.arguments, probe });
|
|
590
727
|
tw.write({ traceId: tw.nextId(), parentTraceId: dispatchTraceId, ts: performance.now(),
|
|
@@ -607,15 +744,86 @@ function useAgentPool(opts) {
|
|
|
607
744
|
// ── Four-phase tick loop ─────────────────────────────────
|
|
608
745
|
let recoveryAttempted = false;
|
|
609
746
|
for (;;) {
|
|
610
|
-
//
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
747
|
+
// Idle until orchestrator enqueues work (spawn or extend) or completes.
|
|
748
|
+
// Include pendingExtends: the final extend after the last task in chain
|
|
749
|
+
// mode must drain before the loop exits, otherwise the orchestrator fiber
|
|
750
|
+
// is left suspended on a dead action.
|
|
751
|
+
if (agents.length === 0
|
|
752
|
+
&& pendingSpawns.length === 0
|
|
753
|
+
&& pendingExtends.length === 0) {
|
|
754
|
+
if (orchestratorDone)
|
|
755
|
+
break;
|
|
756
|
+
yield* (0, effection_1.sleep)(1);
|
|
757
|
+
continue;
|
|
758
|
+
}
|
|
759
|
+
// -- Phase 0: SPAWN+EXTEND -- drain pending spawns AND pending extends,
|
|
760
|
+
// batching all fork-suffix prefills and extend-onto-root prefills into
|
|
761
|
+
// ONE native store.prefill call. All store-level native calls in this
|
|
762
|
+
// pool are issued from this fiber (the tick loop), never concurrently
|
|
763
|
+
// with the orchestrator's fiber. Piggybacking extend in this phase
|
|
764
|
+
// preserves the continuous-tree-batching invariant (one GPU round-trip
|
|
765
|
+
// per tick) and naturally atomic-orders both kinds of work.
|
|
766
|
+
if (pendingSpawns.length > 0 || pendingExtends.length > 0) {
|
|
767
|
+
const drainedSpawns = pendingSpawns.splice(0, pendingSpawns.length);
|
|
768
|
+
const drainedExtends = pendingExtends
|
|
769
|
+
.splice(0, pendingExtends.length)
|
|
770
|
+
.filter(e => !e.discarded);
|
|
771
|
+
const prefillPairs = [
|
|
772
|
+
...drainedSpawns.map(s => [s.agent.branch, s.suffixTokens]),
|
|
773
|
+
...drainedExtends.map(e => [root, e.tokens]),
|
|
774
|
+
];
|
|
614
775
|
try {
|
|
615
|
-
|
|
776
|
+
if (prefillPairs.length > 0) {
|
|
777
|
+
yield* (0, effection_1.call)(() => store.prefill(prefillPairs));
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
catch (err) {
|
|
781
|
+
for (const e of drainedExtends)
|
|
782
|
+
e.reject(err);
|
|
783
|
+
throw err;
|
|
784
|
+
}
|
|
785
|
+
// Resolve extend requests with the delta token count. root.position
|
|
786
|
+
// has advanced by the sum of extend token counts at this point.
|
|
787
|
+
for (const e of drainedExtends) {
|
|
788
|
+
tw.write({
|
|
789
|
+
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
790
|
+
type: 'spine:extend',
|
|
791
|
+
userContent: e.userContent,
|
|
792
|
+
assistantContent: e.assistantContent,
|
|
793
|
+
deltaTokens: e.tokens.length,
|
|
794
|
+
positionAfter: root.position,
|
|
795
|
+
});
|
|
796
|
+
e.resolve(e.tokens.length);
|
|
797
|
+
}
|
|
798
|
+
for (const s of drainedSpawns) {
|
|
799
|
+
tw.write({
|
|
800
|
+
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
801
|
+
type: 'branch:create', branchHandle: s.agent.id, parentHandle: s.agent.parentId,
|
|
802
|
+
position: 0, role: 'agentFork',
|
|
803
|
+
});
|
|
804
|
+
tw.write({
|
|
805
|
+
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
806
|
+
type: 'prompt:format', promptText: s.formattedPrompt,
|
|
807
|
+
taskContent: s.task.content, tokenCount: s.suffixTokens.length,
|
|
808
|
+
messages: JSON.stringify([
|
|
809
|
+
{ role: 'system', content: s.task.systemPrompt },
|
|
810
|
+
{ role: 'user', content: s.task.content },
|
|
811
|
+
]),
|
|
812
|
+
tools: s.task.tools, role: 'agentSuffix',
|
|
813
|
+
});
|
|
814
|
+
applyLazyGrammar(s.agent);
|
|
815
|
+
// transition fires agent.statusSignal — ctx.spawn's subscriber is waiting on this.
|
|
816
|
+
s.agent.transition('active');
|
|
817
|
+
yield* poolChannel.send({ type: 'agent:spawn', agentId: s.agent.id, parentAgentId: s.agent.parentId });
|
|
616
818
|
}
|
|
617
|
-
catch { }
|
|
618
819
|
}
|
|
820
|
+
// If all we had was pending spawns, and none of them activated (shouldn't happen
|
|
821
|
+
// normally — SPAWN always transitions to active), nothing to produce. Loop back.
|
|
822
|
+
if (agents.length === 0)
|
|
823
|
+
continue;
|
|
824
|
+
// -- Phase 1: PRODUCE -- sample from active agents, collect tool calls
|
|
825
|
+
policy.resetTick?.();
|
|
826
|
+
const pressure = new ContextPressure(ctx, pressureOpts);
|
|
619
827
|
const entries = [];
|
|
620
828
|
const toolCalls = [];
|
|
621
829
|
const nudges = [];
|
|
@@ -624,15 +832,19 @@ function useAgentPool(opts) {
|
|
|
624
832
|
continue;
|
|
625
833
|
const policyExit = policy.shouldExit?.(a, pressure);
|
|
626
834
|
if (policyExit ?? pressure.critical) {
|
|
627
|
-
a.transition('idle');
|
|
628
835
|
const exitReason = pressure.critical ? 'pressure_critical'
|
|
629
836
|
: policyExit ? 'policy_exit'
|
|
630
837
|
: 'pressure_critical';
|
|
631
838
|
tw.write({ traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
632
839
|
type: 'pool:agentDrop', agentId: a.id, reason: exitReason });
|
|
633
840
|
yield* poolChannel.send({ type: 'agent:done', agentId: a.id });
|
|
634
|
-
//
|
|
635
|
-
|
|
841
|
+
// Run recovery BEFORE transitioning to idle — otherwise the statusSignal
|
|
842
|
+
// fires 'idle' mid-recovery, PoolContext.waitFor returns early, the
|
|
843
|
+
// orchestrator resumes and starts spawning/prefilling the next task
|
|
844
|
+
// while this agent is still being decoded by recoverInline. Concurrent
|
|
845
|
+
// native calls on the same llama_context → SEGV.
|
|
846
|
+
yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
|
|
847
|
+
a.transition('idle');
|
|
636
848
|
continue;
|
|
637
849
|
}
|
|
638
850
|
const { token, text, isStop } = a.branch.produceSync();
|
|
@@ -697,38 +909,93 @@ function useAgentPool(opts) {
|
|
|
697
909
|
// -- Phase 3: SETTLE (settle what fits, defer what doesn't)
|
|
698
910
|
const toSettle = [...pendingSettled, ...nudges];
|
|
699
911
|
const deferred = toSettle.length > 0 ? yield* settle(toSettle) : [];
|
|
700
|
-
// Stall-breaker:
|
|
701
|
-
//
|
|
702
|
-
//
|
|
703
|
-
//
|
|
912
|
+
// Stall-breaker: `deferred` has items but no active siblings can free
|
|
913
|
+
// KV. Consult policy per deferred item — the policy is the "last
|
|
914
|
+
// resort" decision point (staggered-exit for parallel orchestration
|
|
915
|
+
// still works because defer-on-oversize above lets items wait while
|
|
916
|
+
// siblings are active; only when ALL siblings are awaiting_tool or
|
|
917
|
+
// idle do we reach here). Distinct drop reasons:
|
|
918
|
+
// - `pressure_settle_reject` — policy said idle, or nudge but the
|
|
919
|
+
// nudge payload itself doesn't fit (policy suggestion infeasible).
|
|
920
|
+
// - `settle_stall_break` — policy hook absent (legacy fallback).
|
|
704
921
|
if (deferred.length > 0 && !agents.some(a => a.status === 'active')) {
|
|
705
|
-
const
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
922
|
+
const stallPressure = new ContextPressure(ctx, pressureOpts);
|
|
923
|
+
let stallHeadroom = stallPressure.headroom;
|
|
924
|
+
const resolved = [];
|
|
925
|
+
for (const item of deferred) {
|
|
926
|
+
const a = agentById.get(item.agentId);
|
|
927
|
+
if (!a || a.status !== 'awaiting_tool' || a.branch.disposed)
|
|
928
|
+
continue;
|
|
929
|
+
const action = policy.onSettleReject?.(a, item.prefillTokens.length, stallPressure, policyConfig);
|
|
930
|
+
if (action?.type === 'nudge') {
|
|
931
|
+
// Record the policy's decision regardless of whether the
|
|
932
|
+
// nudge itself fits — the event captures "policy consulted,
|
|
933
|
+
// returned nudge" which is separate from "nudge was actionable".
|
|
934
|
+
tw.write({
|
|
935
|
+
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
936
|
+
type: 'pool:agentNudge', agentId: a.id, reason: 'settle_reject', message: action.message,
|
|
937
|
+
});
|
|
938
|
+
const nudgeResult = { error: action.message };
|
|
939
|
+
const nudgeTokens = (0, sdk_2.buildToolResultDelta)(ctx, JSON.stringify(nudgeResult), item.callId, { enableThinking: a.fmt.enableThinking });
|
|
940
|
+
if (nudgeTokens.length <= stallHeadroom) {
|
|
941
|
+
const probe = tools.get(item.toolName)?.probe(nudgeResult) ?? undefined;
|
|
942
|
+
a.incrementTurns();
|
|
943
|
+
resolved.push({
|
|
944
|
+
agentId: a.id,
|
|
945
|
+
prefillTokens: nudgeTokens,
|
|
946
|
+
toolName: item.toolName,
|
|
947
|
+
callId: item.callId,
|
|
948
|
+
args: item.args,
|
|
949
|
+
probe,
|
|
950
|
+
});
|
|
951
|
+
stallHeadroom -= nudgeTokens.length;
|
|
952
|
+
continue;
|
|
953
|
+
}
|
|
954
|
+
// Nudge doesn't fit — policy's suggestion is infeasible, fall through to drop.
|
|
955
|
+
}
|
|
956
|
+
// Drop. Reason: policy-said-idle OR nudge-didn't-fit →
|
|
957
|
+
// `pressure_settle_reject` (policy path). Policy hook absent →
|
|
958
|
+
// `settle_stall_break` (legacy fallback).
|
|
959
|
+
const reason = action ? 'pressure_settle_reject' : 'settle_stall_break';
|
|
960
|
+
tw.write({
|
|
961
|
+
traceId: tw.nextId(), parentTraceId: poolScope.traceId, ts: performance.now(),
|
|
962
|
+
type: 'pool:agentDrop', agentId: a.id, reason,
|
|
963
|
+
});
|
|
964
|
+
yield* poolChannel.send({ type: 'agent:done', agentId: a.id });
|
|
965
|
+
// Recover BEFORE transition — single-fiber store discipline.
|
|
966
|
+
yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
|
|
967
|
+
a.transition('idle');
|
|
712
968
|
}
|
|
969
|
+
// Replace deferred with the surviving (nudged) items for next tick.
|
|
970
|
+
deferred.length = 0;
|
|
971
|
+
deferred.push(...resolved);
|
|
713
972
|
}
|
|
714
973
|
// -- Phase 4: DISPATCH
|
|
715
974
|
const dispatched = yield* dispatch(toolCalls);
|
|
716
975
|
// Deferred + new dispatch results → next tick's SETTLE
|
|
717
976
|
pendingSettled = [...deferred, ...dispatched];
|
|
718
977
|
// -- Termination + recovery
|
|
719
|
-
|
|
978
|
+
// Wait for the orchestrator to finish before closing — it may spawn more agents.
|
|
979
|
+
const allIdle = agents.every(a => a.status === 'idle' || a.status === 'disposed');
|
|
980
|
+
if (allIdle && orchestratorDone) {
|
|
720
981
|
if (!recoveryAttempted) {
|
|
721
982
|
recoveryAttempted = true;
|
|
722
983
|
// Recover any idle agents that weren't handled by inline recovery
|
|
723
984
|
// (e.g., killed by max_turns, time budget, or free_text_stop)
|
|
724
985
|
for (const a of agents) {
|
|
725
986
|
if (a.status === 'idle' && !a.result && !a.branch.disposed) {
|
|
726
|
-
yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel);
|
|
987
|
+
yield* recoverInline(a, policy, ctx, store, tw, poolScope.traceId, poolChannel, pressureOpts);
|
|
727
988
|
}
|
|
728
989
|
}
|
|
729
990
|
}
|
|
991
|
+
if (orchestratorError)
|
|
992
|
+
throw orchestratorError;
|
|
730
993
|
break;
|
|
731
994
|
}
|
|
995
|
+
if (allIdle && !orchestratorDone) {
|
|
996
|
+
// All current agents done but orchestrator may spawn more.
|
|
997
|
+
yield* (0, effection_1.sleep)(1);
|
|
998
|
+
}
|
|
732
999
|
}
|
|
733
1000
|
// ── Close channel with result — consumers get AgentPoolResult as close value ───────
|
|
734
1001
|
// Branch cleanup is handled by each branch's ensure() from setupAgent —
|