@lloyal-labs/lloyal-agents 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +280 -0
  2. package/dist/Tool.d.ts +65 -0
  3. package/dist/Tool.d.ts.map +1 -0
  4. package/dist/Tool.js +59 -0
  5. package/dist/Tool.js.map +1 -0
  6. package/dist/agent-pool.d.ts +114 -0
  7. package/dist/agent-pool.d.ts.map +1 -0
  8. package/dist/agent-pool.js +528 -0
  9. package/dist/agent-pool.js.map +1 -0
  10. package/dist/context.d.ts +33 -0
  11. package/dist/context.d.ts.map +1 -0
  12. package/dist/context.js +33 -0
  13. package/dist/context.js.map +1 -0
  14. package/dist/diverge.d.ts +39 -0
  15. package/dist/diverge.d.ts.map +1 -0
  16. package/dist/diverge.js +148 -0
  17. package/dist/diverge.js.map +1 -0
  18. package/dist/generate.d.ts +30 -0
  19. package/dist/generate.d.ts.map +1 -0
  20. package/dist/generate.js +58 -0
  21. package/dist/generate.js.map +1 -0
  22. package/dist/index.d.ts +15 -0
  23. package/dist/index.d.ts.map +1 -0
  24. package/dist/index.js +28 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/init.d.ts +58 -0
  27. package/dist/init.d.ts.map +1 -0
  28. package/dist/init.js +57 -0
  29. package/dist/init.js.map +1 -0
  30. package/dist/run-agents.d.ts +39 -0
  31. package/dist/run-agents.d.ts.map +1 -0
  32. package/dist/run-agents.js +46 -0
  33. package/dist/run-agents.js.map +1 -0
  34. package/dist/shared-root.d.ts +55 -0
  35. package/dist/shared-root.d.ts.map +1 -0
  36. package/dist/shared-root.js +62 -0
  37. package/dist/shared-root.js.map +1 -0
  38. package/dist/toolkit.d.ts +38 -0
  39. package/dist/toolkit.d.ts.map +1 -0
  40. package/dist/toolkit.js +31 -0
  41. package/dist/toolkit.js.map +1 -0
  42. package/dist/types.d.ts +380 -0
  43. package/dist/types.d.ts.map +1 -0
  44. package/dist/types.js +3 -0
  45. package/dist/types.js.map +1 -0
  46. package/package.json +40 -0
package/README.md ADDED
@@ -0,0 +1,280 @@
1
+ # @lloyal-labs/lloyal-agents
2
+
3
+ Structured concurrency agent runtime for the lloyal inference platform.
4
+
5
+ `lloyal-agents` runs multi-agent inference inside the decode loop. Agents are branches of a single running process — forked from shared KV cache state, advancing through one GPU forward pass per tick, spawning sub-agents from their own live branches at arbitrary depth. Orchestration is not a layer above inference. It is inference.
6
+
7
+ ```bash
8
+ npm i @lloyal-labs/lloyal-agents
9
+ ```
10
+
11
+ **Backends:** [lloyal.node](https://github.com/lloyal-ai/lloyal.node) — prebuilt binaries for macOS (Metal, CPU), Linux (CPU, CUDA, Vulkan), and Windows (CPU, CUDA, Vulkan). GPU selection at runtime.
12
+
13
+ ## Generation as the Primitive
14
+
15
+ The core architectural decision: generation is the primitive, not the API call. Agents are not processes that exchange messages. They are branches of a running inference process — forked from shared KV cache state, generating independently, their outputs comparable because they share a computational origin.
16
+
17
+ This is built on [lloyal.node](https://github.com/lloyal-ai/lloyal.node), which provides forkable decode state and continuous tree batching over llama.cpp. `lloyal-agents` adds structured concurrency, tool dispatch, and a three-phase tick loop that drives N branches through a single GPU forward pass per step.
18
+
19
+ The public API surface:
20
+
21
+ ```typescript
22
+ import {
23
+ initAgents, // bootstrap: session, store, event channel
24
+ generate, // single-branch grammar-constrained generation
25
+ diverge, // multi-branch perplexity selection
26
+ useAgentPool, // concurrent agents as an Effection resource
27
+ runAgents, // same, with automatic branch cleanup
28
+ withSharedRoot, // scoped shared KV prefix with guaranteed teardown
29
+ createToolkit, // tool registry from Tool[] → toolMap + toolsJson
30
+ Ctx,
31
+ Store,
32
+ Events, // Effection contexts — implicit dependency resolution
33
+ } from "@lloyal-labs/lloyal-agents";
34
+ ```
35
+
36
+ That is essentially the entire framework.
37
+
38
+ ### Bootstrap
39
+
40
+ ```typescript
41
+ import { main, call } from "effection";
42
+ import { createContext } from "@lloyal-labs/lloyal.node";
43
+ import { initAgents } from "@lloyal-labs/lloyal-agents";
44
+
45
+ main(function* () {
46
+ const ctx = yield* call(() =>
47
+ createContext({
48
+ modelPath: "model.gguf",
49
+ nCtx: 16384,
50
+ nSeqMax: 8,
51
+ typeK: "q4_0",
52
+ typeV: "q4_0",
53
+ }),
54
+ );
55
+
56
+ const { session, events } = yield* initAgents(ctx);
57
+ // Ctx, Store, Events now set — generate(), diverge(),
58
+ // useAgentPool() find them automatically.
59
+ // Session + context disposed on scope exit.
60
+ });
61
+ ```
62
+
63
+ ## Shared Frontier
64
+
65
+ When agents fork from a common branch, they inherit its KV cache — the full attention state up to the fork point. This boundary is the shared frontier: the last position where all agents had identical computational state.
66
+
67
+ Everything before the frontier is shared context. Everything after is independent reasoning. The model doesn't need to be told what the other agents know — it already attended over the same prefix. Communication happened at prefill time, through the attention mechanism, with zero serialization overhead.
68
+
69
+ ```typescript
70
+ yield *
71
+ withSharedRoot(
72
+ { systemPrompt: RESEARCH_PROMPT, tools: toolsJson },
73
+ function* (root, prefixLen) {
74
+ // root is a prefilled branch — system prompt already in KV cache.
75
+ // Every agent forked from root shares that prefix.
76
+ // KV saved = prefixLen × (agentCount - 1)
77
+ return yield* runAgents({
78
+ tasks: questions.map((q) => ({
79
+ systemPrompt: RESEARCH_PROMPT,
80
+ content: q,
81
+ tools: toolsJson,
82
+ parent: root,
83
+ })),
84
+ tools: toolMap,
85
+ });
86
+ },
87
+ );
88
+ ```
89
+
90
+ `withSharedRoot` creates the prefix, passes it to the body, and guarantees cleanup via `try/finally` — the root branch cannot leak out of the block. Effection enforces the lifetime.
91
+
92
+ ## In-Loop Orchestration
93
+
94
+ All active agents advance together in a three-phase tick loop:
95
+
96
+ **PRODUCE.** Every generating agent calls `produceSync()` — synchronous sampling with no async gap between agents. As a result, the entire produce phase is a single uninterrupted pass over the active set.
97
+
98
+ **COMMIT.** One `store.commit()` call packs all produced tokens into a single `llama_batch` and dispatches once. N branches, one GPU call. No per-agent decode overhead.
99
+
100
+ **SETTLE.** Tool results that resolved during COMMIT are drained from a buffer. Each result is tokenized into a delta, budget-checked against a fresh `ContextPressure` snapshot, and batch-prefilled into the agent's branch. Grammar state resets. The agent transitions back to `generating`.
101
+
102
+ ```typescript
103
+ // From the tick loop — Phase 1
104
+ const entries: [Branch, number][] = [];
105
+ for (const a of agents) {
106
+ if (a.state !== "generating") continue;
107
+ if (pressure.critical) {
108
+ a.state = "done";
109
+ continue;
110
+ }
111
+
112
+ const { token, text, isStop } = a.branch.produceSync();
113
+ if (isStop) {
114
+ /* parse tool calls, dispatch or finalize */ continue;
115
+ }
116
+ entries.push([a.branch, token]);
117
+ }
118
+
119
+ // Phase 2 — single GPU dispatch
120
+ if (entries.length > 0) {
121
+ yield* call(() => store.commit(entries));
122
+ }
123
+ ```
124
+
125
+ When no agent is generating and tools are still pending, the loop parks itself via `action()` — an Effection primitive that suspends the generator until a tool resolves and calls `wakeIdle()`. No polling. No sleep loops.
126
+
127
+ ## Structured Concurrency DAG
128
+
129
+ Agent lifecycles are managed by [Effection](https://github.com/thefrontside/effection), a structured concurrency library for JavaScript. This is not optional sugar — it is load-bearing infrastructure. It is what makes recursive agents possible.
130
+
131
+ Every branch registers an `ensure()` callback at fork time:
132
+
133
+ ```typescript
134
+ function* setupAgent(parent, task, ctx) {
135
+ const branch = parent.forkSync();
136
+ yield* ensure(() => {
137
+ if (!branch.disposed) branch.pruneSync();
138
+ });
139
+ // ...
140
+ }
141
+ ```
142
+
143
+ If the scope exits — error, cancellation, normal completion — the branch is pruned. Orphaned branches are structurally impossible. Tool dispatch uses `scope.run()` for eager start inside the agent pool scope; if the scope tears down, pending tools are cancelled. The DAG is not imposed on the orchestration. It is intrinsic to the Effection task tree.
144
+
145
+ `useAgentPool` is an Effection `resource()` — it suspends via `provide()` after all agents complete, but keeps their branches alive. The caller can fork sub-agents from any completed agent's branch. Those sub-agents inherit the parent agent's full KV state — everything it generated, every tool result it consumed, every reasoning step it took. No summarization. No context window management. The sub-agent continues from the parent's frontier.
146
+
147
+ The deep-research harness ships a concrete example: `reportPass`. Research agents run through the tick loop with tools — search, grep, read_file, report. Some agents get hard-cut by context pressure before they can submit findings. Rather than losing their work, the harness forks a sub-agent from each hard-cut agent's branch with a constrained tool set (report only):
148
+
149
+ ```typescript
150
+ function* reportPass(pool: AgentPoolResult, opts: WorkflowOpts) {
151
+ const hardCut = pool.agents.filter((a) => !a.findings && !a.branch.disposed);
152
+ if (hardCut.length === 0) return;
153
+
154
+ const reporters = yield* runAgents({
155
+ tasks: hardCut.map((a) => ({
156
+ systemPrompt: REPORT_PROMPT,
157
+ content: "Report your findings.",
158
+ tools: reportOnlyTools,
159
+ parent: a.branch, // fork from the parent agent's branch
160
+ })),
161
+ tools: new Map([["report", reportTool]]),
162
+ terminalTool: "report",
163
+ });
164
+
165
+ hardCut.forEach((a, i) => {
166
+ if (reporters.agents[i]?.findings)
167
+ a.findings = reporters.agents[i].findings;
168
+ });
169
+ }
170
+ ```
171
+
172
+ The sub-agent sees everything the parent saw — its system prompt, its tool calls, its partial reasoning — because that state is already in the KV cache at the fork point. The sub-agent just continues from where the parent was cut off, with a tighter mandate.
173
+
174
+ This is the DAG in practice: parent agents form the first level, reporter sub-agents form the second. `runAgents` wraps `useAgentPool` in `scoped()`, so the reporter branches are pruned when it returns. The parent branches are still alive in the outer scope. When that outer scope exits, every `ensure()` callback fires and prunes the parents. Teardown propagates top-down. Cleanup is guaranteed bottom-up.
175
+
176
+ There is nothing in the framework that limits this to two levels. Agents can spawn sub-agents that spawn sub-agents. An agent pool can run inside another agent pool's scope. The structured concurrency guarantees compose at every depth.
177
+
178
+ ## Hallucination Detection
179
+
180
+ The framework provides hallucination detection at two levels.
181
+
182
+ **Per-token observables.** Every branch exposes runtime-accessible signals on every step: `branch.modelEntropy()` (Shannon entropy of the full vocabulary distribution), `branch.modelSurprisal(token)` (surprisal of the chosen token: -log2(p)), `branch.perplexity` (model-level, from raw logits), and `branch.samplingPerplexity` (sampling-level, from the filtered distribution). The delta between model and sampling perplexity is itself a hallucination indicator — high sampling perplexity relative to model perplexity means the sampler is working against the model's probability mass.
183
+
184
+ Enable `trace: true` on agent pools to capture entropy and surprisal on every `agent:produce` event.
185
+
186
+ **Multi-branch semantic comparison.** `diverge()` forks N branches from a shared frontier, generates independently, and returns all outputs with their perplexity scores:
187
+
188
+ ```typescript
189
+ const result =
190
+ yield *
191
+ diverge({
192
+ parent: root, // shared frontier
193
+ attempts: 3, // fork 3 branches
194
+ params: { temperature: 0.7 },
195
+ });
196
+ // result.best — lowest-perplexity branch, still alive
197
+ // result.attempts — all branches with output, ppl, token count
198
+ // Losers already pruned. Winner's branch is caller's responsibility.
199
+ ```
200
+
201
+ The harness decides how to compare. The deep-research example measures semantic equivalence across diverge outputs using bigram Jaccard similarity — where branches agree, the model is confident; where they diverge, hallucination risk is high. No model call required for the comparison itself. Other harnesses can use different equivalence measures over the same `diverge()` primitive.
202
+
203
+ This directly operationalizes the semantic entropy work from Farquhar et al. ([Nature, 2024](https://www.nature.com/articles/s41586-024-07421-0)) — but as a runtime primitive, not a post-hoc metric. The key constraint: divergence from a common computational ancestor is signal. Divergence from independently-constructed contexts is sampling variance. This measurement is only meaningful because agents share a frontier.
204
+
205
+ ## Session Accumulation
206
+
207
+ When agents converge — when the entropy gate passes — the winning branch is not returned as output. It is promoted. It becomes the new trunk of the session. The next query starts from ground that was computationally earned by the previous convergence check.
208
+
209
+ This is the cold/warm session distinction. A cold query runs the full pipeline: plan the decomposition, dispatch research agents, synthesize via `diverge`, evaluate convergence, promote. A warm query — one where a trusted trunk already exists — skips verification entirely. The frontier is already established. Agents fork from it, research, and the session responds directly from findings.
210
+
211
+ Each promote is an epistemic commitment: this branch survived N-way comparison and convergence evaluation, so it becomes the basis for future reasoning. The session doesn't just carry forward text — it carries forward the KV state of a branch that survived verification. Future agents fork from this state. Their shared frontier is not an empty system prompt. It is the accumulated, verified reasoning of every previous cycle.
212
+
213
+ Over multiple queries, the session compounds. Early queries establish the foundation. Later queries branch from it, research further, verify further, promote further. The trunk grows. The frontier advances. The model's effective context is not what you put in the prompt — it is what was earned by convergence.
214
+
215
+ ## Context Pressure
216
+
217
+ KV cache is finite. `ContextPressure` snapshots the remaining budget on every tick and enforces two thresholds:
218
+
219
+ - **softLimit** (default 1024 tokens remaining): SETTLE rejects tool results that would cross this floor. PRODUCE hard-cuts agents requesting non-terminal tool calls. Terminal tools (e.g. `report`) still pass — agents can always submit findings. INIT drops agents that don't fit above this floor.
220
+ - **hardLimit** (default 128 tokens remaining): agents are killed immediately before `produceSync()`. No decode call is made below this line — it would crash.
221
+
222
+ Tool result prefill in the SETTLE phase is budget-gated against a fresh pressure snapshot. If a tool result doesn't fit, the agent is terminated rather than risking a context overflow mid-generation. The softLimit reserves space for downstream work — reporter sub-agents, verification passes.
223
+
224
+ ```typescript
225
+ yield *
226
+ useAgentPool({
227
+ tasks,
228
+ tools: toolMap,
229
+ terminalTool: "report",
230
+ pressure: { softLimit: 2048 }, // reserve 2K for reporters + verify
231
+ });
232
+ ```
233
+
234
+ ## Tools
235
+
236
+ Tools are class-based with OpenAI-compatible function schemas:
237
+
238
+ ```typescript
239
+ import { Tool } from "@lloyal-labs/lloyal-agents";
240
+ import type { ToolContext } from "@lloyal-labs/lloyal-agents";
241
+
242
+ class SearchTool extends Tool<{ query: string }> {
243
+ readonly name = "search";
244
+ readonly description = "Semantic search over the corpus";
245
+ readonly parameters = {
246
+ type: "object",
247
+ properties: { query: { type: "string", description: "Search query" } },
248
+ required: ["query"],
249
+ };
250
+
251
+ async execute(args: { query: string }, context?: ToolContext) {
252
+ const results = await this.reranker.rank(args.query, this.chunks);
253
+ context?.onProgress?.({
254
+ filled: results.length,
255
+ total: this.chunks.length,
256
+ });
257
+ return results.slice(0, 10);
258
+ }
259
+ }
260
+ ```
261
+
262
+ `createToolkit(tools)` aggregates tools into a `{ toolMap, toolsJson }` pair — `toolMap` for runtime dispatch, `toolsJson` for prompt formatting.
263
+
264
+ ## Events
265
+
266
+ The runtime emits structured events for TUI, logging, or telemetry:
267
+
268
+ | Event | Payload |
269
+ | --------------------- | --------------------------------------------------------- |
270
+ | `agent:spawn` | `agentId`, `parentAgentId` |
271
+ | `agent:produce` | `agentId`, `text`, `tokenCount`, `entropy?`, `surprisal?` |
272
+ | `agent:tool_call` | `agentId`, `tool`, `args` |
273
+ | `agent:tool_result` | `agentId`, `tool`, `result` |
274
+ | `agent:tool_progress` | `agentId`, `tool`, `filled`, `total` |
275
+ | `agent:report` | `agentId`, `findings` |
276
+ | `agent:done` | `agentId` |
277
+
278
+ ## License
279
+
280
+ Apache-2.0
package/dist/Tool.d.ts ADDED
@@ -0,0 +1,65 @@
1
+ import type { JsonSchema, ToolSchema, ToolContext } from './types';
2
+ /**
3
+ * Abstract base class for tools usable by agents in the runtime
4
+ *
5
+ * Subclass to define tools that agents can invoke during generation.
6
+ * Implement `name`, `description`, `parameters`, and `execute()`. The
7
+ * {@link schema} getter auto-generates the OpenAI-compatible function
8
+ * schema expected by `formatChat()`.
9
+ *
10
+ * Pass tool instances to {@link createToolkit} to build the `toolMap`
11
+ * and `toolsJson` pair consumed by {@link useAgentPool} and
12
+ * {@link runAgents}.
13
+ *
14
+ * @example Search tool
15
+ * ```typescript
16
+ * class SearchTool extends Tool<{ query: string; topK?: number }> {
17
+ * readonly name = 'search';
18
+ * readonly description = 'Search the corpus for relevant passages';
19
+ * readonly parameters = {
20
+ * type: 'object',
21
+ * properties: {
22
+ * query: { type: 'string', description: 'Search query' },
23
+ * topK: { type: 'number', description: 'Number of results' },
24
+ * },
25
+ * required: ['query'],
26
+ * };
27
+ *
28
+ * async execute(args: { query: string; topK?: number }, ctx?: ToolContext) {
29
+ * const results = await this.reranker.rank(args.query, args.topK ?? 5);
30
+ * return { results };
31
+ * }
32
+ * }
33
+ * ```
34
+ *
35
+ * @category Agents
36
+ */
37
+ export declare abstract class Tool<TArgs = Record<string, unknown>> {
38
+ /** Tool name — used as the function identifier in tool calls and as `function.name` in {@link schema} */
39
+ abstract readonly name: string;
40
+ /** Human-readable description shown to the model */
41
+ abstract readonly description: string;
42
+ /** JSON Schema describing the tool's expected arguments */
43
+ abstract readonly parameters: JsonSchema;
44
+ /**
45
+ * Execute the tool with parsed arguments
46
+ *
47
+ * Called by the agent pool when the model emits a tool call matching
48
+ * this tool's name. The return value is JSON-serialized and prefilled
49
+ * back into the agent's context as a tool result.
50
+ *
51
+ * @param args - Parsed arguments from the model's tool call
52
+ * @param context - Optional execution context; may carry a progress reporting callback
53
+ * @returns Promise resolving to the tool result (will be JSON-serialized)
54
+ */
55
+ abstract execute(args: TArgs, context?: ToolContext): Promise<unknown>;
56
+ /**
57
+ * OpenAI-compatible function tool schema
58
+ *
59
+ * Read-only getter derived from `name`, `description`, and `parameters`.
60
+ * Used by {@link createToolkit} to build the JSON string passed
61
+ * to `formatChat()`.
62
+ */
63
+ get schema(): ToolSchema;
64
+ }
65
+ //# sourceMappingURL=Tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Tool.d.ts","sourceRoot":"","sources":["../src/Tool.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEnE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,8BAAsB,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IACxD,gEAAgE;IAChE,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,oDAAoD;IACpD,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IACtC,2DAA2D;IAC3D,QAAQ,CAAC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;IAEzC;;;;;;;;;;OAUG;IACH,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC;IAEtE;;;;;;OAMG;IACH,IAAI,MAAM,IAAI,UAAU,CASvB;CACF"}
package/dist/Tool.js ADDED
@@ -0,0 +1,59 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Tool = void 0;
4
+ /**
5
+ * Abstract base class for tools usable by agents in the runtime
6
+ *
7
+ * Subclass to define tools that agents can invoke during generation.
8
+ * Implement `name`, `description`, `parameters`, and `execute()`. The
9
+ * {@link schema} getter auto-generates the OpenAI-compatible function
10
+ * schema expected by `formatChat()`.
11
+ *
12
+ * Pass tool instances to {@link createToolkit} to build the `toolMap`
13
+ * and `toolsJson` pair consumed by {@link useAgentPool} and
14
+ * {@link runAgents}.
15
+ *
16
+ * @example Search tool
17
+ * ```typescript
18
+ * class SearchTool extends Tool<{ query: string; topK?: number }> {
19
+ * readonly name = 'search';
20
+ * readonly description = 'Search the corpus for relevant passages';
21
+ * readonly parameters = {
22
+ * type: 'object',
23
+ * properties: {
24
+ * query: { type: 'string', description: 'Search query' },
25
+ * topK: { type: 'number', description: 'Number of results' },
26
+ * },
27
+ * required: ['query'],
28
+ * };
29
+ *
30
+ * async execute(args: { query: string; topK?: number }, ctx?: ToolContext) {
31
+ * const results = await this.reranker.rank(args.query, args.topK ?? 5);
32
+ * return { results };
33
+ * }
34
+ * }
35
+ * ```
36
+ *
37
+ * @category Agents
38
+ */
39
+ class Tool {
40
+ /**
41
+ * OpenAI-compatible function tool schema
42
+ *
43
+ * Built on each access as `{ type: 'function', function: { name, description, parameters } }`.
44
+ * Used by {@link createToolkit} to build the JSON string passed
45
+ * to `formatChat()`.
46
+ */
47
+ get schema() {
48
+ return {
49
+ type: 'function',
50
+ function: {
51
+ name: this.name,
52
+ description: this.description,
53
+ parameters: this.parameters,
54
+ },
55
+ };
56
+ }
57
+ }
58
+ exports.Tool = Tool;
59
+ //# sourceMappingURL=Tool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Tool.js","sourceRoot":"","sources":["../src/Tool.ts"],"names":[],"mappings":";;;AAEA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,MAAsB,IAAI;IAqBxB;;;;;;OAMG;IACH,IAAI,MAAM;QACR,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE;gBACR,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;aAC5B;SACF,CAAC;IACJ,CAAC;CACF;AAtCD,oBAsCC"}
@@ -0,0 +1,114 @@
1
+ import type { Operation } from 'effection';
2
+ import { type SessionContext } from '@lloyal-labs/sdk';
3
+ import type { PressureThresholds, AgentPoolOptions, AgentPoolResult } from './types';
4
+ /**
5
+ * Immutable KV budget snapshot for one tick of the agent loop
6
+ *
7
+ * Created from `SessionContext._storeKvPressure()` which returns
8
+ * `{ nCtx, cellsUsed, remaining }` where `remaining = nCtx - cellsUsed`.
9
+ * `cellsUsed` is a monotonic counter in `BranchStore` — it increments on
10
+ * every `decode_each` / `decode_scatter` but does **not** decrement on
11
+ * individual branch prune (only resets on bulk ops like `retainOnly` and
12
+ * `drain`). This means `remaining` is a conservative lower bound that
13
+ * becomes increasingly pessimistic as branches are pruned mid-run.
14
+ *
15
+ * Two thresholds partition `remaining` into three zones:
16
+ *
17
+ * ```
18
+ * ┌──────────────────────────────────────────────────────┐
19
+ * │ nCtx │
20
+ * │ ┌──────────┬───────────────────┬──────────────────┐ │
21
+ * │ │cellsUsed │ headroom > 0 │ softLimit │ │
22
+ * │ │ (in use) │ (new work OK) │ (reserved) │ │
23
+ * │ └──────────┴───────────────────┴──────────────────┘ │
24
+ * │ ◄── remaining ──► │ │
25
+ * │ │ │
26
+ * │ headroom = remaining - softLimit │
27
+ * │ critical = remaining < hardLimit │
28
+ * └──────────────────────────────────────────────────────┘
29
+ * ```
30
+ *
31
+ * - **headroom > 0** — room for new work (tool results, generation)
32
+ * - **headroom ≤ 0** — over budget. SETTLE rejects tool results, PRODUCE
33
+ * hard-cuts non-terminal tool calls. Terminal tools still pass.
34
+ * - **critical** — remaining below hardLimit. Agents killed before
35
+ * `produceSync()` to prevent llama_decode crashes.
36
+ *
37
+ * @category Agents
38
+ */
39
+ export declare class ContextPressure {
40
+ /** Default softLimit: 1024 tokens reserved for downstream work */
41
+ static readonly DEFAULT_SOFT_LIMIT = 1024;
42
+ /** Default hardLimit: 128-token crash-prevention floor */
43
+ static readonly DEFAULT_HARD_LIMIT = 128;
44
+ /**
45
+ * KV slots remaining (`nCtx - cellsUsed`).
46
+ * Infinity when nCtx ≤ 0 (no context limit).
47
+ * Conservative: may undercount actual free space when branches have been
48
+ * pruned, since `cellsUsed` is monotonic.
49
+ */
50
+ readonly remaining: number;
51
+ /** Soft floor on `remaining` — tokens reserved for downstream work */
52
+ readonly softLimit: number;
53
+ /** Crash-prevention floor — agents are killed when `remaining` drops below it */
54
+ readonly hardLimit: number;
55
+ constructor(ctx: SessionContext, opts?: PressureThresholds);
56
+ /**
57
+ * Tokens available for new work: `remaining - softLimit`.
58
+ * Positive means room to accept tool results or continue generating.
59
+ * Negative means over budget — SETTLE rejects tool results, PRODUCE hard-cuts non-terminal tool calls.
60
+ */
61
+ get headroom(): number;
62
+ /** `remaining < hardLimit` — agent must not call `produceSync()`. */
63
+ get critical(): boolean;
64
+ /** Whether `tokenCount` more tokens fit while staying above softLimit. NOTE(review): behavior exactly at the softLimit boundary is not visible in this declaration — confirm. */
65
+ canFit(tokenCount: number): boolean;
66
+ }
67
+ /**
68
+ * Concurrent agent generation loop as an Effection resource
69
+ *
70
+ * Runs N agents in parallel using a three-phase tick loop over shared
71
+ * {@link BranchStore} infrastructure. Each agent forks from a parent
72
+ * branch, generates tokens, invokes tools, and reports findings.
73
+ *
74
+ * **Three-phase tick loop:**
75
+ * 1. **PRODUCE** — sample all active agents via `produceSync()` (no async gap)
76
+ * 2. **COMMIT** — single GPU call via `store.commit()` for all produced tokens
77
+ * 3. **SETTLE** — drain settled tool results, batch prefill, reset grammars
78
+ *
79
+ * Tool dispatch uses `scope.run()` for eager start — tool executions run as
80
+ * children of the agent pool scope and are cancelled if the scope exits.
81
+ *
82
+ * **Resource semantics:** `provide()` suspends after all agents complete,
83
+ * keeping branches alive so the caller can fork from them (e.g. for
84
+ * verification). Branches are pruned when the scope exits — each branch's
85
+ * `ensure()` from `setupAgent` handles cleanup automatically.
86
+ *
87
+ * For automatic branch cleanup on return, use {@link runAgents} instead.
88
+ *
89
+ * @param opts - Pool configuration: tasks, tools, sampling params, max turns; also accepts `terminalTool`, `pressure` thresholds, and `trace`
90
+ * @returns Operation yielding the agent pool result with per-agent findings and aggregate statistics
91
+ *
92
+ * @example Shared root with agent pool
93
+ * ```typescript
94
+ * const pool = yield* withSharedRoot(
95
+ * { systemPrompt: RESEARCH_PROMPT, tools: toolsJson },
96
+ * function*(root) {
97
+ * return yield* useAgentPool({
98
+ * tasks: questions.map(q => ({
99
+ * systemPrompt: RESEARCH_PROMPT,
100
+ * content: q,
101
+ * tools: toolsJson,
102
+ * parent: root,
103
+ * })),
104
+ * tools: toolMap,
105
+ * maxTurns: 6,
106
+ * });
107
+ * },
108
+ * );
109
+ * ```
110
+ *
111
+ * @category Agents
112
+ */
113
+ export declare function useAgentPool(opts: AgentPoolOptions): Operation<AgentPoolResult>;
114
+ //# sourceMappingURL=agent-pool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-pool.d.ts","sourceRoot":"","sources":["../src/agent-pool.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAkB,MAAM,WAAW,CAAC;AAE3D,OAAO,EAA+G,KAAK,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAIpK,OAAO,KAAK,EAEV,kBAAkB,EAElB,gBAAgB,EAChB,eAAe,EAEhB,MAAM,SAAS,CAAC;AAqCjB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,qBAAa,eAAe;IAC1B,kEAAkE;IAClE,MAAM,CAAC,QAAQ,CAAC,kBAAkB,QAAQ;IAC1C,2DAA2D;IAC3D,MAAM,CAAC,QAAQ,CAAC,kBAAkB,OAAO;IAEzC;;;;;OAKG;IACH,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,+DAA+D;IAC/D,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,wEAAwE;IACxE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;gBAEf,GAAG,EAAE,cAAc,EAAE,IAAI,CAAC,EAAE,kBAAkB;IAO1D;;;;OAIG;IACH,IAAI,QAAQ,IAAI,MAAM,CAA4C;IAElE,qEAAqE;IACrE,IAAI,QAAQ,IAAI,OAAO,CAA4C;IAEnE,iEAAiE;IACjE,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO;CACpC;AAwDD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,gBAAgB,GAAG,SAAS,CAAC,eAAe,CAAC,CAwW/E"}