@lloyal-labs/sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,162 @@
1
+ # @lloyal-labs/sdk
2
+
3
+ Backend-agnostic TypeScript SDK for the lloyal inference platform.
4
+
5
+ Composable inference primitives for forkable decode state, shared-prefix KV branching, and continuous tree batching. Branches share a KV prefix while keeping independent machinery — sampler chain, grammar, logits snapshot, perplexity tracker — for controlled divergence at decode time. `BranchStore` packs tokens from N branches (each at a different position, different seq_id, each needing independent logits captured) into a single `llama_batch` and dispatches once.
6
+
7
+ ```bash
8
+ npm i @lloyal-labs/sdk
9
+ ```
10
+
11
+ The SDK exports the `SessionContext` contract and the primitives that operate on it. Backend bindings ([lloyal.node](https://github.com/lloyal-ai/lloyal.node), [nitro-llama](https://github.com/lloyal-ai/nitro-llama)) provide `createContext()` — the SDK takes it from there.
12
+
13
+ ## The Branch API
14
+
15
+ ```typescript
16
+ import { createContext } from '@lloyal-labs/lloyal.node';
17
+ import { Branch, BranchStore } from '@lloyal-labs/sdk';
18
+
19
+ const ctx = await createContext({ modelPath: './model.gguf', nSeqMax: 6 });
20
+ const store = new BranchStore(ctx);
21
+
22
+ // Shared prompt: "Explain quantum entanglement"
23
+ const prompt = await ctx.tokenize('Explain quantum entanglement');
24
+
25
+ const root = Branch.create(ctx, 0, { temperature: 0.8 });
26
+ await root.prefill(prompt);
27
+
28
+ // Fork 4 branches — each gets a different reasoning prefix
29
+ const analogy = await root.fork();
30
+ const formal = await root.fork();
31
+ const socratic = await root.fork();
32
+ const visual = await root.fork();
33
+
34
+ // Scatter-prefill: inject divergent prefixes in one batched dispatch
35
+ // 4 branches × variable lengths → auto bin-packed into minimal GPU calls
36
+ await store.prefill([
37
+ [analogy, await ctx.tokenize('Think of it like two coins...')], // 12 tokens
38
+ [formal, await ctx.tokenize('In quantum mechanics, the...')], // 8 tokens
39
+ [socratic, await ctx.tokenize('What happens when you measure...')], // 10 tokens
40
+ [visual, await ctx.tokenize('Imagine two particles...')], // 7 tokens
41
+ ]);
42
+
43
+ // Generate — all 4 in lockstep, 1 GPU call per step
44
+ const branches = [analogy, formal, socratic, visual];
45
+ const finished = new Set<Branch>(); // stopped branches stay alive so their perplexity can be compared
46
+ for (;;) {
47
+ const entries: [Branch, number][] = [];
48
+ for (const b of branches) {
49
+ if (finished.has(b)) continue;
50
+ const { token, isStop } = b.produceSync();
51
+ if (isStop) { finished.add(b); continue; } // do NOT prune here — a pruned branch cannot be the winner
52
+ entries.push([b, token]);
53
+ }
54
+ if (!entries.length) break; // every branch has reached a stop token
55
+ await store.commit(entries);
56
+ }
57
+
58
+ // Winner takes all — one seq_keep pass, losers vaporized
59
+ // No branch was pruned above, so reduce() always has candidates to compare.
60
+ const winner = branches.reduce((a, b) =>
61
+ a.perplexity < b.perplexity ? a : b,
62
+ );
63
+ await store.retainOnly(winner);
64
+ ```
65
+
66
+ Or for single-branch generation, Branch is an async iterable — generate until EOG:
67
+
68
+ ```typescript
69
+ for await (const { token, text } of branch) {
70
+ process.stdout.write(text);
71
+ }
72
+ ```
73
+
74
+ ## Continuous Tree Batching
75
+
76
+ Tree search with N branches means N calls to `llama_decode()` — each paying GPU dispatch overhead, memory barriers, and PCIe round-trips. `BranchStore` eliminates this: tokens from N branches are packed into a single `llama_batch` and dispatched once. N branches, 1 GPU call.
77
+
78
+ Two packing strategies for different access patterns:
79
+
80
+ ```typescript
81
+ // commit: 1 token per branch — one GPU dispatch for N branches
82
+ await store.commit([[branch1, tok1], [branch2, tok2], [branch3, tok3]]);
83
+
84
+ // prefill: variable tokens per branch — asymmetric injection
85
+ await store.prefill([
86
+ [branchA, systemTokens], // 200 tokens
87
+ [branchB, queryTokens], // 12 tokens
88
+ [branchC, docTokens], // 800 tokens
89
+ ]);
90
+ // Greedy bin-packed into ceil(total / nBatch) dispatches
91
+ ```
92
+
93
+ ## Topology
94
+
95
+ Parent/child edges are always-on. Simple chat to best-of-N to deep search is one continuum.
96
+
97
+ ```typescript
98
+ branch.parent; // handle or null if root
99
+ branch.children; // child handles
100
+ branch.isLeaf; // no children?
101
+ ```
102
+
103
+ | Method | Behavior |
104
+ |--------|----------|
105
+ | `pruneSync()` | Throws if children exist |
106
+ | `pruneSubtreeSync()` | Iterative post-order traversal |
107
+
108
+ ## Per-Token Metrics
109
+
110
+ Every branch exposes runtime-accessible information-theoretic measures on every step:
111
+
112
+ ```typescript
113
+ branch.modelEntropy('bits'); // Shannon entropy of full vocab distribution (in bits; default base is nats)
114
+ branch.modelSurprisal(token, 'bits'); // -log2(p) for a specific token (default base is nats)
115
+ branch.perplexity; // model-level PPL (exp of mean NLL from raw logits)
116
+ branch.samplingPerplexity; // sampling-level PPL (from filtered distribution)
117
+ ```
118
+
119
+ ## Session
120
+
121
+ `Session` manages the conversation trunk — the single promoted branch that accumulates verified context across queries.
122
+
123
+ ```typescript
124
+ const session = new Session({ ctx, store });
125
+
126
+ // Prefill a user turn into the trunk
127
+ await session.prefillUser('What is quantum entanglement?');
128
+
129
+ // After generation + verification, promote a branch to become the new trunk
130
+ await session.promote(verifiedBranch);
131
+
132
+ // Next query starts from the promoted trunk's KV state
133
+ session.trunk; // the live branch
134
+ ```
135
+
136
+ ## Rerank
137
+
138
+ Backend-agnostic reranker. The caller provides a `SessionContext` — how it was created (local, remote, quantized) is not the SDK's concern.
139
+
140
+ ```typescript
141
+ import { Rerank } from '@lloyal-labs/sdk';
142
+
143
+ const reranker = await Rerank.create(ctx, { nSeqMax: 8 });
144
+ const scores = await reranker.rank(query, documents);
145
+ ```
146
+
147
+ ## Exports
148
+
149
+ ```typescript
150
+ // Classes
151
+ export { Branch, BranchStore, Session, Rerank };
152
+
153
+ // Delta builders (for tool result injection)
154
+ export { buildUserDelta, buildToolResultDelta };
155
+
156
+ // Types
157
+ export type { SessionContext, SamplingParams, Produced, ContextOptions, ... };
158
+ ```
159
+
160
+ ## License
161
+
162
+ Apache-2.0
@@ -0,0 +1,463 @@
1
+ import type { SessionContext, SamplingParams, Produced, GrammarTrigger } from './types';
2
+ /**
3
+ * Forkable inference handle for covalent generation
4
+ *
5
+ * A Branch owns everything needed for independent generation: a KV cache
6
+ * sequence, sampler chain, logits snapshot, and perplexity tracker.
7
+ *
8
+ * Forking is cheap — the KV prefix is shared in memory (metadata-only operation under unified KV —
9
+ * no KV tensor buffers are copied), so sibling branches read from the same physical KV entries.
10
+ * Only tokens decoded after the fork point are exclusive to each branch.
11
+ *
12
+ * Branches form trees, not just flat lists. Fork from root for best-of-N,
13
+ * fork from children for tree search/beam search, fork from a draft for speculative
14
+ * decoding.
15
+ *
16
+ * The produce/commit protocol separates sampling from state advancement:
17
+ * produce() samples without writing to KV, letting you inspect the result
18
+ * before deciding to commit().
19
+ *
20
+ * @example Best-of-N with perplexity selection
21
+ * ```typescript
22
+ * const root = Branch.create(ctx, 0, { temperature: 0.8 });
23
+ * await root.prefill(tokens);
24
+ *
25
+ * const results = [];
26
+ * for (let i = 0; i < 5; i++) {
27
+ * const branch = await root.fork();
28
+ * branch.reseedSampler(1000 + i);
29
+ * const tokens = [];
30
+ * for await (const { token } of branch) tokens.push(token);
31
+ * results.push({ branch, tokens, ppl: branch.perplexity });
32
+ * }
33
+ *
34
+ * const best = results.reduce((a, b) => a.ppl < b.ppl ? a : b);
35
+ * for (const r of results) { if (r !== best) await r.branch.prune(); }
36
+ * ```
37
+ *
38
+ * @category Branching
39
+ */
40
+ export declare class Branch {
41
+ private _ctx;
42
+ private _handle;
43
+ private _disposed;
44
+ constructor(ctx: SessionContext, handle: number);
45
+ /**
46
+ * Create a root branch at the given position
47
+ *
48
+ * The branch takes ownership of the sequence and creates its own sampler
49
+ * chain from the provided params. Call prefill() to decode prompt tokens
50
+ * and capture the logit distribution before forking.
51
+ *
52
+ * @param ctx - SessionContext to create branch on
53
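+ * @param position - Starting position: number of tokens already decoded into this sequence (0 if the prompt will be fed through prefill(), which advances position itself)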
54
+ * @param params - Sampling parameters (temperature, topP, etc.)
55
+ * @param nBatch - Per-branch batch size override (defaults to context nBatch).
56
+ * Controls chunk size for prefill(). Has no effect on
57
+ * single-token commit() which uses a zero-allocation fast path.
58
+ * @param grammar - GBNF grammar string for constrained generation.
59
+ * When provided, sample() returns only grammar-valid tokens. The grammar state
60
+ * is cloned on fork(), so sibling branches can diverge independently.
61
+ * @returns New Branch instance
62
+ */
63
+ static create(ctx: SessionContext, position: number, params?: SamplingParams, nBatch?: number, grammar?: string): Branch;
64
+ /**
65
+ * Fork this branch to a new sequence (async)
66
+ *
67
+ * Async contract: local branches resolve immediately; cloud branches
68
+ * may perform an HTTP round-trip. Use {@link forkSync} when you know
69
+ * the branch is local and want zero-overhead forking.
70
+ *
71
+ * @returns New forked Branch
72
+ */
73
+ fork(): Promise<Branch>;
74
+ /**
75
+ * Fork this branch to a new sequence (sync)
76
+ *
77
+ * The child shares the parent's KV prefix in memory (metadata-only under unified KV, no KV buffer copy).
78
+ * Logits, sampler state, and perplexity tracker are cloned so the child
79
+ * can diverge independently. Fork from any branch — root or intermediate —
80
+ * to build arbitrarily deep trees.
81
+ *
82
+ * Call reseedSampler() on each child for stochastic diversity.
83
+ *
84
+ * @returns New forked Branch
85
+ */
86
+ forkSync(): Branch;
87
+ /**
88
+ * Get a copy of this branch's captured logits snapshot.
89
+ *
90
+ * Returns n_vocab floats — the raw logit distribution from the last
91
+ * prefill() or commit() call.
92
+ *
93
+ * Returns an independent copy of the branch's internal snapshot.
94
+ * The returned Float32Array is safe to hold across async boundaries
95
+ * and is not affected by subsequent decode operations.
96
+ *
97
+ * @returns Independent copy of the logits snapshot (n_vocab elements)
98
+ * @throws If no logits have been captured yet
99
+ */
100
+ getLogits(): Float32Array;
101
+ /**
102
+ * Bulk-decode tokens into the branch's KV cache and capture logits.
103
+ *
104
+ * `tokens.length` is the total count to process; the branch's `nBatch`
105
+ * (set at `Branch.create`) controls how many are sent per `llama_decode`
106
+ * call. E.g. 500 tokens with `nBatch=64` → 8 calls (7×64 + 1×52).
107
+ *
108
+ * Advances `position` by `tokens.length`. Stores final logits into the
109
+ * branch's internal snapshot — the next `produce()`/`sample()` reads
110
+ * from it.
111
+ *
112
+ * Does NOT accept tokens into the repeat-penalty window — for external
113
+ * tokens (user input between turns), not model-generated tokens.
114
+ * For model output, use `commit()` which does accept + decode.
115
+ *
116
+ * The primary way to feed tokens into a branch's KV cache.
117
+ *
118
+ * @param tokens - Token IDs to decode
119
+ */
120
+ prefill(tokens: number[]): Promise<void>;
121
+ /**
122
+ * Sample next token from branch's logits snapshot
123
+ *
124
+ * Applies the branch's full sampler chain (top-k, top-p, temperature,
125
+ * repeat/presence penalties) to the captured logits.
126
+ *
127
+ * @returns Sampled token ID
128
+ */
129
+ sample(): number;
130
+ /**
131
+ * Record token in the sampler's repeat/presence penalty window
132
+ *
133
+ * @param token - Token to accept
134
+ */
135
+ accept(token: number): void;
136
+ /**
137
+ * Discard this branch (async)
138
+ *
139
+ * Async contract: local branches resolve immediately; cloud branches
140
+ * may perform an HTTP round-trip. Use {@link pruneSync} when you know
141
+ * the branch is local.
142
+ *
143
+ * RESTRICT mode: throws if children exist. Use {@link pruneSubtree} to
144
+ * cascade-delete an entire subtree.
145
+ */
146
+ prune(): Promise<void>;
147
+ /**
148
+ * Discard this branch — remove its divergent KV entries and free the handle (sync)
149
+ *
150
+ * Only removes KV entries divergent from the shared prefix; sibling branches
151
+ * are unaffected. The disposed flag is set synchronously — any call to
152
+ * produce(), commit(), etc. after prune() will throw immediately.
153
+ *
154
+ * RESTRICT mode: throws if children exist. Use {@link pruneSubtreeSync} to
155
+ * cascade-delete an entire subtree.
156
+ */
157
+ pruneSync(): void;
158
+ /**
159
+ * Discard this branch and all its descendants (async)
160
+ *
161
+ * Async contract: local branches resolve immediately; cloud branches
162
+ * may perform an HTTP round-trip. Use {@link pruneSubtreeSync} when you know
163
+ * the branch is local.
164
+ */
165
+ pruneSubtree(): Promise<void>;
166
+ /**
167
+ * Discard this branch and all its descendants — CASCADE delete (sync)
168
+ *
169
+ * Iterative post-order traversal: prunes children first, then this branch.
170
+ * Use when tearing down an entire subtree (e.g. abandoned search path).
171
+ * Sets disposed synchronously.
172
+ */
173
+ pruneSubtreeSync(): void;
174
+ /**
175
+ * Reseed the sampler's PRNG for diversity after fork()
176
+ *
177
+ * CRITICAL for parallel generation: Without reseeding, all forked branches
178
+ * produce identical outputs because each fork starts from a clone of the parent's PRNG state.
179
+ *
180
+ * Only affects stochastic samplers (temperature > 0). Greedy samplers are unchanged.
181
+ *
182
+ * @param seed - New seed for the PRNG
183
+ */
184
+ reseedSampler(seed: number): void;
185
+ /**
186
+ * Apply dynamic logit adjustments for this branch only
187
+ *
188
+ * Unlike `logit_bias` in sampling params (which is cloned on fork), steer biases
189
+ * are NOT inherited by child branches. Each branch manages its own steer state
190
+ * independently. This makes steer ideal for path-dependent constraints.
191
+ *
192
+ * **Use cases:**
193
+ * - **tsampler**: Block tokens that would create repeated N-grams based on
194
+ * this branch's specific generation history
195
+ * - **Diverse beam search**: Penalize tokens already chosen by sibling beams
196
+ * to encourage output diversity across the beam
197
+ * - **Dynamic constraints**: Apply token restrictions that change per-step
198
+ *
199
+ * **Sampling order:** Grammar → Logit Bias → Steer → Sampler Chain
200
+ *
201
+ * @param biases - Array of token adjustments. Use `-Infinity` to completely
202
+ * block a token, positive values to boost probability, negative to reduce.
203
+ *
204
+ * @example Block tokens for N-gram deduplication (tsampler pattern)
205
+ * ```ts
206
+ * // Compute which tokens would create repeated 4-grams
207
+ * const blocked = computeNgramBlocks(generatedTokens, 4); // n = 4
208
+ *
209
+ * // Block those tokens for this sample only
210
+ * branch.steer(blocked.map(t => ({ token: t, bias: -Infinity })));
211
+ *
212
+ * const { token } = await branch.produce(); // Blocked tokens won't be sampled
213
+ * await branch.commit(token);
214
+ *
215
+ * // Clear for next iteration (recompute based on new history)
216
+ * branch.clearSteer();
217
+ * ```
218
+ *
219
+ * @example Diverse beam search
220
+ * ```ts
221
+ * // Each beam penalizes tokens chosen by siblings this step
222
+ * for (const beam of beams) {
223
+ * // Collect tokens chosen by other beams
224
+ * const siblingTokens = beams
225
+ * .filter(b => b !== beam && b.lastToken !== undefined)
226
+ * .map(b => b.lastToken);
227
+ *
228
+ * // Penalize sibling choices to encourage diversity
229
+ * beam.branch.steer(siblingTokens.map(t => ({ token: t, bias: -2.0 })));
230
+ *
231
+ * const { token } = await beam.branch.produce();
232
+ * await beam.branch.commit(token);
233
+ * beam.lastToken = token;
234
+ * beam.branch.clearSteer();
235
+ * }
236
+ * ```
237
+ *
238
+ * @example Boost specific tokens
239
+ * ```ts
240
+ * // Boost "yes" and "no" tokens for a yes/no question
241
+ * branch.steer([
242
+ * { token: yesTokenId, bias: 5.0 },
243
+ * { token: noTokenId, bias: 5.0 }
244
+ * ]);
245
+ * ```
246
+ */
247
+ steer(biases: Array<{
248
+ token: number;
249
+ bias: number;
250
+ }>): void;
251
+ /**
252
+ * Clear all steer biases from this branch
253
+ *
254
+ * Removes any dynamic logit adjustments set by `steer()`. Call this after
255
+ * each generation step if your steer constraints are computed per-step
256
+ * (e.g., N-gram blocking where the blocked set changes as text grows).
257
+ *
258
+ * @example Per-step steer pattern
259
+ * ```ts
260
+ * for (let i = 0; i < maxTokens; i++) {
261
+ * // Compute constraints based on current state
262
+ * const blocked = computeConstraints(generatedTokens);
263
+ * branch.steer(blocked.map(t => ({ token: t, bias: -Infinity })));
264
+ *
265
+ * const { token, isStop } = await branch.produce();
266
+ * if (isStop) break;
267
+ *
268
+ * await branch.commit(token);
269
+ * branch.clearSteer(); // Reset for next iteration
270
+ * generatedTokens.push(token);
271
+ * }
272
+ * ```
273
+ */
274
+ clearSteer(): void;
275
+ /**
276
+ * Replace the sampler chain with new parameters (memoized)
277
+ *
278
+ * If the new params match the current chain's params, this is a no-op.
279
+ * Otherwise the old chain is freed and a new one is created. Use for
280
+ * Entropy-Driven Temperature (EDT) and other adaptive sampling strategies
281
+ * that adjust parameters per-step.
282
+ *
283
+ * @param params - New sampling parameters
284
+ *
285
+ * @example Entropy-Driven Temperature
286
+ * ```typescript
287
+ * const entropy = branch.modelEntropy('nats');
288
+ * branch.setSamplerParams({ temperature: edtTemperature(entropy) });
289
+ * const { token } = await branch.produce();
290
+ * await branch.commit(token);
291
+ * ```
292
+ */
293
+ setSamplerParams(params: SamplingParams): void;
294
+ /**
295
+ * Replace or remove the grammar constraint
296
+ *
297
+ * Pass a GBNF grammar string to constrain generation. Pass empty string
298
+ * or undefined to remove the constraint. The grammar state is cloned on
299
+ * fork(), so sibling branches can diverge independently after hot-swap.
300
+ *
301
+ * @param grammarStr - GBNF grammar string, or empty/undefined to remove
302
+ *
303
+ * @example Hot-swap grammar mid-generation
304
+ * ```typescript
305
+ * // Start unconstrained, then switch to JSON after detecting tool call
306
+ * branch.setGrammar(jsonGrammar);
307
+ * const { token } = await branch.produce();
308
+ * ```
309
+ */
310
+ setGrammar(grammarStr?: string): void;
311
+ /**
312
+ * Set lazy grammar — unconstrained until trigger, then grammar-constrained
313
+ *
314
+ * Generation runs freely until a trigger pattern or token fires, at which
315
+ * point the grammar activates and constrains subsequent tokens. Used for
316
+ * tool-call generation: model writes freely until `<tool_call>`, then
317
+ * grammar forces valid XML structure.
318
+ *
319
+ * The grammar state is cloned on fork(), so sibling branches can diverge
320
+ * independently. Call again after a tool result prefill to reset.
321
+ *
322
+ * @param grammar - GBNF grammar string
323
+ * @param triggers - Trigger conditions from formatChat().grammarTriggers
324
+ */
325
+ setGrammarLazy(grammar: string, triggers: GrammarTrigger[]): void;
326
+ /**
327
+ * Sample next token without advancing state (async)
328
+ *
329
+ * Async contract: local branches resolve immediately; cloud branches
330
+ * may perform an HTTP round-trip. Use {@link produceSync} when you know
331
+ * the branch is local and want zero-overhead sampling.
332
+ */
333
+ produce(): Promise<Produced>;
334
+ /**
335
+ * Sample next token without advancing state (sync)
336
+ *
337
+ * Same as {@link produce} but synchronous. Use when you know the branch
338
+ * is local and want to avoid the microtick overhead of a promise.
339
+ */
340
+ produceSync(): Produced;
341
+ /**
342
+ * Accept and decode — update branch state, then write token to KV
343
+ *
344
+ * Accepts the token into the sampler penalty window (for correct PPL
345
+ * measurement), then decodes (writing to KV cache via AsyncWorker on
346
+ * the libuv thread pool) and captures the resulting logits for the next
347
+ * produce() call. Accept-first ordering with rollback: if decode throws,
348
+ * sampler/grammar/metrics are restored from clones.
349
+ *
350
+ * @param token Token to commit (from produce())
351
+ */
352
+ commit(token: number): Promise<void>;
353
+ /**
354
+ * Compute entropy of the branch's logits distribution
355
+ *
356
+ * Measures model uncertainty from the branch's captured logits snapshot:
357
+ * - Low entropy: Model is confident (peaked distribution)
358
+ * - High entropy: Model is uncertain (flat distribution)
359
+ *
360
+ * Operates directly on `state->logits_snapshot` — no JS round-trip.
361
+ *
362
+ * @param base - Logarithm base: "nats" (default) or "bits"
363
+ * @returns Entropy value in specified base
364
+ *
365
+ * COST: O(n_vocab) - must sum over all token probabilities
366
+ */
367
+ modelEntropy(base?: 'nats' | 'bits'): number;
368
+ /**
369
+ * Compute surprisal (negative log-likelihood) for a specific token
370
+ *
371
+ * Measures how "surprising" the model finds the given token from
372
+ * the branch's captured logits snapshot:
373
+ * - Low surprisal: Model expected this token (high probability)
374
+ * - High surprisal: Model didn't expect this token (low probability)
375
+ *
376
+ * Operates directly on `state->logits_snapshot` — no JS round-trip.
377
+ *
378
+ * @param token - Token ID to compute surprisal for
379
+ * @param base - Logarithm base: "nats" (default) or "bits"
380
+ * @returns Surprisal value in specified base
381
+ *
382
+ * COST: O(n_vocab) - softmax normalization required
383
+ */
384
+ modelSurprisal(token: number, base?: 'nats' | 'bits'): number;
385
+ /**
386
+ * Sampling-level perplexity (from filtered distribution)
387
+ *
388
+ * Returns perplexity from the distribution actually sampled from
389
+ * (after top-k/p/temp/penalties). Useful for policy priors and
390
+ * monitoring sampler chain impact.
391
+ *
392
+ * Compare with {@link perplexity} which is model-level (raw logits).
393
+ */
394
+ get samplingPerplexity(): number;
395
+ /**
396
+ * Set static logit biases on this branch
397
+ *
398
+ * Unlike {@link steer} (which is NOT inherited on fork), logit biases
399
+ * ARE cloned when forking. Use for persistent constraints that should
400
+ * propagate to child branches.
401
+ *
402
+ * Applied during sample() in order: Grammar -> Logit Bias -> Steer -> Sampler Chain
403
+ *
404
+ * @param biases - Array of token adjustments. Use `-Infinity` to block,
405
+ * positive to boost, negative to reduce.
406
+ */
407
+ setLogitBias(biases: Array<{
408
+ token: number;
409
+ bias: number;
410
+ }>): void;
411
+ /**
412
+ * Clear all static logit biases from this branch
413
+ */
414
+ clearLogitBias(): void;
415
+ /** Branch's current position (number of tokens decoded) */
416
+ get position(): number;
417
+ /** Branch's perplexity (exp of mean surprisal) */
418
+ get perplexity(): number;
419
+ /** Internal handle (for debugging) */
420
+ get handle(): number;
421
+ /** Whether this branch has been disposed */
422
+ get disposed(): boolean;
423
+ /** Parent branch handle, or null if root */
424
+ get parent(): number | null;
425
+ /** Child branch handles */
426
+ get children(): number[];
427
+ /** True if this branch has no children */
428
+ get isLeaf(): boolean;
429
+ /** True if this branch holds a KV lease */
430
+ get isActive(): boolean;
431
+ /**
432
+ * Async iterator — generate tokens until EOG
433
+ *
434
+ * Commit-before-yield semantics: every yielded token is already written
435
+ * to KV and accepted into the sampler. Breaking out of the loop is clean —
436
+ * no orphaned uncommitted tokens, perplexity reflects all yielded tokens.
437
+ *
438
+ * For inspect-before-commit (speculative decoding, tree search), use
439
+ * the {@link produce}/{@link commit} protocol directly.
440
+ *
441
+ * @example Generate to completion
442
+ * ```typescript
443
+ * for await (const { token, text } of branch) {
444
+ * process.stdout.write(text);
445
+ * }
446
+ * ```
447
+ *
448
+ * @example Generate with consumer-side bound
449
+ * ```typescript
450
+ * const tokens = [];
451
+ * for await (const { token } of branch) {
452
+ * tokens.push(token);
453
+ * if (tokens.length >= limit) break;
454
+ * }
455
+ * ```
456
+ */
457
+ [Symbol.asyncIterator](): AsyncIterableIterator<{
458
+ token: number;
459
+ text: string;
460
+ }>;
461
+ private _ensureNotDisposed;
462
+ }
463
+ //# sourceMappingURL=Branch.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Branch.d.ts","sourceRoot":"","sources":["../src/Branch.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAGxF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AACH,qBAAa,MAAM;IACjB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,SAAS,CAAU;gBAEf,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM;IAM/C;;;;;;;;;;;;;;;;;OAiBG;IACH,MAAM,CAAC,MAAM,CACX,GAAG,EAAE,cAAc,EACnB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,cAAc,EACvB,MAAM,CAAC,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,GACf,MAAM;IAKT;;;;;;;;OAQG;IACG,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC;IAI7B;;;;;;;;;;;OAWG;IACH,QAAQ,IAAI,MAAM;IAMlB;;;;;;;;;;;;OAYG;IACH,SAAS,IAAI,YAAY;IAKzB;;;;;;;;;;;;;;;;;;OAkBG;IACG,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAK9C;;;;;;;OAOG;IACH,MAAM,IAAI,MAAM;IAKhB;;;;OAIG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAK3B;;;;;;;;;OASG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;;;;;;;;OASG;IACH,SAAS,IAAI,IAAI;IAajB;;;;;;OAMG;IACG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAInC;;;;;;OAMG;IACH,gBAAgB,IAAI,IAAI;IAMxB;;;;;;;;;OASG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAKjC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA6DG;IACH,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,IAAI;IAK3D;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,UAAU,IAAI,IAAI;IAKlB;;;;;;;;;;;;;;;;;OAiBG;IACH,gBAAgB,CAAC,MAAM,EAAE,cAAc,GAAG,IAAI;IAK9C;;;;;;;;;;;;;;;OAeG;IACH,UAAU,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI;IAKrC;;;;;;;;;;;;;OAaG;IACH,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,GAAG,IAAI;IA0BjE;;;;;;OAMG;IACG,OAAO,IAAI,OAAO,CAAC,QAAQ,CAAC;IAIlC;;;;;OAKG;IACH,WAAW,IAAI,QAAQ;IAUvB;;;;;;;;;;OAUG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAO1C;;;;;;;;;;;;;OAaG;IACH,YAAY,CAAC,IAAI,GAAE,MAAM,GAAG,MAAe,GAAG,MAAM;IAKpD;;;;;;;;;;;;;;;OAeG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAM,GAAG,MAAe,GAAG,MAAM;IAKrE;;;;;;;;OAQG;IACH,IAAI,kBAAkB,IAAI,MAAM,CAG/B;IAED;;;;;;;;;;;OAWG;IACH,YAAY,CAAC,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK
,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,IAAI;IAKlE;;OAEG;IACH,cAAc,IAAI,IAAI;IAOtB,2DAA2D;IAC3D,IAAI,QAAQ,IAAI,MAAM,CAGrB;IAED,kDAAkD;IAClD,IAAI,UAAU,IAAI,MAAM,CAGvB;IAED,sCAAsC;IACtC,IAAI,MAAM,IAAI,MAAM,CAEnB;IAED,4CAA4C;IAC5C,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,4CAA4C;IAC5C,IAAI,MAAM,IAAI,MAAM,GAAG,IAAI,CAI1B;IAED,2BAA2B;IAC3B,IAAI,QAAQ,IAAI,MAAM,EAAE,CAGvB;IAED,0CAA0C;IAC1C,IAAI,MAAM,IAAI,OAAO,CAGpB;IAED,2CAA2C;IAC3C,IAAI,QAAQ,IAAI,OAAO,CAGtB;IAID;;;;;;;;;;;;;;;;;;;;;;;;;OAyBG;IACI,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,qBAAqB,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IAWvF,OAAO,CAAC,kBAAkB;CAK3B"}