@lloyal-labs/sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +162 -0
- package/dist/Branch.d.ts +463 -0
- package/dist/Branch.d.ts.map +1 -0
- package/dist/Branch.js +608 -0
- package/dist/Branch.js.map +1 -0
- package/dist/BranchStore.d.ts +125 -0
- package/dist/BranchStore.d.ts.map +1 -0
- package/dist/BranchStore.js +155 -0
- package/dist/BranchStore.js.map +1 -0
- package/dist/Rerank.d.ts +38 -0
- package/dist/Rerank.d.ts.map +1 -0
- package/dist/Rerank.js +220 -0
- package/dist/Rerank.js.map +1 -0
- package/dist/Session.d.ts +74 -0
- package/dist/Session.d.ts.map +1 -0
- package/dist/Session.js +93 -0
- package/dist/Session.js.map +1 -0
- package/dist/deltas.d.ts +37 -0
- package/dist/deltas.d.ts.map +1 -0
- package/dist/deltas.js +52 -0
- package/dist/deltas.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +22 -0
- package/dist/index.js.map +1 -0
- package/dist/types.d.ts +1365 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +85 -0
- package/dist/types.js.map +1 -0
- package/package.json +35 -0
package/README.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# @lloyal-labs/sdk
|
|
2
|
+
|
|
3
|
+
Backend-agnostic TypeScript SDK for the lloyal inference platform.
|
|
4
|
+
|
|
5
|
+
Composable inference primitives for forkable decode state, shared-prefix KV branching, and continuous tree batching. Branches share a KV prefix while keeping independent machinery — sampler chain, grammar, logits snapshot, perplexity tracker — for controlled divergence at decode time. `BranchStore` packs tokens from N branches (each at a different position, different seq_id, each needing independent logits captured) into a single `llama_batch` and dispatches once.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm i @lloyal-labs/sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
The SDK exports the `SessionContext` contract and the primitives that operate on it. Backend bindings ([lloyal.node](https://github.com/lloyal-ai/lloyal.node), [nitro-llama](https://github.com/lloyal-ai/nitro-llama)) provide `createContext()` — the SDK takes it from there.
|
|
12
|
+
|
|
13
|
+
## The Branch API
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { createContext } from '@lloyal-labs/lloyal.node';
|
|
17
|
+
import { Branch, BranchStore } from '@lloyal-labs/sdk';
|
|
18
|
+
|
|
19
|
+
const ctx = await createContext({ modelPath: './model.gguf', nSeqMax: 6 });
|
|
20
|
+
const store = new BranchStore(ctx);
|
|
21
|
+
|
|
22
|
+
// Shared prompt: "Explain quantum entanglement"
|
|
23
|
+
const prompt = await ctx.tokenize('Explain quantum entanglement');
|
|
24
|
+
|
|
25
|
+
const root = Branch.create(ctx, 0, { temperature: 0.8 });
|
|
26
|
+
await root.prefill(prompt);
|
|
27
|
+
|
|
28
|
+
// Fork 4 branches — each gets a different reasoning prefix
|
|
29
|
+
const analogy = await root.fork();
|
|
30
|
+
const formal = await root.fork();
|
|
31
|
+
const socratic = await root.fork();
|
|
32
|
+
const visual = await root.fork();
|
|
33
|
+
|
|
34
|
+
// Scatter-prefill: inject divergent prefixes in one batched dispatch
|
|
35
|
+
// 4 branches × variable lengths → auto bin-packed into minimal GPU calls
|
|
36
|
+
await store.prefill([
|
|
37
|
+
[analogy, await ctx.tokenize('Think of it like two coins...')], // 12 tokens
|
|
38
|
+
[formal, await ctx.tokenize('In quantum mechanics, the...')], // 8 tokens
|
|
39
|
+
[socratic, await ctx.tokenize('What happens when you measure...')], // 10 tokens
|
|
40
|
+
[visual, await ctx.tokenize('Imagine two particles...')], // 7 tokens
|
|
41
|
+
]);
|
|
42
|
+
|
|
43
|
+
// Generate — all 4 in lockstep, 1 GPU call per step
|
|
44
|
+
const branches = [analogy, formal, socratic, visual];
|
|
45
|
+
for (const finished = new Set<Branch>(); ; ) {
|
|
46
|
+
const live = branches.filter(b => !b.disposed && !finished.has(b));
|
|
47
|
+
if (!live.length) break;
|
|
48
|
+
|
|
49
|
+
const entries: [Branch, number][] = [];
|
|
50
|
+
for (const b of live) {
|
|
51
|
+
const { token, text, isStop } = b.produceSync();
|
|
52
|
+
if (isStop) { finished.add(b); continue; } // keep finished branches alive — the winner is picked from them below
|
|
53
|
+
entries.push([b, token]);
|
|
54
|
+
}
|
|
55
|
+
if (!entries.length) break;
|
|
56
|
+
await store.commit(entries);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// Winner takes all — one seq_keep pass, losers vaporized
|
|
60
|
+
const winner = branches
|
|
61
|
+
.filter(b => !b.disposed)
|
|
62
|
+
.reduce((a, b) => (a.perplexity < b.perplexity ? a : b));
|
|
63
|
+
await store.retainOnly(winner);
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Or for single-branch generation, Branch is an async iterable — generate until EOG:
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
for await (const { token, text } of branch) {
|
|
70
|
+
process.stdout.write(text);
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Continuous Tree Batching
|
|
75
|
+
|
|
76
|
+
Tree search with N branches means N calls to `llama_decode()` — each paying GPU dispatch overhead, memory barriers, and PCIe round-trips. `BranchStore` eliminates this: tokens from N branches are packed into a single `llama_batch` and dispatched once. N branches, 1 GPU call.
|
|
77
|
+
|
|
78
|
+
Two packing strategies for different access patterns:
|
|
79
|
+
|
|
80
|
+
```typescript
|
|
81
|
+
// commit: 1 token per branch — one GPU dispatch for N branches
|
|
82
|
+
await store.commit([[branch1, tok1], [branch2, tok2], [branch3, tok3]]);
|
|
83
|
+
|
|
84
|
+
// prefill: variable tokens per branch — asymmetric injection
|
|
85
|
+
await store.prefill([
|
|
86
|
+
[branchA, systemTokens], // 200 tokens
|
|
87
|
+
[branchB, queryTokens], // 12 tokens
|
|
88
|
+
[branchC, docTokens], // 800 tokens
|
|
89
|
+
]);
|
|
90
|
+
// Greedy bin-packed into ceil(total / nBatch) dispatches
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Topology
|
|
94
|
+
|
|
95
|
+
Parent/child edges are always-on. Simple chat to best-of-N to deep search is one continuum.
|
|
96
|
+
|
|
97
|
+
```typescript
|
|
98
|
+
branch.parent; // handle or null if root
|
|
99
|
+
branch.children; // child handles
|
|
100
|
+
branch.isLeaf; // no children?
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
| Method | Behavior |
|
|
104
|
+
|--------|----------|
|
|
105
|
+
| `pruneSync()` | Throws if children exist |
|
|
106
|
+
| `pruneSubtreeSync()` | Iterative post-order traversal |
|
|
107
|
+
|
|
108
|
+
## Per-Token Metrics
|
|
109
|
+
|
|
110
|
+
Every branch exposes runtime-accessible information-theoretic measures on every step:
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
branch.modelEntropy('bits'); // Shannon entropy of full vocab distribution, in bits (default unit is nats)
|
|
114
|
+
branch.modelSurprisal(token, 'bits'); // -log2(p) for a specific token (default unit is nats)
|
|
115
|
+
branch.perplexity; // model-level PPL (exp of mean NLL from raw logits)
|
|
116
|
+
branch.samplingPerplexity; // sampling-level PPL (from filtered distribution)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Session
|
|
120
|
+
|
|
121
|
+
`Session` manages the conversation trunk — the single promoted branch that accumulates verified context across queries.
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
const session = new Session({ ctx, store });
|
|
125
|
+
|
|
126
|
+
// Prefill a user turn into the trunk
|
|
127
|
+
await session.prefillUser('What is quantum entanglement?');
|
|
128
|
+
|
|
129
|
+
// After generation + verification, promote a branch to become the new trunk
|
|
130
|
+
await session.promote(verifiedBranch);
|
|
131
|
+
|
|
132
|
+
// Next query starts from the promoted trunk's KV state
|
|
133
|
+
session.trunk; // the live branch
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Rerank
|
|
137
|
+
|
|
138
|
+
Backend-agnostic reranker. The caller provides a `SessionContext` — how it was created (local, remote, quantized) is not the SDK's concern.
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
import { Rerank } from '@lloyal-labs/sdk';
|
|
142
|
+
|
|
143
|
+
const reranker = await Rerank.create(ctx, { nSeqMax: 8 });
|
|
144
|
+
const scores = await reranker.rank(query, documents);
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Exports
|
|
148
|
+
|
|
149
|
+
```typescript
|
|
150
|
+
// Classes
|
|
151
|
+
export { Branch, BranchStore, Session, Rerank };
|
|
152
|
+
|
|
153
|
+
// Delta builders (for tool result injection)
|
|
154
|
+
export { buildUserDelta, buildToolResultDelta };
|
|
155
|
+
|
|
156
|
+
// Types
|
|
157
|
+
export type { SessionContext, SamplingParams, Produced, ContextOptions, ... };
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## License
|
|
161
|
+
|
|
162
|
+
Apache-2.0
|
package/dist/Branch.d.ts
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
import type { SessionContext, SamplingParams, Produced, GrammarTrigger } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Forkable inference handle for covalent generation
|
|
4
|
+
*
|
|
5
|
+
* A Branch owns everything needed for independent generation: a KV cache
|
|
6
|
+
* sequence, sampler chain, logits snapshot, and perplexity tracker.
|
|
7
|
+
*
|
|
8
|
+
* Forking is cheap — the KV prefix is shared in memory (metadata-only operation under unified KV —
|
|
9
|
+
* no KV tensor buffers are copied), so sibling branches read from the same physical KV entries.
|
|
10
|
+
* Only tokens decoded after the fork point are exclusive to each branch.
|
|
11
|
+
*
|
|
12
|
+
* Branches form trees, not just flat lists. Fork from root for best-of-N,
|
|
13
|
+
* fork from children for tree search/beam search, fork from a draft for speculative
|
|
14
|
+
* decoding.
|
|
15
|
+
*
|
|
16
|
+
* The produce/commit protocol separates sampling from state advancement:
|
|
17
|
+
* produce() samples without writing to KV, letting you inspect the result
|
|
18
|
+
* before deciding to commit().
|
|
19
|
+
*
|
|
20
|
+
* @example Best-of-N with perplexity selection
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const root = Branch.create(ctx, 0, { temperature: 0.8 });
|
|
23
|
+
* await root.prefill(tokens);
|
|
24
|
+
*
|
|
25
|
+
* const results = [];
|
|
26
|
+
* for (let i = 0; i < 5; i++) {
|
|
27
|
+
* const branch = await root.fork();
|
|
28
|
+
* branch.reseedSampler(1000 + i);
|
|
29
|
+
* const tokens = [];
|
|
30
|
+
* for await (const { token } of branch) tokens.push(token);
|
|
31
|
+
* results.push({ branch, tokens, ppl: branch.perplexity });
|
|
32
|
+
* }
|
|
33
|
+
*
|
|
34
|
+
* const best = results.reduce((a, b) => a.ppl < b.ppl ? a : b);
|
|
35
|
+
* for (const r of results) { if (r !== best) await r.branch.prune(); }
|
|
36
|
+
* ```
|
|
37
|
+
*
|
|
38
|
+
* @category Branching
|
|
39
|
+
*/
|
|
40
|
+
export declare class Branch {
|
|
41
|
+
private _ctx;
|
|
42
|
+
private _handle;
|
|
43
|
+
private _disposed;
|
|
44
|
+
constructor(ctx: SessionContext, handle: number);
|
|
45
|
+
/**
|
|
46
|
+
* Create a root branch at the given position
|
|
47
|
+
*
|
|
48
|
+
* The branch takes ownership of the sequence and creates its own sampler
|
|
49
|
+
* chain from the provided params. Call prefill() to decode prompt tokens
|
|
50
|
+
* and capture the logit distribution before forking.
|
|
51
|
+
*
|
|
52
|
+
* @param ctx - SessionContext to create branch on
|
|
53
|
+
* @param position - Starting position: the number of tokens already decoded into this sequence (0 for a fresh sequence that will be filled via prefill())
|
|
54
|
+
* @param params - Sampling parameters (temperature, topP, etc.)
|
|
55
|
+
* @param nBatch - Per-branch batch size override (defaults to context nBatch).
|
|
56
|
+
* Controls chunk size for prefill(). Has no effect on
|
|
57
|
+
* single-token commit() which uses a zero-allocation fast path.
|
|
58
|
+
* @param grammar - GBNF grammar string for constrained generation.
|
|
59
|
+
* When provided, sample() returns only grammar-valid tokens. The grammar state
|
|
60
|
+
* is cloned on fork(), so sibling branches can diverge independently.
|
|
61
|
+
* @returns New Branch instance
|
|
62
|
+
*/
|
|
63
|
+
static create(ctx: SessionContext, position: number, params?: SamplingParams, nBatch?: number, grammar?: string): Branch;
|
|
64
|
+
/**
|
|
65
|
+
* Fork this branch to a new sequence (async)
|
|
66
|
+
*
|
|
67
|
+
* Async contract: local branches resolve immediately; cloud branches
|
|
68
|
+
* may perform an HTTP round-trip. Use {@link forkSync} when you know
|
|
69
|
+
* the branch is local and want zero-overhead forking.
|
|
70
|
+
*
|
|
71
|
+
* @returns New forked Branch
|
|
72
|
+
*/
|
|
73
|
+
fork(): Promise<Branch>;
|
|
74
|
+
/**
|
|
75
|
+
* Fork this branch to a new sequence (sync)
|
|
76
|
+
*
|
|
77
|
+
* The child shares the parent's KV prefix in memory (metadata-only under unified KV, no KV buffer copy).
|
|
78
|
+
* Logits, sampler state, and perplexity tracker are cloned so the child
|
|
79
|
+
* can diverge independently. Fork from any branch — root or intermediate —
|
|
80
|
+
* to build arbitrarily deep trees.
|
|
81
|
+
*
|
|
82
|
+
* Call reseedSampler() on each child for stochastic diversity.
|
|
83
|
+
*
|
|
84
|
+
* @returns New forked Branch
|
|
85
|
+
*/
|
|
86
|
+
forkSync(): Branch;
|
|
87
|
+
/**
|
|
88
|
+
* Get a copy of this branch's captured logits snapshot.
|
|
89
|
+
*
|
|
90
|
+
* Returns n_vocab floats — the raw logit distribution from the last
|
|
91
|
+
* prefill() or commit() call.
|
|
92
|
+
*
|
|
93
|
+
* Returns an independent copy of the branch's internal snapshot.
|
|
94
|
+
* The returned Float32Array is safe to hold across async boundaries
|
|
95
|
+
* and is not affected by subsequent decode operations.
|
|
96
|
+
*
|
|
97
|
+
* @returns Independent copy of the logits snapshot (n_vocab elements)
|
|
98
|
+
* @throws If no logits have been captured yet
|
|
99
|
+
*/
|
|
100
|
+
getLogits(): Float32Array;
|
|
101
|
+
/**
|
|
102
|
+
* Bulk-decode tokens into the branch's KV cache and capture logits.
|
|
103
|
+
*
|
|
104
|
+
* `tokens.length` is the total count to process; the branch's `nBatch`
|
|
105
|
+
* (set at `Branch.create`) controls how many are sent per `llama_decode`
|
|
106
|
+
* call. E.g. 500 tokens with `nBatch=64` → 8 calls (7×64 + 1×52).
|
|
107
|
+
*
|
|
108
|
+
* Advances `position` by `tokens.length`. Stores final logits into the
|
|
109
|
+
* branch's internal snapshot — the next `produce()`/`sample()` reads
|
|
110
|
+
* from it.
|
|
111
|
+
*
|
|
112
|
+
* Does NOT accept tokens into the repeat-penalty window — for external
|
|
113
|
+
* tokens (user input between turns), not model-generated tokens.
|
|
114
|
+
* For model output, use `commit()` which does accept + decode.
|
|
115
|
+
*
|
|
116
|
+
* The primary way to feed tokens into a branch's KV cache.
|
|
117
|
+
*
|
|
118
|
+
* @param tokens - Token IDs to decode
|
|
119
|
+
*/
|
|
120
|
+
prefill(tokens: number[]): Promise<void>;
|
|
121
|
+
/**
|
|
122
|
+
* Sample next token from branch's logits snapshot
|
|
123
|
+
*
|
|
124
|
+
* Applies the branch's full sampler chain (top-k, top-p, temperature,
|
|
125
|
+
* repeat/presence penalties) to the captured logits.
|
|
126
|
+
*
|
|
127
|
+
* @returns Sampled token ID
|
|
128
|
+
*/
|
|
129
|
+
sample(): number;
|
|
130
|
+
/**
|
|
131
|
+
* Record token in the sampler's repeat/presence penalty window
|
|
132
|
+
*
|
|
133
|
+
* @param token - Token to accept
|
|
134
|
+
*/
|
|
135
|
+
accept(token: number): void;
|
|
136
|
+
/**
|
|
137
|
+
* Discard this branch (async)
|
|
138
|
+
*
|
|
139
|
+
* Async contract: local branches resolve immediately; cloud branches
|
|
140
|
+
* may perform an HTTP round-trip. Use {@link pruneSync} when you know
|
|
141
|
+
* the branch is local.
|
|
142
|
+
*
|
|
143
|
+
* RESTRICT mode: throws if children exist. Use {@link pruneSubtree} to
|
|
144
|
+
* cascade-delete an entire subtree.
|
|
145
|
+
*/
|
|
146
|
+
prune(): Promise<void>;
|
|
147
|
+
/**
|
|
148
|
+
* Discard this branch — remove its divergent KV entries and free the handle (sync)
|
|
149
|
+
*
|
|
150
|
+
* Only removes KV entries divergent from the shared prefix; sibling branches
|
|
151
|
+
* are unaffected. The disposed flag is set synchronously — any call to
|
|
152
|
+
* produce(), commit(), etc. after prune() will throw immediately.
|
|
153
|
+
*
|
|
154
|
+
* RESTRICT mode: throws if children exist. Use {@link pruneSubtreeSync} to
|
|
155
|
+
* cascade-delete an entire subtree.
|
|
156
|
+
*/
|
|
157
|
+
pruneSync(): void;
|
|
158
|
+
/**
|
|
159
|
+
* Discard this branch and all its descendants (async)
|
|
160
|
+
*
|
|
161
|
+
* Async contract: local branches resolve immediately; cloud branches
|
|
162
|
+
* may perform an HTTP round-trip. Use {@link pruneSubtreeSync} when you know
|
|
163
|
+
* the branch is local.
|
|
164
|
+
*/
|
|
165
|
+
pruneSubtree(): Promise<void>;
|
|
166
|
+
/**
|
|
167
|
+
* Discard this branch and all its descendants — CASCADE delete (sync)
|
|
168
|
+
*
|
|
169
|
+
* Iterative post-order traversal: prunes children first, then this branch.
|
|
170
|
+
* Use when tearing down an entire subtree (e.g. abandoned search path).
|
|
171
|
+
* Sets disposed synchronously.
|
|
172
|
+
*/
|
|
173
|
+
pruneSubtreeSync(): void;
|
|
174
|
+
/**
|
|
175
|
+
* Reseed the sampler's PRNG for diversity after fork()
|
|
176
|
+
*
|
|
177
|
+
* CRITICAL for parallel generation: Without reseeding, all forked branches
|
|
178
|
+
* produce identical outputs because each fork starts from a clone of the parent's PRNG state.
|
|
179
|
+
*
|
|
180
|
+
* Only affects stochastic samplers (temperature > 0). Greedy samplers are unchanged.
|
|
181
|
+
*
|
|
182
|
+
* @param seed - New seed for the PRNG
|
|
183
|
+
*/
|
|
184
|
+
reseedSampler(seed: number): void;
|
|
185
|
+
/**
|
|
186
|
+
* Apply dynamic logit adjustments for this branch only
|
|
187
|
+
*
|
|
188
|
+
* Unlike `logit_bias` in sampling params (which is cloned on fork), steer biases
|
|
189
|
+
* are NOT inherited by child branches. Each branch manages its own steer state
|
|
190
|
+
* independently. This makes steer ideal for path-dependent constraints.
|
|
191
|
+
*
|
|
192
|
+
* **Use cases:**
|
|
193
|
+
* - **tsampler**: Block tokens that would create repeated N-grams based on
|
|
194
|
+
* this branch's specific generation history
|
|
195
|
+
* - **Diverse beam search**: Penalize tokens already chosen by sibling beams
|
|
196
|
+
* to encourage output diversity across the beam
|
|
197
|
+
* - **Dynamic constraints**: Apply token restrictions that change per-step
|
|
198
|
+
*
|
|
199
|
+
* **Sampling order:** Grammar → Logit Bias → Steer → Sampler Chain
|
|
200
|
+
*
|
|
201
|
+
* @param biases - Array of token adjustments. Use `-Infinity` to completely
|
|
202
|
+
* block a token, positive values to boost probability, negative to reduce.
|
|
203
|
+
*
|
|
204
|
+
* @example Block tokens for N-gram deduplication (tsampler pattern)
|
|
205
|
+
* ```ts
|
|
206
|
+
* // Compute which tokens would create repeated 4-grams
|
|
207
|
+
* const blocked = computeNgramBlocks(generatedTokens, 4); // n = 4
|
|
208
|
+
*
|
|
209
|
+
* // Block those tokens for this sample only
|
|
210
|
+
* branch.steer(blocked.map(t => ({ token: t, bias: -Infinity })));
|
|
211
|
+
*
|
|
212
|
+
* const { token } = await branch.produce(); // Blocked tokens won't be sampled
|
|
213
|
+
* await branch.commit(token);
|
|
214
|
+
*
|
|
215
|
+
* // Clear for next iteration (recompute based on new history)
|
|
216
|
+
* branch.clearSteer();
|
|
217
|
+
* ```
|
|
218
|
+
*
|
|
219
|
+
* @example Diverse beam search
|
|
220
|
+
* ```ts
|
|
221
|
+
* // Each beam penalizes tokens chosen by siblings this step
|
|
222
|
+
* for (const beam of beams) {
|
|
223
|
+
* // Collect tokens chosen by other beams
|
|
224
|
+
* const siblingTokens = beams
|
|
225
|
+
* .filter(b => b !== beam && b.lastToken !== undefined)
|
|
226
|
+
* .map(b => b.lastToken);
|
|
227
|
+
*
|
|
228
|
+
* // Penalize sibling choices to encourage diversity
|
|
229
|
+
* beam.branch.steer(siblingTokens.map(t => ({ token: t, bias: -2.0 })));
|
|
230
|
+
*
|
|
231
|
+
* const { token } = await beam.branch.produce();
|
|
232
|
+
* await beam.branch.commit(token);
|
|
233
|
+
* beam.lastToken = token;
|
|
234
|
+
* beam.branch.clearSteer();
|
|
235
|
+
* }
|
|
236
|
+
* ```
|
|
237
|
+
*
|
|
238
|
+
* @example Boost specific tokens
|
|
239
|
+
* ```ts
|
|
240
|
+
* // Boost "yes" and "no" tokens for a yes/no question
|
|
241
|
+
* branch.steer([
|
|
242
|
+
* { token: yesTokenId, bias: 5.0 },
|
|
243
|
+
* { token: noTokenId, bias: 5.0 }
|
|
244
|
+
* ]);
|
|
245
|
+
* ```
|
|
246
|
+
*/
|
|
247
|
+
steer(biases: Array<{
|
|
248
|
+
token: number;
|
|
249
|
+
bias: number;
|
|
250
|
+
}>): void;
|
|
251
|
+
/**
|
|
252
|
+
* Clear all steer biases from this branch
|
|
253
|
+
*
|
|
254
|
+
* Removes any dynamic logit adjustments set by `steer()`. Call this after
|
|
255
|
+
* each generation step if your steer constraints are computed per-step
|
|
256
|
+
* (e.g., N-gram blocking where the blocked set changes as text grows).
|
|
257
|
+
*
|
|
258
|
+
* @example Per-step steer pattern
|
|
259
|
+
* ```ts
|
|
260
|
+
* for (let i = 0; i < maxTokens; i++) {
|
|
261
|
+
* // Compute constraints based on current state
|
|
262
|
+
* const blocked = computeConstraints(generatedTokens);
|
|
263
|
+
* branch.steer(blocked.map(t => ({ token: t, bias: -Infinity })));
|
|
264
|
+
*
|
|
265
|
+
* const { token, isStop } = await branch.produce();
|
|
266
|
+
* if (isStop) break;
|
|
267
|
+
*
|
|
268
|
+
* await branch.commit(token);
|
|
269
|
+
* branch.clearSteer(); // Reset for next iteration
|
|
270
|
+
* generatedTokens.push(token);
|
|
271
|
+
* }
|
|
272
|
+
* ```
|
|
273
|
+
*/
|
|
274
|
+
clearSteer(): void;
|
|
275
|
+
/**
|
|
276
|
+
* Replace the sampler chain with new parameters (memoized)
|
|
277
|
+
*
|
|
278
|
+
* If the new params match the current chain's params, this is a no-op.
|
|
279
|
+
* Otherwise the old chain is freed and a new one is created. Use for
|
|
280
|
+
* Entropy-Driven Temperature (EDT) and other adaptive sampling strategies
|
|
281
|
+
* that adjust parameters per-step.
|
|
282
|
+
*
|
|
283
|
+
* @param params - New sampling parameters
|
|
284
|
+
*
|
|
285
|
+
* @example Entropy-Driven Temperature
|
|
286
|
+
* ```typescript
|
|
287
|
+
* const entropy = branch.modelEntropy('nats');
|
|
288
|
+
* branch.setSamplerParams({ temperature: edtTemperature(entropy) });
|
|
289
|
+
* const { token } = await branch.produce();
|
|
290
|
+
* await branch.commit(token);
|
|
291
|
+
* ```
|
|
292
|
+
*/
|
|
293
|
+
setSamplerParams(params: SamplingParams): void;
|
|
294
|
+
/**
|
|
295
|
+
* Replace or remove the grammar constraint
|
|
296
|
+
*
|
|
297
|
+
* Pass a GBNF grammar string to constrain generation. Pass empty string
|
|
298
|
+
* or undefined to remove the constraint. The grammar state is cloned on
|
|
299
|
+
* fork(), so sibling branches can diverge independently after hot-swap.
|
|
300
|
+
*
|
|
301
|
+
* @param grammarStr - GBNF grammar string, or empty/undefined to remove
|
|
302
|
+
*
|
|
303
|
+
* @example Hot-swap grammar mid-generation
|
|
304
|
+
* ```typescript
|
|
305
|
+
* // Start unconstrained, then switch to JSON after detecting tool call
|
|
306
|
+
* branch.setGrammar(jsonGrammar);
|
|
307
|
+
* const { token } = await branch.produce();
|
|
308
|
+
* ```
|
|
309
|
+
*/
|
|
310
|
+
setGrammar(grammarStr?: string): void;
|
|
311
|
+
/**
|
|
312
|
+
* Set lazy grammar — unconstrained until trigger, then grammar-constrained
|
|
313
|
+
*
|
|
314
|
+
* Generation runs freely until a trigger pattern or token fires, at which
|
|
315
|
+
* point the grammar activates and constrains subsequent tokens. Used for
|
|
316
|
+
* tool-call generation: model writes freely until `<tool_call>`, then
|
|
317
|
+
* grammar forces valid XML structure.
|
|
318
|
+
*
|
|
319
|
+
* The grammar state is cloned on fork(), so sibling branches can diverge
|
|
320
|
+
* independently. Call again after a tool result prefill to reset.
|
|
321
|
+
*
|
|
322
|
+
* @param grammar - GBNF grammar string
|
|
323
|
+
* @param triggers - Trigger conditions from formatChat().grammarTriggers
|
|
324
|
+
*/
|
|
325
|
+
setGrammarLazy(grammar: string, triggers: GrammarTrigger[]): void;
|
|
326
|
+
/**
|
|
327
|
+
* Sample next token without advancing state (async)
|
|
328
|
+
*
|
|
329
|
+
* Async contract: local branches resolve immediately; cloud branches
|
|
330
|
+
* may perform an HTTP round-trip. Use {@link produceSync} when you know
|
|
331
|
+
* the branch is local and want zero-overhead sampling.
|
|
332
|
+
*/
|
|
333
|
+
produce(): Promise<Produced>;
|
|
334
|
+
/**
|
|
335
|
+
* Sample next token without advancing state (sync)
|
|
336
|
+
*
|
|
337
|
+
* Same as {@link produce} but synchronous. Use when you know the branch
|
|
338
|
+
* is local and want to avoid the microtick overhead of a promise.
|
|
339
|
+
*/
|
|
340
|
+
produceSync(): Produced;
|
|
341
|
+
/**
|
|
342
|
+
* Accept and decode — update branch state, then write token to KV
|
|
343
|
+
*
|
|
344
|
+
* Accepts the token into the sampler penalty window (for correct PPL
|
|
345
|
+
* measurement), then decodes (writing to KV cache via AsyncWorker on
|
|
346
|
+
* the libuv thread pool) and captures the resulting logits for the next
|
|
347
|
+
* produce() call. Accept-first ordering with rollback: if decode throws,
|
|
348
|
+
* sampler/grammar/metrics are restored from clones.
|
|
349
|
+
*
|
|
350
|
+
* @param token Token to commit (from produce())
|
|
351
|
+
*/
|
|
352
|
+
commit(token: number): Promise<void>;
|
|
353
|
+
/**
|
|
354
|
+
* Compute entropy of the branch's logits distribution
|
|
355
|
+
*
|
|
356
|
+
* Measures model uncertainty from the branch's captured logits snapshot:
|
|
357
|
+
* - Low entropy: Model is confident (peaked distribution)
|
|
358
|
+
* - High entropy: Model is uncertain (flat distribution)
|
|
359
|
+
*
|
|
360
|
+
* Operates directly on `state->logits_snapshot` — no JS round-trip.
|
|
361
|
+
*
|
|
362
|
+
* @param base - Logarithm base: "nats" (default) or "bits"
|
|
363
|
+
* @returns Entropy value in specified base
|
|
364
|
+
*
|
|
365
|
+
* COST: O(n_vocab) - must sum over all token probabilities
|
|
366
|
+
*/
|
|
367
|
+
modelEntropy(base?: 'nats' | 'bits'): number;
|
|
368
|
+
/**
|
|
369
|
+
* Compute surprisal (negative log-likelihood) for a specific token
|
|
370
|
+
*
|
|
371
|
+
* Measures how "surprising" the model finds the given token from
|
|
372
|
+
* the branch's captured logits snapshot:
|
|
373
|
+
* - Low surprisal: Model expected this token (high probability)
|
|
374
|
+
* - High surprisal: Model didn't expect this token (low probability)
|
|
375
|
+
*
|
|
376
|
+
* Operates directly on `state->logits_snapshot` — no JS round-trip.
|
|
377
|
+
*
|
|
378
|
+
* @param token - Token ID to compute surprisal for
|
|
379
|
+
* @param base - Logarithm base: "nats" (default) or "bits"
|
|
380
|
+
* @returns Surprisal value in specified base
|
|
381
|
+
*
|
|
382
|
+
* COST: O(n_vocab) - softmax normalization required
|
|
383
|
+
*/
|
|
384
|
+
modelSurprisal(token: number, base?: 'nats' | 'bits'): number;
|
|
385
|
+
/**
|
|
386
|
+
* Sampling-level perplexity (from filtered distribution)
|
|
387
|
+
*
|
|
388
|
+
* Returns perplexity from the distribution actually sampled from
|
|
389
|
+
* (after top-k/p/temp/penalties). Useful for policy priors and
|
|
390
|
+
* monitoring sampler chain impact.
|
|
391
|
+
*
|
|
392
|
+
* Compare with {@link perplexity} which is model-level (raw logits).
|
|
393
|
+
*/
|
|
394
|
+
get samplingPerplexity(): number;
|
|
395
|
+
/**
|
|
396
|
+
* Set static logit biases on this branch
|
|
397
|
+
*
|
|
398
|
+
* Unlike {@link steer} (which is NOT inherited on fork), logit biases
|
|
399
|
+
* ARE cloned when forking. Use for persistent constraints that should
|
|
400
|
+
* propagate to child branches.
|
|
401
|
+
*
|
|
402
|
+
* Applied during sample() in order: Grammar -> Logit Bias -> Steer -> Sampler Chain
|
|
403
|
+
*
|
|
404
|
+
* @param biases - Array of token adjustments. Use `-Infinity` to block,
|
|
405
|
+
* positive to boost, negative to reduce.
|
|
406
|
+
*/
|
|
407
|
+
setLogitBias(biases: Array<{
|
|
408
|
+
token: number;
|
|
409
|
+
bias: number;
|
|
410
|
+
}>): void;
|
|
411
|
+
/**
|
|
412
|
+
* Clear all static logit biases from this branch
|
|
413
|
+
*/
|
|
414
|
+
clearLogitBias(): void;
|
|
415
|
+
/** Branch's current position (number of tokens decoded) */
|
|
416
|
+
get position(): number;
|
|
417
|
+
/** Branch's perplexity (exp of mean surprisal) */
|
|
418
|
+
get perplexity(): number;
|
|
419
|
+
/** Internal handle (for debugging) */
|
|
420
|
+
get handle(): number;
|
|
421
|
+
/** Whether this branch has been disposed */
|
|
422
|
+
get disposed(): boolean;
|
|
423
|
+
/** Parent branch handle, or null if root */
|
|
424
|
+
get parent(): number | null;
|
|
425
|
+
/** Child branch handles */
|
|
426
|
+
get children(): number[];
|
|
427
|
+
/** True if this branch has no children */
|
|
428
|
+
get isLeaf(): boolean;
|
|
429
|
+
/** True if this branch holds a KV lease */
|
|
430
|
+
get isActive(): boolean;
|
|
431
|
+
/**
|
|
432
|
+
* Async iterator — generate tokens until EOG
|
|
433
|
+
*
|
|
434
|
+
* Commit-before-yield semantics: every yielded token is already written
|
|
435
|
+
* to KV and accepted into the sampler. Breaking out of the loop is clean —
|
|
436
|
+
* no orphaned uncommitted tokens, perplexity reflects all yielded tokens.
|
|
437
|
+
*
|
|
438
|
+
* For inspect-before-commit (speculative decoding, tree search), use
|
|
439
|
+
* the {@link produce}/{@link commit} protocol directly.
|
|
440
|
+
*
|
|
441
|
+
* @example Generate to completion
|
|
442
|
+
* ```typescript
|
|
443
|
+
* for await (const { token, text } of branch) {
|
|
444
|
+
* process.stdout.write(text);
|
|
445
|
+
* }
|
|
446
|
+
* ```
|
|
447
|
+
*
|
|
448
|
+
* @example Generate with consumer-side bound
|
|
449
|
+
* ```typescript
|
|
450
|
+
* const tokens = [];
|
|
451
|
+
* for await (const { token } of branch) {
|
|
452
|
+
* tokens.push(token);
|
|
453
|
+
* if (tokens.length >= limit) break;
|
|
454
|
+
* }
|
|
455
|
+
* ```
|
|
456
|
+
*/
|
|
457
|
+
[Symbol.asyncIterator](): AsyncIterableIterator<{
|
|
458
|
+
token: number;
|
|
459
|
+
text: string;
|
|
460
|
+
}>;
|
|
461
|
+
private _ensureNotDisposed;
|
|
462
|
+
}
|
|
463
|
+
//# sourceMappingURL=Branch.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Branch.d.ts","sourceRoot":"","sources":["../src/Branch.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAGxF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AACH,qBAAa,MAAM;IACjB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,SAAS,CAAU;gBAEf,GAAG,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM;IAM/C;;;;;;;;;;;;;;;;;OAiBG;IACH,MAAM,CAAC,MAAM,CACX,GAAG,EAAE,cAAc,EACnB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,cAAc,EACvB,MAAM,CAAC,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,GACf,MAAM;IAKT;;;;;;;;OAQG;IACG,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC;IAI7B;;;;;;;;;;;OAWG;IACH,QAAQ,IAAI,MAAM;IAMlB;;;;;;;;;;;;OAYG;IACH,SAAS,IAAI,YAAY;IAKzB;;;;;;;;;;;;;;;;;;OAkBG;IACG,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAK9C;;;;;;;OAOG;IACH,MAAM,IAAI,MAAM;IAKhB;;;;OAIG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAK3B;;;;;;;;;OASG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;;;;;;;;OASG;IACH,SAAS,IAAI,IAAI;IAajB;;;;;;OAMG;IACG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAInC;;;;;;OAMG;IACH,gBAAgB,IAAI,IAAI;IAMxB;;;;;;;;;OASG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAKjC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA6DG;IACH,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,IAAI;IAK3D;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,UAAU,IAAI,IAAI;IAKlB;;;;;;;;;;;;;;;;;OAiBG;IACH,gBAAgB,CAAC,MAAM,EAAE,cAAc,GAAG,IAAI;IAK9C;;;;;;;;;;;;;;;OAeG;IACH,UAAU,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI;IAKrC;;;;;;;;;;;;;OAaG;IACH,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,cAAc,EAAE,GAAG,IAAI;IA0BjE;;;;;;OAMG;IACG,OAAO,IAAI,OAAO,CAAC,QAAQ,CAAC;IAIlC;;;;;OAKG;IACH,WAAW,IAAI,QAAQ;IAUvB;;;;;;;;;;OAUG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAO1C;;;;;;;;;;;;;OAaG;IACH,YAAY,CAAC,IAAI,GAAE,MAAM,GAAG,MAAe,GAAG,MAAM;IAKpD;;;;;;;;;;;;;;;OAeG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAM,GAAG,MAAe,GAAG,MAAM;IAKrE;;;;;;;;OAQG;IACH,IAAI,kBAAkB,IAAI,MAAM,CAG/B;IAED;;;;;;;;;;;OAWG;IACH,YAAY,CAAC,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,E
AAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,IAAI;IAKlE;;OAEG;IACH,cAAc,IAAI,IAAI;IAOtB,2DAA2D;IAC3D,IAAI,QAAQ,IAAI,MAAM,CAGrB;IAED,kDAAkD;IAClD,IAAI,UAAU,IAAI,MAAM,CAGvB;IAED,sCAAsC;IACtC,IAAI,MAAM,IAAI,MAAM,CAEnB;IAED,4CAA4C;IAC5C,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,4CAA4C;IAC5C,IAAI,MAAM,IAAI,MAAM,GAAG,IAAI,CAI1B;IAED,2BAA2B;IAC3B,IAAI,QAAQ,IAAI,MAAM,EAAE,CAGvB;IAED,0CAA0C;IAC1C,IAAI,MAAM,IAAI,OAAO,CAGpB;IAED,2CAA2C;IAC3C,IAAI,QAAQ,IAAI,OAAO,CAGtB;IAID;;;;;;;;;;;;;;;;;;;;;;;;;OAyBG;IACI,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,qBAAqB,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IAWvF,OAAO,CAAC,kBAAkB;CAK3B"}
|