@lloyal-labs/lloyal.node 1.0.5-alpha → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -267
- package/lib/Branch.js +268 -0
- package/lib/index.d.ts +307 -165
- package/lib/index.js +165 -19
- package/package.json +19 -18
- package/scripts/create-platform-package.js +19 -40
- package/scripts/download-test-models.sh +10 -0
- package/scripts/install.js +0 -138
package/lib/Branch.js
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
 * Branch - Forkable inference handle for covalent generation
 *
 * A Branch owns everything needed for independent generation: a KV cache
 * sequence, sampler chain, logits snapshot, and perplexity tracker.
 *
 * Forking is cheap — the KV prefix is shared in memory (a metadata-only
 * operation under unified KV; no KV tensor buffers are copied), so sibling
 * branches read from the same physical KV entries. Only tokens decoded after
 * the fork point are exclusive to each branch. This is the covalent property:
 * branches share a bond (common prefix) while diverging independently.
 *
 * Branches form trees, not just flat lists. Fork from root for best-of-N,
 * fork from children for MCTS/beam search, fork from a draft for speculative
 * decoding.
 *
 * The produce/commit protocol separates sampling from state advancement:
 * produce() samples without writing to KV, letting you inspect the result
 * before deciding to commit(). This two-phase split is what makes speculative
 * verification and tree search natural.
 *
 * Lifecycle: a branch stays usable until prune() or destroy() is called.
 * After that, every operation except the `handle` and `disposed` accessors
 * (and further prune()/destroy() calls, which are no-ops) throws.
 *
 * @example Best-of-N with perplexity selection
 * ```js
 * const root = Branch.create(ctx, 0, tokens.length, { temperature: 0.8 });
 * root.captureLogits();
 *
 * const candidates = [1, 2, 3, 4, 5].map((seqId, i) => {
 *   const branch = root.fork(seqId);
 *   branch.reseedSampler(1000 + i);
 *   return branch;
 * });
 *
 * for (let t = 0; t < 50; t++) {
 *   for (const branch of candidates) {
 *     const { token, isStop } = branch.produce();
 *     if (isStop) continue;
 *     branch.commit(token);
 *   }
 * }
 *
 * const best = candidates.reduce((a, b) => a.perplexity < b.perplexity ? a : b);
 * for (const c of candidates) { if (c !== best) c.prune(); }
 * ```
 */
class Branch {
  /**
   * Wrap an existing native branch handle.
   *
   * Prefer Branch.create() or fork(); both obtain the handle from the
   * context and then call this constructor.
   *
   * @param {SessionContext} ctx - Context that every operation is delegated to
   * @param {number} handle - Branch handle issued by the context
   */
  constructor(ctx, handle) {
    this._ctx = ctx;
    this._handle = handle;
    this._disposed = false;
  }

  /**
   * Create a root branch at the given position
   *
   * The branch takes ownership of the sequence and creates its own sampler
   * chain from the provided params. Call captureLogits() after prefill to
   * freeze the logit distribution before forking.
   *
   * @param {SessionContext} ctx - SessionContext to create branch on
   * @param {number} seqId - Sequence ID for this branch
   * @param {number} position - Starting position (typically prompt token count)
   * @param {SamplingParams} [params] - Sampling parameters (temperature, topP, etc.)
   * @returns {Branch} New Branch instance
   */
  static create(ctx, seqId, position, params) {
    const handle = ctx._branchCreate(seqId, position, params);
    return new Branch(ctx, handle);
  }

  /**
   * Fork this branch to a new sequence
   *
   * The child shares the parent's KV prefix in memory (metadata-only under
   * unified KV, no KV buffer copy). Logits, sampler state, and perplexity
   * tracker are cloned so the child can diverge independently. Fork from any
   * branch — root or intermediate — to build arbitrarily deep trees.
   *
   * Call reseedSampler() on each child for stochastic diversity.
   *
   * @param {number} newSeqId - Sequence ID for the forked branch
   * @returns {Branch} New forked Branch, bound to the same context
   * @throws {Error} If this branch has been disposed
   */
  fork(newSeqId) {
    this._ensureNotDisposed();
    const newHandle = this._ctx._branchFork(this._handle, newSeqId);
    return new Branch(this._ctx, newHandle);
  }

  /**
   * Freeze the current logit distribution into this branch
   *
   * Logits are ephemeral — they're overwritten on the next decode() call.
   * Capturing preserves them so this branch (and any forks from it) can
   * sample from the same distribution. Essential before fork().
   *
   * @throws {Error} If this branch has been disposed
   */
  captureLogits() {
    this._ensureNotDisposed();
    this._ctx._branchCaptureLogits(this._handle);
  }

  /**
   * Single-token forward pass with logit snapshot
   *
   * Runs one decode step (writing the token's KV entries), advances position,
   * and captures the resulting logits for the next sample() call.
   *
   * @param {number} token - Token to decode
   * @throws {Error} If this branch has been disposed
   */
  decodeAndCaptureOne(token) {
    this._ensureNotDisposed();
    this._ctx._branchDecodeAndCaptureOne(this._handle, token);
  }

  /**
   * Sample next token from branch's logits snapshot
   *
   * Applies the branch's full sampler chain (top-k, top-p, temperature,
   * repeat/presence penalties) to the captured logits.
   *
   * @returns {number} Sampled token ID
   * @throws {Error} If this branch has been disposed
   */
  sample() {
    this._ensureNotDisposed();
    return this._ctx._branchSample(this._handle);
  }

  /**
   * Record token in the sampler's repeat/presence penalty window
   *
   * @param {number} token - Token to accept
   * @throws {Error} If this branch has been disposed
   */
  accept(token) {
    this._ensureNotDisposed();
    this._ctx._branchAccept(this._handle, token);
  }

  /**
   * Discard this branch entirely — remove its KV entries and free the handle
   *
   * Use for losers: branches whose generation you want to erase completely.
   * Only removes KV entries divergent from the shared prefix; sibling
   * branches are unaffected.
   *
   * Idempotent: calling this on an already-disposed branch is a no-op.
   */
  prune() {
    if (this._disposed) return;
    this._ctx._branchPrune(this._handle);
    // Flag is set only after the context call returns; if that call throws,
    // the branch is still considered live.
    this._disposed = true;
  }

  /**
   * Release the handle but keep KV cache entries intact
   *
   * Use for winners: you're done branching but want to continue generation
   * on this sequence using raw ctx.decode()/ctx.sample() calls. The KV
   * cache entries remain at their current positions.
   *
   * Idempotent: calling this on an already-disposed branch is a no-op.
   */
  destroy() {
    if (this._disposed) return;
    this._ctx._branchDestroy(this._handle);
    // Flag is set only after the context call returns; if that call throws,
    // the branch is still considered live.
    this._disposed = true;
  }

  /**
   * Reseed the sampler's PRNG for diversity after fork()
   *
   * CRITICAL for parallel generation: Without reseeding, all forked branches
   * produce identical outputs because they share the same PRNG state.
   *
   * Only affects stochastic samplers (temperature > 0). Greedy samplers are
   * unchanged.
   *
   * @param {number} seed - New seed for the PRNG
   * @throws {Error} If this branch has been disposed
   *
   * @example
   * ```js
   * const root = Branch.create(ctx, 0, pos, { temperature: 0.9 });
   * root.captureLogits();
   *
   * // Fork and reseed for diversity
   * const branches = [1, 2, 3, 4, 5].map((seqId, i) => {
   *   const branch = root.fork(seqId);
   *   branch.reseedSampler(1000 + i); // Each branch gets unique seed
   *   return branch;
   * });
   * ```
   */
  reseedSampler(seed) {
    this._ensureNotDisposed();
    this._ctx._branchSamplerChainReseed(this._handle, seed);
  }

  /**
   * Sample the next token without advancing state
   *
   * No KV write, no position update. Inspect the result before deciding
   * to commit() — this separation is what enables speculative verification
   * and conditional branching.
   *
   * @returns {{ token: number, text: string, isStop: boolean }} The sampled
   *   token, its text from ctx.tokenToText(), and ctx.isStopToken()'s verdict
   * @throws {Error} If this branch has been disposed
   */
  produce() {
    this._ensureNotDisposed();
    const token = this.sample();
    return {
      token,
      text: this._ctx.tokenToText(token),
      isStop: this._ctx.isStopToken(token),
    };
  }

  /**
   * Accept and advance — write token to KV and update branch state
   *
   * Accepts the token for repeat-penalty tracking, decodes it (writing to
   * KV cache), and captures the resulting logits for the next produce() call.
   *
   * @param {number} token - Token to commit (from produce())
   * @throws {Error} If this branch has been disposed
   */
  commit(token) {
    this._ensureNotDisposed();
    // Order matters to callers observing the context: accept first, then decode.
    this.accept(token);
    this.decodeAndCaptureOne(token);
  }

  // ===== ACCESSORS =====

  /**
   * @returns {number} Branch's sequence ID
   * @throws {Error} If this branch has been disposed
   */
  get seqId() {
    this._ensureNotDisposed();
    return this._ctx._branchGetSeqId(this._handle);
  }

  /**
   * @returns {number} Branch's current position (number of tokens decoded)
   * @throws {Error} If this branch has been disposed
   */
  get position() {
    this._ensureNotDisposed();
    return this._ctx._branchGetPosition(this._handle);
  }

  /**
   * @returns {number} Branch's perplexity (exp of mean surprisal)
   * @throws {Error} If this branch has been disposed
   */
  get perplexity() {
    this._ensureNotDisposed();
    return this._ctx._branchGetPerplexity(this._handle);
  }

  /** @returns {number} Internal handle (for debugging); readable even after disposal */
  get handle() {
    return this._handle;
  }

  /** @returns {boolean} Whether this branch has been disposed */
  get disposed() {
    return this._disposed;
  }

  // ===== INTERNAL =====

  /**
   * Guard used by every operation that needs a live handle.
   *
   * @throws {Error} If prune() or destroy() has already completed on this branch
   */
  _ensureNotDisposed() {
    if (this._disposed) {
      throw new Error('Branch has been disposed');
    }
  }
}
|
|
267
|
+
|
|
268
|
+
// CommonJS export: the module's exports object exposes the class as its `Branch` property.
module.exports = { Branch };
|