@lloyal-labs/lloyal.node 1.0.5-alpha → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/Branch.js ADDED
@@ -0,0 +1,268 @@
1
/**
 * Branch — a forkable handle over one line of generation.
 *
 * A Branch bundles what independent generation needs: a KV cache sequence,
 * a sampler chain, a logits snapshot, and a perplexity tracker.
 *
 * Forking is inexpensive. Under unified KV it is a metadata-only operation:
 * no KV tensor buffers are copied, and sibling branches read the same
 * physical KV entries for their common prefix. Only tokens decoded after the
 * fork point belong to a single branch. That shared bond plus independent
 * divergence is the "covalent" property.
 *
 * Branches compose into trees rather than flat lists: fork the root for
 * best-of-N, fork children for MCTS/beam search, fork a draft for
 * speculative decoding.
 *
 * Generation is split into two phases. produce() samples a candidate
 * without writing to KV; commit() advances state with a chosen token. Being
 * able to look at a token before committing it is what makes speculative
 * verification and tree search straightforward.
 *
 * @example Best-of-N, keeping the lowest-perplexity branch
 * ```js
 * const root = Branch.create(ctx, 0, tokens.length, { temperature: 0.8 });
 * root.captureLogits();
 *
 * const candidates = [1, 2, 3, 4, 5].map((seqId, i) => {
 *   const branch = root.fork(seqId);
 *   branch.reseedSampler(1000 + i);
 *   return branch;
 * });
 *
 * const finished = new Set();
 * for (let t = 0; t < 50; t++) {
 *   for (const branch of candidates) {
 *     if (finished.has(branch)) continue;
 *     const { token, isStop } = branch.produce();
 *     if (isStop) {
 *       finished.add(branch); // never resample a branch that has stopped
 *       continue;
 *     }
 *     branch.commit(token);
 *   }
 * }
 *
 * const best = candidates.reduce((a, b) => (a.perplexity < b.perplexity ? a : b));
 * for (const c of candidates) {
 *   if (c !== best) c.prune();
 * }
 * ```
 */

class Branch {
  /**
   * Wrap an existing native branch handle. Prefer Branch.create() or
   * fork() over calling this directly.
   *
   * @param {SessionContext} ctx - Context that owns the native handle
   * @param {number} handle - Native branch handle
   */
  constructor(ctx, handle) {
    this._ctx = ctx;
    this._handle = handle;
    this._disposed = false;
  }

  /**
   * Build a root branch starting at `position`.
   *
   * The new branch takes ownership of the sequence and gets its own sampler
   * chain built from `params`. After prefill, call captureLogits() to freeze
   * the logit distribution before forking.
   *
   * @param {SessionContext} ctx - SessionContext to create the branch on
   * @param {number} seqId - Sequence ID for this branch
   * @param {number} position - Starting position (typically prompt token count)
   * @param {SamplingParams} [params] - Sampling parameters (temperature, topP, etc.)
   * @returns {Branch} New Branch instance
   */
  static create(ctx, seqId, position, params) {
    return new Branch(ctx, ctx._branchCreate(seqId, position, params));
  }

  /**
   * Fork this branch onto a new sequence.
   *
   * The child shares the parent's KV prefix in memory (metadata-only under
   * unified KV, no KV buffer copy). Logits, sampler state, and the
   * perplexity tracker are cloned so the child can diverge on its own. Any
   * branch, root or intermediate, may be forked, so trees can be
   * arbitrarily deep.
   *
   * Call reseedSampler() on each child to get stochastic diversity.
   *
   * @param {number} newSeqId - Sequence ID for the forked branch
   * @returns {Branch} New forked Branch
   * @throws {Error} If this branch has been disposed
   */
  fork(newSeqId) {
    this._ensureNotDisposed();
    const owner = this._ctx;
    return new Branch(owner, owner._branchFork(this._handle, newSeqId));
  }

  /**
   * Freeze the current logit distribution into this branch.
   *
   * Logits are ephemeral and get overwritten by the next decode() call.
   * Capturing them lets this branch, and any fork taken from it, sample
   * from the same distribution. Do this before fork().
   *
   * @throws {Error} If this branch has been disposed
   */
  captureLogits() {
    this._ensureNotDisposed();
    this._ctx._branchCaptureLogits(this._handle);
  }

  /**
   * Single-token forward pass followed by a logit snapshot.
   *
   * Runs one decode step (writing the token's KV entries), advances the
   * position, and captures the resulting logits for the next sample() call.
   *
   * @param {number} token - Token to decode
   * @throws {Error} If this branch has been disposed
   */
  decodeAndCaptureOne(token) {
    this._ensureNotDisposed();
    this._ctx._branchDecodeAndCaptureOne(this._handle, token);
  }

  /**
   * Sample the next token from this branch's logits snapshot.
   *
   * The branch's full sampler chain (top-k, top-p, temperature,
   * repeat/presence penalties) is applied to the captured logits.
   *
   * @returns {number} Sampled token ID
   * @throws {Error} If this branch has been disposed
   */
  sample() {
    this._ensureNotDisposed();
    return this._ctx._branchSample(this._handle);
  }

  /**
   * Record a token in the sampler's repeat/presence penalty window.
   *
   * @param {number} token - Token to accept
   * @throws {Error} If this branch has been disposed
   */
  accept(token) {
    this._ensureNotDisposed();
    this._ctx._branchAccept(this._handle, token);
  }

  /**
   * Throw this branch away: remove its KV entries and free the handle.
   *
   * Meant for losing branches whose generation should be erased. Only KV
   * entries that diverge from the shared prefix are removed, so siblings
   * are unaffected. Calling it on an already-disposed branch is a no-op.
   */
  prune() {
    if (!this._disposed) {
      this._ctx._branchPrune(this._handle);
      this._disposed = true;
    }
  }

  /**
   * Release the handle while leaving the KV cache entries in place.
   *
   * Meant for the winning branch: branching is over, but generation on this
   * sequence continues through raw ctx.decode()/ctx.sample() calls. KV
   * entries stay at their current positions. Calling it on an
   * already-disposed branch is a no-op.
   */
  destroy() {
    if (!this._disposed) {
      this._ctx._branchDestroy(this._handle);
      this._disposed = true;
    }
  }

  /**
   * Reseed the sampler's PRNG so forks diverge.
   *
   * Important for parallel generation: forked branches share the same PRNG
   * state, so without a reseed they all produce identical output.
   *
   * Only stochastic samplers (temperature > 0) are affected; greedy
   * samplers are unchanged.
   *
   * @param {number} seed - New seed for the PRNG
   * @throws {Error} If this branch has been disposed
   *
   * @example
   * ```js
   * const root = Branch.create(ctx, 0, pos, { temperature: 0.9 });
   * root.captureLogits();
   *
   * // One distinct seed per fork
   * const branches = [1, 2, 3, 4, 5].map((seqId, i) => {
   *   const branch = root.fork(seqId);
   *   branch.reseedSampler(1000 + i);
   *   return branch;
   * });
   * ```
   */
  reseedSampler(seed) {
    this._ensureNotDisposed();
    this._ctx._branchSamplerChainReseed(this._handle, seed);
  }

  /**
   * Sample the next token without advancing any state.
   *
   * Nothing is written to KV and the position does not move. Inspect the
   * result, then decide whether to commit() it; this split is what enables
   * speculative verification and conditional branching.
   *
   * @returns {{ token: number, text: string, isStop: boolean }}
   * @throws {Error} If this branch has been disposed
   */
  produce() {
    this._ensureNotDisposed();
    const token = this.sample();
    const text = this._ctx.tokenToText(token);
    const isStop = this._ctx.isStopToken(token);
    return { token, text, isStop };
  }

  /**
   * Accept a token and advance: write it to KV and update branch state.
   *
   * The token is first recorded for repeat-penalty tracking, then decoded
   * (writing to the KV cache), and the resulting logits are captured for
   * the next produce() call.
   *
   * @param {number} token - Token to commit (from produce())
   * @throws {Error} If this branch has been disposed
   */
  commit(token) {
    this._ensureNotDisposed();
    // Order matters to callers observing the context: accept, then decode.
    this.accept(token);
    this.decodeAndCaptureOne(token);
  }

  // ===== ACCESSORS =====

  /**
   * @returns {number} Branch's sequence ID
   * @throws {Error} If this branch has been disposed
   */
  get seqId() {
    this._ensureNotDisposed();
    return this._ctx._branchGetSeqId(this._handle);
  }

  /**
   * @returns {number} Branch's current position (number of tokens decoded)
   * @throws {Error} If this branch has been disposed
   */
  get position() {
    this._ensureNotDisposed();
    return this._ctx._branchGetPosition(this._handle);
  }

  /**
   * @returns {number} Branch's perplexity (exp of mean surprisal)
   * @throws {Error} If this branch has been disposed
   */
  get perplexity() {
    this._ensureNotDisposed();
    return this._ctx._branchGetPerplexity(this._handle);
  }

  /** @returns {number} Internal handle (for debugging); readable even after disposal */
  get handle() {
    return this._handle;
  }

  /** @returns {boolean} Whether this branch has been disposed */
  get disposed() {
    return this._disposed;
  }

  // ===== INTERNAL =====

  /**
   * Guard used by every operation that needs a live native handle.
   *
   * @throws {Error} If prune() or destroy() has already run on this branch
   */
  _ensureNotDisposed() {
    if (!this._disposed) return;
    throw new Error('Branch has been disposed');
  }
}
267
+
268
+ module.exports = { Branch };