@lloyal-labs/lloyal.node 1.0.5-alpha → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -267
- package/lib/Branch.js +268 -0
- package/lib/index.d.ts +307 -165
- package/lib/index.js +165 -19
- package/package.json +19 -18
- package/scripts/create-platform-package.js +19 -40
- package/scripts/download-test-models.sh +10 -0
- package/scripts/install.js +0 -138
package/README.md
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# lloyal.node
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Advanced edge inference for Node.js**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
A llama.cpp control surface in TypeScript with atomic inference state forking. Real-time rolling perplexity/entropy/surprisal and multi-sequence parallel exploration primitives.
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
npm install lloyal.node
|
|
8
|
+
npm install @lloyal-labs/lloyal.node
|
|
9
9
|
```
|
|
10
10
|
|
|
11
11
|
Prebuilt binaries for 13 platforms:
|
|
@@ -19,281 +19,139 @@ Prebuilt binaries for 13 platforms:
|
|
|
19
19
|
| Windows | x64 | CPU / CUDA / Vulkan |
|
|
20
20
|
| Windows | arm64 | CPU / Vulkan |
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
GPU selection happens at runtime, not install time. See [distribution.md](docs/distribution.md) for details.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
Working examples demonstrate each capability:
|
|
29
|
+
|
|
30
|
+
| Example | What It Demonstrates |
|
|
31
|
+
| ----------------------------------------- | ----------------------------------------------------------------------------- |
|
|
32
|
+
| [`best-of-n/`](./examples/best-of-n/) | Branch API parallel generation, PPL selection, fork/produce/commit |
|
|
33
|
+
| [`speculative/`](./examples/speculative/) | Branch API fork/prune, draft/verify/accept/reject, bonus token sampling |
|
|
34
|
+
| [`entropy/`](./examples/entropy/) | Entropy Decision Tree — `modelEntropy()` mid-generation as control signal |
|
|
35
|
+
| [`grammar/`](./examples/grammar/) | Pull loop with generators, JSON schema constraints, KV + grammar branching |
|
|
36
|
+
| [`streaming/`](./examples/streaming/) | Infinite context via BlinkKV, `clearAndReseed`, perplexity tracking |
|
|
37
|
+
| [`chat/`](./examples/chat/) | Interactive streaming chat |
|
|
38
|
+
| [`embed/`](./examples/embed/) | Text embeddings extraction |
|
|
23
39
|
|
|
24
40
|
```bash
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
41
|
+
node examples/best-of-n/best-of-n.mjs
|
|
42
|
+
node examples/speculative/speculative.mjs
|
|
43
|
+
node examples/entropy/entropy.mjs
|
|
44
|
+
node examples/grammar/grammar.mjs
|
|
28
45
|
```
|
|
29
46
|
|
|
30
|
-
|
|
47
|
+
Each example has a README explaining the pattern in depth.
|
|
31
48
|
|
|
32
|
-
|
|
49
|
+
---
|
|
33
50
|
|
|
34
|
-
|
|
51
|
+
## Core Patterns
|
|
35
52
|
|
|
36
|
-
|
|
37
|
-
import { createContext } from 'lloyal.node';
|
|
53
|
+
### Branch API
|
|
38
54
|
|
|
39
|
-
|
|
40
|
-
const ctx = await createContext({
|
|
41
|
-
modelPath: './model.gguf',
|
|
42
|
-
nCtx: 2048,
|
|
43
|
-
nThreads: 4,
|
|
44
|
-
});
|
|
55
|
+
`Branch` is the primary API for parallel generation. Each branch owns a KV cache sequence, sampler chain, logits snapshot, and perplexity tracker. Fork a branch to explore alternatives, compare by perplexity, prune losers.
|
|
45
56
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
await ctx.decode(tokens, 0);
|
|
57
|
+
```javascript
|
|
58
|
+
import { createContext, Branch } from '@lloyal-labs/lloyal.node';
|
|
49
59
|
|
|
50
|
-
|
|
51
|
-
|
|
60
|
+
const ctx = await createContext({ modelPath: './model.gguf', nSeqMax: 8 });
|
|
61
|
+
const tokens = await ctx.tokenize('Once upon a time');
|
|
62
|
+
await ctx.decode(tokens, 0, 0);
|
|
52
63
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
64
|
+
// Create root branch, capture logits from prefill
|
|
65
|
+
const root = Branch.create(ctx, 0, tokens.length, { temperature: 0.8 });
|
|
66
|
+
root.captureLogits();
|
|
56
67
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
68
|
+
// Fork N candidates — each gets copied KV, logits, sampler, perplexity
|
|
69
|
+
const candidates = [1, 2, 3, 4, 5].map((seqId, i) => {
|
|
70
|
+
const branch = root.fork(seqId);
|
|
71
|
+
branch.reseedSampler(1000 + i); // Unique PRNG per branch
|
|
72
|
+
return branch;
|
|
73
|
+
});
|
|
60
74
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
75
|
+
// Generate in parallel (interleaved round-robin)
|
|
76
|
+
for (let t = 0; t < 50; t++) {
|
|
77
|
+
for (const branch of candidates) {
|
|
78
|
+
const { token, isStop } = branch.produce(); // Sample (no KV write)
|
|
79
|
+
if (isStop) continue;
|
|
80
|
+
branch.commit(token); // Accept + decode + capture
|
|
64
81
|
}
|
|
65
82
|
}
|
|
66
83
|
|
|
67
|
-
|
|
68
|
-
|
|
84
|
+
// Select best by perplexity, prune losers
|
|
85
|
+
const best = candidates.reduce((a, b) => a.perplexity < b.perplexity ? a : b);
|
|
86
|
+
for (const c of candidates) { if (c !== best) c.prune(); }
|
|
69
87
|
```
|
|
70
88
|
|
|
71
|
-
|
|
89
|
+
**What `fork()` clones:** KV cache sequence, logits snapshot, sampler chain (penalties + PRNG), perplexity tracker. Under unified KV (the default), forking is a metadata-only operation — no KV tensor buffers are copied.
|
|
72
90
|
|
|
73
|
-
|
|
91
|
+
**Use cases:** Best-of-N sampling, speculative decoding, MCTS/LATS tree search, beam search.
|
|
74
92
|
|
|
75
|
-
|
|
93
|
+
See [`examples/best-of-n/`](./examples/best-of-n/) and [`examples/speculative/`](./examples/speculative/) for complete patterns.
|
|
76
94
|
|
|
77
|
-
|
|
78
|
-
2. **TypeScript sampling** — so your app logic can modify probabilities before selection
|
|
95
|
+
### Low-Level Forking
|
|
79
96
|
|
|
80
|
-
|
|
97
|
+
For fine-grained control without the Branch wrapper, raw KV and state operations are available:
|
|
81
98
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
computeModelEntropy,
|
|
87
|
-
TokenHistoryTracker,
|
|
88
|
-
SamplerWorkspace,
|
|
89
|
-
Xoroshiro128Plus,
|
|
90
|
-
} from '@lloyal/tsampler';
|
|
91
|
-
|
|
92
|
-
const ctx = await createContext({ modelPath: './model.gguf' });
|
|
93
|
-
const prng = new Xoroshiro128Plus(42); // Deterministic PRNG
|
|
94
|
-
const tokenHistory = new TokenHistoryTracker(64); // For repetition penalties
|
|
95
|
-
const workspace = new SamplerWorkspace(256); // Pre-allocated, zero-alloc hot path
|
|
96
|
-
|
|
97
|
-
const tokens = await ctx.tokenize(prompt);
|
|
98
|
-
await ctx.decode(tokens, 0);
|
|
99
|
-
|
|
100
|
-
let pos = tokens.length;
|
|
101
|
-
const output: number[] = [];
|
|
102
|
-
|
|
103
|
-
while (output.length < maxTokens) {
|
|
104
|
-
const logits = ctx.getLogits();
|
|
105
|
-
|
|
106
|
-
// === YOUR STEERING LOGIC HERE ===
|
|
107
|
-
|
|
108
|
-
// Enforce domain rules
|
|
109
|
-
if (currency === 'JPY') {
|
|
110
|
-
logits[DECIMAL_TOKEN] = -Infinity; // JPY has no decimal subdivision
|
|
111
|
-
}
|
|
99
|
+
| Approach | Method | Use Case |
|
|
100
|
+
| -------------------- | --------------------------------- | -------------------------------------------- |
|
|
101
|
+
| **Tag copy** | `kvSeqCopy(src, dst)` | Parallel branches with different seqIds |
|
|
102
|
+
| **Snapshot/restore** | `kvCacheSave()` / `kvCacheLoad()` | Sequential exploration, return to checkpoint |
|
|
112
103
|
|
|
113
|
-
|
|
114
|
-
const entropy = computeModelEntropy(logits);
|
|
115
|
-
const params =
|
|
116
|
-
entropy < 2.0
|
|
117
|
-
? { topK: 256, temperature: 1.5 } // Low confidence → explore more
|
|
118
|
-
: { topK: 40, temperature: 0.8 }; // High confidence → stay focused
|
|
104
|
+
[`examples/grammar/`](./examples/grammar/) uses snapshot/restore — save state, explore branches sequentially, restore between each:
|
|
119
105
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
params,
|
|
125
|
-
workspace,
|
|
126
|
-
prng,
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
if (token < 0) break;
|
|
130
|
-
|
|
131
|
-
tokenHistory.accept(token);
|
|
132
|
-
output.push(token);
|
|
133
|
-
await ctx.decode([token], pos++);
|
|
134
|
-
}
|
|
106
|
+
```javascript
|
|
107
|
+
const snapshot = await ctx.kvCacheSave(0); // Save checkpoint
|
|
108
|
+
// ... explore branch ...
|
|
109
|
+
await ctx.kvCacheLoad(0, snapshot); // Return to checkpoint
|
|
135
110
|
```
|
|
136
111
|
|
|
137
|
-
###
|
|
138
|
-
|
|
139
|
-
```typescript
|
|
140
|
-
// Financial: JPY has no decimal subdivision
|
|
141
|
-
if (currency === 'JPY' && parsingAmount) {
|
|
142
|
-
logits[DECIMAL_TOKEN] = -Infinity;
|
|
143
|
-
DIGIT_TOKENS.forEach((id) => (logits[id] += 2.0));
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
// Legal: Boost required terminology
|
|
147
|
-
if (contractType === 'NDA') {
|
|
148
|
-
CONFIDENTIALITY_TOKENS.forEach((id) => (logits[id] += 5.0));
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
// Medical: Enforce terminology based on actual lab values
|
|
152
|
-
if (glucoseLevel > normalMax) {
|
|
153
|
-
ELEVATED_TOKENS.forEach((id) => (logits[id] += 10.0));
|
|
154
|
-
NORMAL_TOKENS.forEach((id) => (logits[id] = -Infinity));
|
|
155
|
-
}
|
|
156
|
-
```
|
|
112
|
+
### Entropy as Control Signal
|
|
157
113
|
|
|
158
|
-
|
|
114
|
+
Model uncertainty mid-generation enables dynamic behavior:
|
|
159
115
|
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
const ppl = new RollingPerplexity();
|
|
164
|
-
|
|
165
|
-
while (generating) {
|
|
166
|
-
const logits = ctx.getLogits();
|
|
167
|
-
const token = sampleWithStrategy(logits, {
|
|
168
|
-
tokenHistory,
|
|
169
|
-
params,
|
|
170
|
-
workspace,
|
|
171
|
-
prng,
|
|
172
|
-
});
|
|
173
|
-
|
|
174
|
-
const surprisal = computeModelSurprisal(logits, token);
|
|
175
|
-
ppl.addSurprisal(surprisal);
|
|
176
|
-
|
|
177
|
-
if (ppl.ppl() > 50) {
|
|
178
|
-
// Generation quality degrading — options:
|
|
179
|
-
// 1. Trigger RAG retrieval for more context
|
|
180
|
-
// 2. Prune KV cache (evict stale context)
|
|
181
|
-
// 3. Early stop and retry with different prompt
|
|
182
|
-
}
|
|
116
|
+
```javascript
|
|
117
|
+
const entropy = ctx.modelEntropy('bits');
|
|
183
118
|
|
|
184
|
-
|
|
119
|
+
if (entropy > 4.0) {
|
|
120
|
+
// High uncertainty — model is guessing
|
|
121
|
+
// Trigger retrieval, reduce temperature, or branch
|
|
185
122
|
}
|
|
186
123
|
```
|
|
187
124
|
|
|
188
|
-
|
|
125
|
+
See [`examples/entropy/`](./examples/entropy/) for entropy-triggered sampling strategies.
|
|
189
126
|
|
|
190
|
-
|
|
191
|
-
import { computeModelEntropy } from '@lloyal/tsampler';
|
|
127
|
+
### Pull Loop with Generators
|
|
192
128
|
|
|
193
|
-
|
|
194
|
-
const logits = ctx.getLogits();
|
|
195
|
-
const entropy = computeModelEntropy(logits);
|
|
129
|
+
For branching mid-generation, generators provide natural backpressure:
|
|
196
130
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
131
|
+
```javascript
|
|
132
|
+
function* tokenGenerator(ctx, grammarHandle) {
|
|
133
|
+
while (true) {
|
|
134
|
+
const logits = ctx.getLogits();
|
|
135
|
+
ctx.applySampler(grammarHandle, logits);
|
|
136
|
+
const token = ctx.sample({ temperature: 0.7 });
|
|
137
|
+
if (ctx.isStopToken(token)) return;
|
|
138
|
+
ctx.acceptSamplerToken(grammarHandle, token);
|
|
139
|
+
yield { token, text: ctx.tokenToText(token) };
|
|
202
140
|
}
|
|
203
|
-
|
|
204
|
-
const token = sampleWithStrategy(logits, {
|
|
205
|
-
tokenHistory,
|
|
206
|
-
params,
|
|
207
|
-
workspace,
|
|
208
|
-
prng,
|
|
209
|
-
});
|
|
210
|
-
// ...
|
|
211
141
|
}
|
|
212
|
-
```
|
|
213
|
-
|
|
214
|
-
## Why TypeScript Sampling?
|
|
215
|
-
|
|
216
|
-
| | Native C++ | TypeScript (tsampler) |
|
|
217
|
-
| ----------------------- | ------------ | --------------------- |
|
|
218
|
-
| Speed | ~0.3ms/token | ~3-5ms/token |
|
|
219
|
-
| Overhead vs 50ms decode | — | ~6-10% |
|
|
220
|
-
| Logit steering | ❌ | ✅ |
|
|
221
|
-
| Adaptive strategies | ❌ | ✅ |
|
|
222
|
-
| OTA updates | Rebuild app | Ship new JS |
|
|
223
|
-
| Debugging | printf | Full inspect |
|
|
224
|
-
|
|
225
|
-
The overhead is imperceptible. A 50ms decode dominates; 3ms sampling is noise.
|
|
226
|
-
|
|
227
|
-
### tsampler Capabilities
|
|
228
|
-
|
|
229
|
-
[tsampler](https://github.com/lloyal-ai/tsampler) provides llama.cpp sampling parity in pure TypeScript:
|
|
230
142
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
**Infrastructure:**
|
|
236
|
-
|
|
237
|
-
- `Xoroshiro128Plus` — deterministic PRNG, reproducible generations
|
|
238
|
-
- `TokenHistoryTracker` — sliding window for penalty calculations
|
|
239
|
-
- `SamplerWorkspace` — pre-allocated buffers, zero-alloc hot path
|
|
240
|
-
- `computeModelEntropy()` — Shannon entropy in nats
|
|
241
|
-
- `computeModelSurprisal()` — per-token surprisal
|
|
242
|
-
- `RollingPerplexity` — streaming perplexity tracking
|
|
243
|
-
|
|
244
|
-
### Native References
|
|
245
|
-
|
|
246
|
-
lloyal.node includes native C++ implementations for validation:
|
|
247
|
-
|
|
248
|
-
```typescript
|
|
249
|
-
// TypeScript implementation
|
|
250
|
-
const tsEntropy = computeModelEntropy(logits);
|
|
251
|
-
|
|
252
|
-
// Native reference (C++)
|
|
253
|
-
const nativeEntropy = ctx.computeEntropy();
|
|
254
|
-
|
|
255
|
-
// Should match within float precision
|
|
256
|
-
console.assert(Math.abs(tsEntropy - nativeEntropy) < 1e-5);
|
|
143
|
+
// Consumer controls pace — stop at branch point
|
|
144
|
+
for (const { token, text } of gen) {
|
|
145
|
+
if (accumulated.includes('"city"')) break; // Pause here, branch
|
|
146
|
+
}
|
|
257
147
|
```
|
|
258
148
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
- `ctx.computeEntropy()` — Shannon entropy in nats
|
|
262
|
-
- `ctx.greedySample()` — argmax token ID
|
|
263
|
-
|
|
264
|
-
Build with confidence. Validate against native. Deploy TypeScript.
|
|
149
|
+
See [`examples/grammar/`](./examples/grammar/) for the full pull loop pattern.
|
|
265
150
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
lloyal.node supports embedding extraction with configurable pooling:
|
|
269
|
-
|
|
270
|
-
```typescript
|
|
271
|
-
import { createContext } from 'lloyal.node';
|
|
272
|
-
|
|
273
|
-
const ctx = await createContext({
|
|
274
|
-
modelPath: './nomic-embed-text.gguf',
|
|
275
|
-
embeddings: true,
|
|
276
|
-
poolingType: 1, // 0=NONE, 1=MEAN, 2=CLS, 3=LAST
|
|
277
|
-
});
|
|
278
|
-
|
|
279
|
-
async function embed(text: string): Promise<Float32Array> {
|
|
280
|
-
const tokens = await ctx.tokenize(text);
|
|
281
|
-
await ctx.encode(tokens);
|
|
282
|
-
|
|
283
|
-
const embedding = ctx.getEmbeddings(true); // L2-normalized
|
|
284
|
-
await ctx.kvCacheClear(); // Reset for next text
|
|
285
|
-
|
|
286
|
-
return embedding;
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
const vec = await embed('Document to embed');
|
|
290
|
-
console.log(`Dimension: ${ctx.getEmbeddingDimension()}`); // e.g., 768
|
|
291
|
-
```
|
|
151
|
+
---
|
|
292
152
|
|
|
293
153
|
## API Reference
|
|
294
154
|
|
|
295
|
-
**📖 [Full API Documentation](https://lloyal-ai.github.io/lloyal.node)** - Complete reference with examples and type definitions
|
|
296
|
-
|
|
297
155
|
### Context Creation
|
|
298
156
|
|
|
299
157
|
```typescript
|
|
@@ -301,55 +159,88 @@ const ctx = await createContext({
|
|
|
301
159
|
modelPath: string, // Path to .gguf file (required)
|
|
302
160
|
nCtx?: number, // Context size (default: 2048)
|
|
303
161
|
nThreads?: number, // CPU threads (default: 4)
|
|
304
|
-
nGpuLayers?: number, // Layers to offload to GPU (default: 0)
|
|
305
162
|
embeddings?: boolean, // Enable embedding mode (default: false)
|
|
306
|
-
poolingType?: number
|
|
163
|
+
poolingType?: number, // 0=NONE, 1=MEAN, 2=CLS, 3=LAST
|
|
164
|
+
nSeqMax?: number, // Max parallel sequences (default: 1)
|
|
307
165
|
});
|
|
308
166
|
```
|
|
309
167
|
|
|
310
|
-
###
|
|
311
|
-
|
|
312
|
-
| Method
|
|
313
|
-
|
|
|
314
|
-
| `tokenize(text)`
|
|
315
|
-
| `detokenize(tokens)`
|
|
316
|
-
| `
|
|
317
|
-
| `
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
|
168
|
+
### Core Methods
|
|
169
|
+
|
|
170
|
+
| Method | Returns | Description |
|
|
171
|
+
| ----------------------------- | ------------------- | ------------------------------- |
|
|
172
|
+
| `tokenize(text)` | `Promise<number[]>` | Text → token IDs |
|
|
173
|
+
| `detokenize(tokens)` | `Promise<string>` | Token IDs → text |
|
|
174
|
+
| `tokenToText(token)` | `string` | Single token → text (streaming) |
|
|
175
|
+
| `decode(tokens, pos, seqId?)` | `Promise<void>` | Forward pass, updates KV cache |
|
|
176
|
+
| `sample(params?)` | `number` | Sample next token |
|
|
177
|
+
| `isStopToken(token)` | `boolean` | Check for EOS token |
|
|
178
|
+
| `getLogits()` | `Float32Array` | Raw logits (zero-copy view) |
|
|
179
|
+
|
|
180
|
+
### KV Cache
|
|
181
|
+
|
|
182
|
+
| Method | Returns | Description |
|
|
183
|
+
| ---------------------------------- | ----------------- | ------------------------------ |
|
|
184
|
+
| `kvCacheSize(seqId?)` | `number` | Tokens in cache |
|
|
185
|
+
| `kvCacheClear()` | `Promise<void>` | Clear all sequences |
|
|
186
|
+
| `kvCacheRemove(seqId, start, end)` | `Promise<void>` | Remove token range |
|
|
187
|
+
| `kvCacheSave(seqId?)` | `Promise<Buffer>` | Snapshot state |
|
|
188
|
+
| `kvCacheLoad(seqId, state)` | `Promise<void>` | Restore state |
|
|
189
|
+
| `kvSeqCopy(src, dst)` | `void` | Copy sequence (tag copy, O(1)) |
|
|
190
|
+
| `kvSeqKeep(seqId)` | `void` | Keep only one sequence |
|
|
191
|
+
| `clearAndReseed(sinks, tail)` | `Promise<void>` | BlinkKV pattern |
|
|
192
|
+
|
|
193
|
+
### Grammar (Handle-Based)
|
|
194
|
+
|
|
195
|
+
| Method | Returns | Description |
|
|
196
|
+
| -------------------------------- | -------- | --------------------------- |
|
|
197
|
+
| `jsonSchemaToGrammar(schema)` | `string` | Schema → GBNF |
|
|
198
|
+
| `createSampler(grammarStr)` | `number` | Create grammar handle |
|
|
199
|
+
| `cloneSampler(handle)` | `number` | Clone grammar state |
|
|
200
|
+
| `applySampler(handle, logits)` | `void` | Apply constraints to logits |
|
|
201
|
+
| `acceptSamplerToken(handle, id)` | `void` | Advance parser state |
|
|
202
|
+
| `freeSamplerHandle(handle)` | `void` | Release grammar handle |
|
|
203
|
+
|
|
204
|
+
### Metrics
|
|
205
|
+
|
|
206
|
+
| Method | Returns | Description |
|
|
207
|
+
| --------------------------------------- | --------------- | ------------------------------------------ |
|
|
208
|
+
| `modelEntropy(base?, logits?)` | `number` | Distribution entropy (bits/nats) |
|
|
209
|
+
| `modelSurprisal(token, base?, logits?)` | `number` | Token surprisal (supports captured logits) |
|
|
210
|
+
| `createPerplexityTracker()` | `TrackerHandle` | Create tracker (forkable) |
|
|
211
|
+
| `clonePerplexityTracker(handle)` | `TrackerHandle` | Clone tracker state |
|
|
212
|
+
| `addSurprisal(handle, value)` | `void` | Add to tracker |
|
|
213
|
+
| `getPerplexity(handle)` | `number` | Get current PPL |
|
|
214
|
+
| `freePerplexityTracker(handle)` | `void` | Release tracker |
|
|
325
215
|
|
|
326
216
|
### Embeddings
|
|
327
217
|
|
|
328
|
-
| Method | Returns | Description
|
|
329
|
-
| --------------------------- | --------------- |
|
|
330
|
-
| `encode(tokens)` | `Promise<void>` | Forward pass for
|
|
331
|
-
| `getEmbeddings(normalize?)` | `Float32Array` |
|
|
332
|
-
| `getEmbeddingDimension()` | `number` | Vector dimension
|
|
333
|
-
| `kvCacheClear()` | `Promise<void>` | Clear KV cache between texts |
|
|
218
|
+
| Method | Returns | Description |
|
|
219
|
+
| --------------------------- | --------------- | --------------------------- |
|
|
220
|
+
| `encode(tokens)` | `Promise<void>` | Forward pass for embeddings |
|
|
221
|
+
| `getEmbeddings(normalize?)` | `Float32Array` | Extract embedding vector |
|
|
222
|
+
| `getEmbeddingDimension()` | `number` | Vector dimension |
|
|
334
223
|
|
|
335
224
|
### Lifecycle
|
|
336
225
|
|
|
337
|
-
| Method | Description
|
|
338
|
-
| ----------- |
|
|
339
|
-
| `dispose()` | Free native resources
|
|
226
|
+
| Method | Description |
|
|
227
|
+
| ----------- | ------------------------------------ |
|
|
228
|
+
| `dispose()` | Free native resources (**required**) |
|
|
229
|
+
|
|
230
|
+
---
|
|
340
231
|
|
|
341
|
-
##
|
|
232
|
+
## Ecosystem
|
|
342
233
|
|
|
343
|
-
| Package |
|
|
344
|
-
| ------------------------------------------------------- | ------------ |
|
|
345
|
-
| [liblloyal](https://github.com/lloyal-ai/liblloyal) | C++ |
|
|
346
|
-
| **lloyal.node** |
|
|
347
|
-
| [
|
|
348
|
-
| [
|
|
234
|
+
| Package | Runtime | Description |
|
|
235
|
+
| ------------------------------------------------------- | ------------ | --------------------------------- |
|
|
236
|
+
| [liblloyal](https://github.com/lloyal-ai/liblloyal) | C++ | Header-only inference kernel |
|
|
237
|
+
| **lloyal.node** | Node.js | This package |
|
|
238
|
+
| [nitro-llama](https://github.com/lloyal-ai/nitro-llama) | React Native | Mobile bindings via Nitro Modules |
|
|
239
|
+
| [tsampler](https://github.com/lloyal-ai/tsampler) | TypeScript | Reference sampler implementation |
|
|
349
240
|
|
|
350
241
|
## Contributing
|
|
351
242
|
|
|
352
|
-
See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup
|
|
243
|
+
See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup and release process.
|
|
353
244
|
|
|
354
245
|
## License
|
|
355
246
|
|