@lloyal-labs/lloyal.node 1.0.4-alpha → 1.0.6-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +145 -269
- package/lib/index.d.ts +125 -9
- package/lib/index.js +156 -17
- package/package.json +16 -17
- package/scripts/create-platform-package.js +19 -40
- package/scripts/install.js +0 -138
package/README.md
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# lloyal.node
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Advanced edge inference for Node.js**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Inference with forkable state — KV cache, grammar, metrics all clone atomically. Entropy and surprisal mid-generation. Multi-sequence parallel exploration. The control surface llama.cpp exposes, in TypeScript.
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
npm install lloyal.node
|
|
8
|
+
npm install @lloyal-labs/lloyal.node
|
|
9
9
|
```
|
|
10
10
|
|
|
11
11
|
Prebuilt binaries for 13 platforms:
|
|
@@ -19,281 +19,124 @@ Prebuilt binaries for 13 platforms:
|
|
|
19
19
|
| Windows | x64 | CPU / CUDA / Vulkan |
|
|
20
20
|
| Windows | arm64 | CPU / Vulkan |
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
GPU selection happens at runtime, not install time. See [distribution.md](docs/distribution.md) for details.
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
LLOYAL_GPU=cuda npm install # NVIDIA
|
|
26
|
-
LLOYAL_GPU=vulkan npm install # AMD/Intel
|
|
27
|
-
LLOYAL_GPU=cpu npm install # Force CPU
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
See [DISTRIBUTION.md](./docs/DISTRIBUTION.md) for package details.
|
|
31
|
-
|
|
32
|
-
## Quick Start
|
|
33
|
-
|
|
34
|
-
Complete example with greedy sampling:
|
|
35
|
-
|
|
36
|
-
```typescript
|
|
37
|
-
import { createContext } from 'lloyal.node';
|
|
38
|
-
|
|
39
|
-
async function generate(prompt: string, maxTokens = 100): Promise<string> {
|
|
40
|
-
const ctx = await createContext({
|
|
41
|
-
modelPath: './model.gguf',
|
|
42
|
-
nCtx: 2048,
|
|
43
|
-
nThreads: 4,
|
|
44
|
-
});
|
|
24
|
+
---
|
|
45
25
|
|
|
46
|
-
|
|
47
|
-
const tokens = await ctx.tokenize(prompt);
|
|
48
|
-
await ctx.decode(tokens, 0);
|
|
26
|
+
## Examples
|
|
49
27
|
|
|
50
|
-
|
|
51
|
-
let pos = tokens.length;
|
|
28
|
+
Working examples demonstrate each capability:
|
|
52
29
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
30
|
+
| Example | What It Demonstrates |
|
|
31
|
+
| ----------------------------------------- | ----------------------------------------------------------------------------- |
|
|
32
|
+
| [`best-of-n/`](./examples/best-of-n/) | Multi-sequence generation, PPL selection, captured logits for fair comparison |
|
|
33
|
+
| [`speculative/`](./examples/speculative/) | KV forking, draft/verify/accept/reject, `kvCacheRemove` for rejected tokens |
|
|
34
|
+
| [`entropy/`](./examples/entropy/) | Entropy Decision Tree — `modelEntropy()` mid-generation as control signal |
|
|
35
|
+
| [`grammar/`](./examples/grammar/) | Pull loop with generators, JSON schema constraints, KV + grammar branching |
|
|
36
|
+
| [`streaming/`](./examples/streaming/) | Infinite context via BlinkKV, `clearAndReseed`, perplexity tracking |
|
|
37
|
+
| [`chat/`](./examples/chat/) | Interactive streaming chat |
|
|
38
|
+
| [`embed/`](./examples/embed/) | Text embeddings extraction |
|
|
56
39
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
} finally {
|
|
63
|
-
ctx.dispose();
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
const response = await generate('The capital of France is');
|
|
68
|
-
console.log(response);
|
|
40
|
+
```bash
|
|
41
|
+
node examples/best-of-n/best-of-n.mjs
|
|
42
|
+
node examples/speculative/speculative.mjs
|
|
43
|
+
node examples/entropy/entropy.mjs
|
|
44
|
+
node examples/grammar/grammar.mjs
|
|
69
45
|
```
|
|
70
46
|
|
|
71
|
-
|
|
47
|
+
Each example has a README explaining the pattern in depth.
|
|
72
48
|
|
|
73
|
-
|
|
49
|
+
---
|
|
74
50
|
|
|
75
|
-
|
|
51
|
+
## Core Patterns
|
|
76
52
|
|
|
77
|
-
|
|
78
|
-
2. **TypeScript sampling** — so your app logic can modify probabilities before selection
|
|
53
|
+
### Forkable State
|
|
79
54
|
|
|
80
|
-
|
|
55
|
+
KV cache, grammar parser, and perplexity trackers all live behind handles. Handles clone atomically.
|
|
81
56
|
|
|
82
|
-
|
|
83
|
-
import { createContext } from 'lloyal.node';
|
|
84
|
-
import {
|
|
85
|
-
sampleWithStrategy,
|
|
86
|
-
computeModelEntropy,
|
|
87
|
-
TokenHistoryTracker,
|
|
88
|
-
SamplerWorkspace,
|
|
89
|
-
Xoroshiro128Plus,
|
|
90
|
-
} from '@lloyal/tsampler';
|
|
91
|
-
|
|
92
|
-
const ctx = await createContext({ modelPath: './model.gguf' });
|
|
93
|
-
const prng = new Xoroshiro128Plus(42); // Deterministic PRNG
|
|
94
|
-
const tokenHistory = new TokenHistoryTracker(64); // For repetition penalties
|
|
95
|
-
const workspace = new SamplerWorkspace(256); // Pre-allocated, zero-alloc hot path
|
|
96
|
-
|
|
97
|
-
const tokens = await ctx.tokenize(prompt);
|
|
98
|
-
await ctx.decode(tokens, 0);
|
|
99
|
-
|
|
100
|
-
let pos = tokens.length;
|
|
101
|
-
const output: number[] = [];
|
|
102
|
-
|
|
103
|
-
while (output.length < maxTokens) {
|
|
104
|
-
const logits = ctx.getLogits();
|
|
105
|
-
|
|
106
|
-
// === YOUR STEERING LOGIC HERE ===
|
|
107
|
-
|
|
108
|
-
// Enforce domain rules
|
|
109
|
-
if (currency === 'JPY') {
|
|
110
|
-
logits[DECIMAL_TOKEN] = -Infinity; // JPY has no decimal subdivision
|
|
111
|
-
}
|
|
57
|
+
**Two forking strategies:**
|
|
112
58
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
? { topK: 256, temperature: 1.5 } // Low confidence → explore more
|
|
118
|
-
: { topK: 40, temperature: 0.8 }; // High confidence → stay focused
|
|
59
|
+
| Approach | Method | Use Case |
|
|
60
|
+
| -------------------- | --------------------------------- | -------------------------------------------- |
|
|
61
|
+
| **Tag copy** | `kvSeqCopy(src, dst)` | Parallel branches with different seqIds |
|
|
62
|
+
| **Snapshot/restore** | `kvCacheSave()` / `kvCacheLoad()` | Sequential exploration, return to checkpoint |
|
|
119
63
|
|
|
120
|
-
|
|
64
|
+
[`examples/best-of-n/`](./examples/best-of-n/) uses tag copy — each candidate gets its own seqId, branches run in parallel:
|
|
121
65
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
workspace,
|
|
126
|
-
prng,
|
|
127
|
-
});
|
|
66
|
+
```javascript
|
|
67
|
+
ctx.kvSeqCopy(0, seqId); // O(1) tag copy, branch diverges on seqId
|
|
68
|
+
```
|
|
128
69
|
|
|
129
|
-
|
|
70
|
+
[`examples/grammar/`](./examples/grammar/) uses snapshot/restore — save state, explore branches sequentially, restore between each:
|
|
130
71
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
72
|
+
```javascript
|
|
73
|
+
const snapshot = await ctx.kvCacheSave(0); // Save checkpoint
|
|
74
|
+
// ... explore branch ...
|
|
75
|
+
await ctx.kvCacheLoad(0, snapshot); // Return to checkpoint
|
|
135
76
|
```
|
|
136
77
|
|
|
137
|
-
|
|
78
|
+
Both approaches also fork grammar state with `cloneSampler()` when grammar constraints are involved.
|
|
138
79
|
|
|
139
|
-
|
|
140
|
-
// Financial: JPY has no decimal subdivision
|
|
141
|
-
if (currency === 'JPY' && parsingAmount) {
|
|
142
|
-
logits[DECIMAL_TOKEN] = -Infinity;
|
|
143
|
-
DIGIT_TOKENS.forEach((id) => (logits[id] += 2.0));
|
|
144
|
-
}
|
|
80
|
+
### Captured Logits
|
|
145
81
|
|
|
146
|
-
|
|
147
|
-
if (contractType === 'NDA') {
|
|
148
|
-
CONFIDENTIALITY_TOKENS.forEach((id) => (logits[id] += 5.0));
|
|
149
|
-
}
|
|
82
|
+
After decode, logits represent P(next_token | context). When forking to multiple sequences, capture logits for fair comparison:
|
|
150
83
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
NORMAL_TOKENS.forEach((id) => (logits[id] = -Infinity));
|
|
155
|
-
}
|
|
156
|
-
```
|
|
84
|
+
```javascript
|
|
85
|
+
// Capture after prefill
|
|
86
|
+
const capturedLogits = new Float32Array(ctx.getLogits());
|
|
157
87
|
|
|
158
|
-
|
|
88
|
+
// All candidates sample first token from same distribution
|
|
89
|
+
const token = sampleWithStrategy(capturedLogits, { params, workspace, prng });
|
|
159
90
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
const ppl = new RollingPerplexity();
|
|
164
|
-
|
|
165
|
-
while (generating) {
|
|
166
|
-
const logits = ctx.getLogits();
|
|
167
|
-
const token = sampleWithStrategy(logits, {
|
|
168
|
-
tokenHistory,
|
|
169
|
-
params,
|
|
170
|
-
workspace,
|
|
171
|
-
prng,
|
|
172
|
-
});
|
|
173
|
-
|
|
174
|
-
const surprisal = computeModelSurprisal(logits, token);
|
|
175
|
-
ppl.addSurprisal(surprisal);
|
|
176
|
-
|
|
177
|
-
if (ppl.ppl() > 50) {
|
|
178
|
-
// Generation quality degrading — options:
|
|
179
|
-
// 1. Trigger RAG retrieval for more context
|
|
180
|
-
// 2. Prune KV cache (evict stale context)
|
|
181
|
-
// 3. Early stop and retry with different prompt
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
// ...
|
|
185
|
-
}
|
|
91
|
+
// Compute surprisal from captured logits (native C++)
|
|
92
|
+
const surprisal = ctx.modelSurprisal(token, 'nats', capturedLogits);
|
|
186
93
|
```
|
|
187
94
|
|
|
188
|
-
|
|
95
|
+
See [`examples/best-of-n/`](./examples/best-of-n/) for the full pattern.
|
|
189
96
|
|
|
190
|
-
|
|
191
|
-
import { computeModelEntropy } from '@lloyal/tsampler';
|
|
97
|
+
### Entropy as Control Signal
|
|
192
98
|
|
|
193
|
-
|
|
194
|
-
const logits = ctx.getLogits();
|
|
195
|
-
const entropy = computeModelEntropy(logits);
|
|
99
|
+
Model uncertainty mid-generation enables dynamic behavior:
|
|
196
100
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
const context = await rag.retrieve(currentQuery);
|
|
200
|
-
await injectContext(ctx, context);
|
|
201
|
-
continue; // Re-evaluate with new context
|
|
202
|
-
}
|
|
101
|
+
```javascript
|
|
102
|
+
const entropy = ctx.modelEntropy('bits');
|
|
203
103
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
workspace,
|
|
208
|
-
prng,
|
|
209
|
-
});
|
|
210
|
-
// ...
|
|
104
|
+
if (entropy > 4.0) {
|
|
105
|
+
// High uncertainty — model is guessing
|
|
106
|
+
// Trigger retrieval, reduce temperature, or branch
|
|
211
107
|
}
|
|
212
108
|
```
|
|
213
109
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
| | Native C++ | TypeScript (tsampler) |
|
|
217
|
-
| ----------------------- | ------------ | --------------------- |
|
|
218
|
-
| Speed | ~0.3ms/token | ~3-5ms/token |
|
|
219
|
-
| Overhead vs 50ms decode | — | ~6-10% |
|
|
220
|
-
| Logit steering | ❌ | ✅ |
|
|
221
|
-
| Adaptive strategies | ❌ | ✅ |
|
|
222
|
-
| OTA updates | Rebuild app | Ship new JS |
|
|
223
|
-
| Debugging | printf | Full inspect |
|
|
224
|
-
|
|
225
|
-
The overhead is imperceptible. A 50ms decode dominates; 3ms sampling is noise.
|
|
110
|
+
See [`examples/entropy/`](./examples/entropy/) for entropy-triggered sampling strategies.
|
|
226
111
|
|
|
227
|
-
###
|
|
112
|
+
### Pull Loop with Generators
|
|
228
113
|
|
|
229
|
-
|
|
114
|
+
For branching mid-generation, generators provide natural backpressure:
|
|
230
115
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
- `RollingPerplexity` — streaming perplexity tracking
|
|
243
|
-
|
|
244
|
-
### Native References
|
|
245
|
-
|
|
246
|
-
lloyal.node includes native C++ implementations for validation:
|
|
247
|
-
|
|
248
|
-
```typescript
|
|
249
|
-
// TypeScript implementation
|
|
250
|
-
const tsEntropy = computeModelEntropy(logits);
|
|
251
|
-
|
|
252
|
-
// Native reference (C++)
|
|
253
|
-
const nativeEntropy = ctx.computeEntropy();
|
|
116
|
+
```javascript
|
|
117
|
+
function* tokenGenerator(ctx, grammarHandle) {
|
|
118
|
+
while (true) {
|
|
119
|
+
const logits = ctx.getLogits();
|
|
120
|
+
ctx.applySampler(grammarHandle, logits);
|
|
121
|
+
const token = ctx.sample({ temperature: 0.7 });
|
|
122
|
+
if (ctx.isStopToken(token)) return;
|
|
123
|
+
ctx.acceptSamplerToken(grammarHandle, token);
|
|
124
|
+
yield { token, text: ctx.tokenToText(token) };
|
|
125
|
+
}
|
|
126
|
+
}
|
|
254
127
|
|
|
255
|
-
//
|
|
256
|
-
|
|
128
|
+
// Consumer controls pace — stop at branch point
|
|
129
|
+
for (const { token, text } of gen) {
|
|
130
|
+
if (accumulated.includes('"city"')) break; // Pause here, branch
|
|
131
|
+
}
|
|
257
132
|
```
|
|
258
133
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
- `ctx.computeEntropy()` — Shannon entropy in nats
|
|
262
|
-
- `ctx.greedySample()` — argmax token ID
|
|
263
|
-
|
|
264
|
-
Build with confidence. Validate against native. Deploy TypeScript.
|
|
265
|
-
|
|
266
|
-
## Embeddings
|
|
267
|
-
|
|
268
|
-
lloyal.node supports embedding extraction with configurable pooling:
|
|
134
|
+
See [`examples/grammar/`](./examples/grammar/) for the full pull loop pattern.
|
|
269
135
|
|
|
270
|
-
|
|
271
|
-
import { createContext } from 'lloyal.node';
|
|
272
|
-
|
|
273
|
-
const ctx = await createContext({
|
|
274
|
-
modelPath: './nomic-embed-text.gguf',
|
|
275
|
-
embeddings: true,
|
|
276
|
-
poolingType: 1, // 0=NONE, 1=MEAN, 2=CLS, 3=LAST
|
|
277
|
-
});
|
|
278
|
-
|
|
279
|
-
async function embed(text: string): Promise<Float32Array> {
|
|
280
|
-
const tokens = await ctx.tokenize(text);
|
|
281
|
-
await ctx.encode(tokens);
|
|
282
|
-
|
|
283
|
-
const embedding = ctx.getEmbeddings(true); // L2-normalized
|
|
284
|
-
await ctx.kvCacheClear(); // Reset for next text
|
|
285
|
-
|
|
286
|
-
return embedding;
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
const vec = await embed('Document to embed');
|
|
290
|
-
console.log(`Dimension: ${ctx.getEmbeddingDimension()}`); // e.g., 768
|
|
291
|
-
```
|
|
136
|
+
---
|
|
292
137
|
|
|
293
138
|
## API Reference
|
|
294
139
|
|
|
295
|
-
**📖 [Full API Documentation](https://lloyal-ai.github.io/lloyal.node)** - Complete reference with examples and type definitions
|
|
296
|
-
|
|
297
140
|
### Context Creation
|
|
298
141
|
|
|
299
142
|
```typescript
|
|
@@ -301,55 +144,88 @@ const ctx = await createContext({
|
|
|
301
144
|
modelPath: string, // Path to .gguf file (required)
|
|
302
145
|
nCtx?: number, // Context size (default: 2048)
|
|
303
146
|
nThreads?: number, // CPU threads (default: 4)
|
|
304
|
-
nGpuLayers?: number, // Layers to offload to GPU (default: 0)
|
|
305
147
|
embeddings?: boolean, // Enable embedding mode (default: false)
|
|
306
|
-
poolingType?: number
|
|
148
|
+
poolingType?: number, // 0=NONE, 1=MEAN, 2=CLS, 3=LAST
|
|
149
|
+
nSeqMax?: number, // Max parallel sequences (default: 1)
|
|
307
150
|
});
|
|
308
151
|
```
|
|
309
152
|
|
|
310
|
-
###
|
|
311
|
-
|
|
312
|
-
| Method
|
|
313
|
-
|
|
|
314
|
-
| `tokenize(text)`
|
|
315
|
-
| `detokenize(tokens)`
|
|
316
|
-
| `
|
|
317
|
-
| `
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
|
153
|
+
### Core Methods
|
|
154
|
+
|
|
155
|
+
| Method | Returns | Description |
|
|
156
|
+
| ----------------------------- | ------------------- | ------------------------------- |
|
|
157
|
+
| `tokenize(text)` | `Promise<number[]>` | Text → token IDs |
|
|
158
|
+
| `detokenize(tokens)` | `Promise<string>` | Token IDs → text |
|
|
159
|
+
| `tokenToText(token)` | `string` | Single token → text (streaming) |
|
|
160
|
+
| `decode(tokens, pos, seqId?)` | `Promise<void>` | Forward pass, updates KV cache |
|
|
161
|
+
| `sample(params?)` | `number` | Sample next token |
|
|
162
|
+
| `isStopToken(token)` | `boolean` | Check for EOS token |
|
|
163
|
+
| `getLogits()` | `Float32Array` | Raw logits (zero-copy view) |
|
|
164
|
+
|
|
165
|
+
### KV Cache
|
|
166
|
+
|
|
167
|
+
| Method | Returns | Description |
|
|
168
|
+
| ---------------------------------- | ----------------- | ------------------------------ |
|
|
169
|
+
| `kvCacheSize(seqId?)` | `number` | Tokens in cache |
|
|
170
|
+
| `kvCacheClear()` | `Promise<void>` | Clear all sequences |
|
|
171
|
+
| `kvCacheRemove(seqId, start, end)` | `Promise<void>` | Remove token range |
|
|
172
|
+
| `kvCacheSave(seqId?)` | `Promise<Buffer>` | Snapshot state |
|
|
173
|
+
| `kvCacheLoad(seqId, state)` | `Promise<void>` | Restore state |
|
|
174
|
+
| `kvSeqCopy(src, dst)` | `void` | Copy sequence (tag copy, O(1)) |
|
|
175
|
+
| `kvSeqKeep(seqId)` | `void` | Keep only one sequence |
|
|
176
|
+
| `clearAndReseed(sinks, tail)` | `Promise<void>` | BlinkKV pattern |
|
|
177
|
+
|
|
178
|
+
### Grammar (Handle-Based)
|
|
179
|
+
|
|
180
|
+
| Method | Returns | Description |
|
|
181
|
+
| -------------------------------- | -------- | --------------------------- |
|
|
182
|
+
| `jsonSchemaToGrammar(schema)` | `string` | Schema → GBNF |
|
|
183
|
+
| `createSampler(grammarStr)` | `number` | Create grammar handle |
|
|
184
|
+
| `cloneSampler(handle)` | `number` | Clone grammar state |
|
|
185
|
+
| `applySampler(handle, logits)` | `void` | Apply constraints to logits |
|
|
186
|
+
| `acceptSamplerToken(handle, id)` | `void` | Advance parser state |
|
|
187
|
+
| `freeSamplerHandle(handle)` | `void` | Release grammar handle |
|
|
188
|
+
|
|
189
|
+
### Metrics
|
|
190
|
+
|
|
191
|
+
| Method | Returns | Description |
|
|
192
|
+
| --------------------------------------- | --------------- | ------------------------------------------ |
|
|
193
|
+
| `modelEntropy(base?, logits?)` | `number` | Distribution entropy (bits/nats) |
|
|
194
|
+
| `modelSurprisal(token, base?, logits?)` | `number` | Token surprisal (supports captured logits) |
|
|
195
|
+
| `createPerplexityTracker()` | `TrackerHandle` | Create tracker (forkable) |
|
|
196
|
+
| `clonePerplexityTracker(handle)` | `TrackerHandle` | Clone tracker state |
|
|
197
|
+
| `addSurprisal(handle, value)` | `void` | Add to tracker |
|
|
198
|
+
| `getPerplexity(handle)` | `number` | Get current PPL |
|
|
199
|
+
| `freePerplexityTracker(handle)` | `void` | Release tracker |
|
|
325
200
|
|
|
326
201
|
### Embeddings
|
|
327
202
|
|
|
328
|
-
| Method | Returns | Description
|
|
329
|
-
| --------------------------- | --------------- |
|
|
330
|
-
| `encode(tokens)` | `Promise<void>` | Forward pass for
|
|
331
|
-
| `getEmbeddings(normalize?)` | `Float32Array` |
|
|
332
|
-
| `getEmbeddingDimension()` | `number` | Vector dimension
|
|
333
|
-
| `kvCacheClear()` | `Promise<void>` | Clear KV cache between texts |
|
|
203
|
+
| Method | Returns | Description |
|
|
204
|
+
| --------------------------- | --------------- | --------------------------- |
|
|
205
|
+
| `encode(tokens)` | `Promise<void>` | Forward pass for embeddings |
|
|
206
|
+
| `getEmbeddings(normalize?)` | `Float32Array` | Extract embedding vector |
|
|
207
|
+
| `getEmbeddingDimension()` | `number` | Vector dimension |
|
|
334
208
|
|
|
335
209
|
### Lifecycle
|
|
336
210
|
|
|
337
|
-
| Method | Description
|
|
338
|
-
| ----------- |
|
|
339
|
-
| `dispose()` | Free native resources
|
|
211
|
+
| Method | Description |
|
|
212
|
+
| ----------- | ------------------------------------ |
|
|
213
|
+
| `dispose()` | Free native resources (**required**) |
|
|
214
|
+
|
|
215
|
+
---
|
|
340
216
|
|
|
341
|
-
##
|
|
217
|
+
## Ecosystem
|
|
342
218
|
|
|
343
|
-
| Package |
|
|
344
|
-
| ------------------------------------------------------- | ------------ |
|
|
345
|
-
| [liblloyal](https://github.com/lloyal-ai/liblloyal) | C++ |
|
|
346
|
-
| **lloyal.node** |
|
|
347
|
-
| [
|
|
348
|
-
| [
|
|
219
|
+
| Package | Runtime | Description |
|
|
220
|
+
| ------------------------------------------------------- | ------------ | --------------------------------- |
|
|
221
|
+
| [liblloyal](https://github.com/lloyal-ai/liblloyal) | C++ | Header-only inference kernel |
|
|
222
|
+
| **lloyal.node** | Node.js | This package |
|
|
223
|
+
| [nitro-llama](https://github.com/lloyal-ai/nitro-llama) | React Native | Mobile bindings via Nitro Modules |
|
|
224
|
+
| [tsampler](https://github.com/lloyal-ai/tsampler) | TypeScript | Reference sampler implementation |
|
|
349
225
|
|
|
350
226
|
## Contributing
|
|
351
227
|
|
|
352
|
-
See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup
|
|
228
|
+
See [CONTRIBUTING.md](./CONTRIBUTING.md) for development setup and release process.
|
|
353
229
|
|
|
354
230
|
## License
|
|
355
231
|
|
package/lib/index.d.ts
CHANGED
|
@@ -4,6 +4,48 @@
|
|
|
4
4
|
* N-API bindings for liblloyal - Node.js native addon for llama.cpp inference
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
+
/**
|
|
8
|
+
* GPU variant for binary loading
|
|
9
|
+
*
|
|
10
|
+
* Specifies which GPU-accelerated binary to load:
|
|
11
|
+
* - 'default': CPU-only (works everywhere)
|
|
12
|
+
* - 'cuda': NVIDIA CUDA (requires libcudart.so/cudart64.dll)
|
|
13
|
+
* - 'vulkan': Vulkan (AMD/Intel/NVIDIA, requires Vulkan runtime)
|
|
14
|
+
*
|
|
15
|
+
* If the requested variant is unavailable (package not installed or
|
|
16
|
+
* runtime libraries missing), loading automatically falls back to CPU.
|
|
17
|
+
*/
|
|
18
|
+
export type GpuVariant = 'default' | 'cuda' | 'vulkan';
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Options for binary loading
|
|
22
|
+
*
|
|
23
|
+
* Controls which native binary variant is loaded when creating a context.
|
|
24
|
+
* Use this for explicit GPU variant selection with automatic fallback.
|
|
25
|
+
*/
|
|
26
|
+
export interface LoadOptions {
|
|
27
|
+
/**
|
|
28
|
+
* GPU variant to use
|
|
29
|
+
*
|
|
30
|
+
* - 'cuda': NVIDIA CUDA (requires libcudart.so/cudart64.dll)
|
|
31
|
+
* - 'vulkan': Vulkan (AMD/Intel/NVIDIA)
|
|
32
|
+
* - 'default': CPU only; undefined: uses the LLOYAL_GPU environment variable if set, otherwise CPU
|
|
33
|
+
*
|
|
34
|
+
* If the requested variant is unavailable (package not installed or runtime libraries missing),
|
|
35
|
+
* automatically falls back to CPU with a console warning.
|
|
36
|
+
*
|
|
37
|
+
* @example
|
|
38
|
+
* ```typescript
|
|
39
|
+
* // Request CUDA with automatic fallback to CPU
|
|
40
|
+
* const ctx = await createContext(
|
|
41
|
+
* { modelPath: './model.gguf' },
|
|
42
|
+
* { gpuVariant: 'cuda' }
|
|
43
|
+
* );
|
|
44
|
+
* ```
|
|
45
|
+
*/
|
|
46
|
+
gpuVariant?: GpuVariant;
|
|
47
|
+
}
|
|
48
|
+
|
|
7
49
|
/**
|
|
8
50
|
* Pooling type for embedding extraction
|
|
9
51
|
*/
|
|
@@ -867,13 +909,15 @@ export interface SessionContext {
|
|
|
867
909
|
* - High surprisal: Model didn't expect this token (low probability)
|
|
868
910
|
*
|
|
869
911
|
* Call after decode() to compute surprisal for any token based on
|
|
870
|
-
* the current logits distribution
|
|
912
|
+
* the current logits distribution, or pass captured logits for
|
|
913
|
+
* offline computation (e.g., best-of-n scoring from prefill logits).
|
|
871
914
|
*
|
|
872
915
|
* @param pickedTokenId - Token ID to compute surprisal for
|
|
873
916
|
* @param base - Logarithm base: "nats" (default) or "bits"
|
|
917
|
+
* @param logits - Optional Float32Array of logits (uses current context logits if omitted)
|
|
874
918
|
* @returns Surprisal value in specified base
|
|
875
919
|
*
|
|
876
|
-
* @example
|
|
920
|
+
* @example Current context logits (default)
|
|
877
921
|
* ```typescript
|
|
878
922
|
* await ctx.decode(tokens, position);
|
|
879
923
|
* const token = ctx.sample();
|
|
@@ -881,9 +925,18 @@ export interface SessionContext {
|
|
|
881
925
|
* console.log(`Model surprise: ${surprisal.toFixed(2)} bits`);
|
|
882
926
|
* ```
|
|
883
927
|
*
|
|
884
|
-
*
|
|
928
|
+
* @example Captured/arbitrary logits (for best-of-n, verification, etc.)
|
|
929
|
+
* ```typescript
|
|
930
|
+
* // Capture logits after prefill
|
|
931
|
+
* const capturedLogits = new Float32Array(ctx.getLogits());
|
|
932
|
+
*
|
|
933
|
+
* // Later: compute surprisal from captured logits
|
|
934
|
+
* const surprisal = ctx.modelSurprisal(token, "nats", capturedLogits);
|
|
935
|
+
* ```
|
|
936
|
+
*
|
|
937
|
+
* COST: O(n_vocab) - softmax normalization required
|
|
885
938
|
*/
|
|
886
|
-
modelSurprisal(pickedTokenId: number, base?: 'nats' | 'bits'): number;
|
|
939
|
+
modelSurprisal(pickedTokenId: number, base?: 'nats' | 'bits', logits?: Float32Array): number;
|
|
887
940
|
|
|
888
941
|
/**
|
|
889
942
|
* Compute entropy of the entire logits distribution.
|
|
@@ -892,12 +945,14 @@ export interface SessionContext {
|
|
|
892
945
|
* - Low entropy: Model is confident (peaked distribution)
|
|
893
946
|
* - High entropy: Model is uncertain (flat distribution)
|
|
894
947
|
*
|
|
895
|
-
* Call after decode() to analyze the current prediction distribution
|
|
948
|
+
* Call after decode() to analyze the current prediction distribution,
|
|
949
|
+
* or pass captured logits for offline analysis.
|
|
896
950
|
*
|
|
897
951
|
* @param base - Logarithm base: "nats" (default), "bits", or "base10"
|
|
952
|
+
* @param logits - Optional Float32Array of logits (uses current context logits if omitted)
|
|
898
953
|
* @returns Entropy value in specified base
|
|
899
954
|
*
|
|
900
|
-
* @example
|
|
955
|
+
* @example Current context logits (default)
|
|
901
956
|
* ```typescript
|
|
902
957
|
* await ctx.decode(tokens, position);
|
|
903
958
|
* const entropy = ctx.modelEntropy("bits");
|
|
@@ -906,9 +961,15 @@ export interface SessionContext {
|
|
|
906
961
|
* }
|
|
907
962
|
* ```
|
|
908
963
|
*
|
|
964
|
+
* @example Captured/arbitrary logits
|
|
965
|
+
* ```typescript
|
|
966
|
+
* const capturedLogits = new Float32Array(ctx.getLogits());
|
|
967
|
+
* const entropy = ctx.modelEntropy("nats", capturedLogits);
|
|
968
|
+
* ```
|
|
969
|
+
*
|
|
909
970
|
* COST: O(n_vocab) - must sum over all token probabilities
|
|
910
971
|
*/
|
|
911
|
-
modelEntropy(base?: 'nats' | 'bits'): number;
|
|
972
|
+
modelEntropy(base?: 'nats' | 'bits', logits?: Float32Array): number;
|
|
912
973
|
|
|
913
974
|
/**
|
|
914
975
|
* Create a new perplexity tracker.
|
|
@@ -1304,9 +1365,14 @@ export interface SessionContext {
|
|
|
1304
1365
|
/**
|
|
1305
1366
|
* Create a new inference context
|
|
1306
1367
|
*
|
|
1368
|
+
* Loads the appropriate native binary (with automatic GPU fallback) and
|
|
1369
|
+
* creates an inference context for the specified model.
|
|
1370
|
+
*
|
|
1307
1371
|
* @param options Context creation options
|
|
1372
|
+
* @param loadOptions Optional binary loading options (GPU variant selection)
|
|
1308
1373
|
* @returns Promise resolving to SessionContext instance
|
|
1309
|
-
*
|
|
1374
|
+
*
|
|
1375
|
+
* @example Basic usage
|
|
1310
1376
|
* ```typescript
|
|
1311
1377
|
* const ctx = await createContext({
|
|
1312
1378
|
* modelPath: './model.gguf',
|
|
@@ -1322,8 +1388,58 @@ export interface SessionContext {
|
|
|
1322
1388
|
* ctx.dispose();
|
|
1323
1389
|
* }
|
|
1324
1390
|
* ```
|
|
1391
|
+
*
|
|
1392
|
+
* @example With GPU variant selection
|
|
1393
|
+
* ```typescript
|
|
1394
|
+
* // Request CUDA - falls back to CPU if unavailable
|
|
1395
|
+
* const ctx = await createContext(
|
|
1396
|
+
* { modelPath: './model.gguf', nCtx: 4096 },
|
|
1397
|
+
* { gpuVariant: 'cuda' }
|
|
1398
|
+
* );
|
|
1399
|
+
* ```
|
|
1400
|
+
*
|
|
1401
|
+
* @example Using environment variable
|
|
1402
|
+
* ```typescript
|
|
1403
|
+
* // Set LLOYAL_GPU=cuda before running
|
|
1404
|
+
* // createContext will automatically use CUDA if available
|
|
1405
|
+
* const ctx = await createContext({ modelPath: './model.gguf' });
|
|
1406
|
+
* ```
|
|
1407
|
+
*/
|
|
1408
|
+
export function createContext(
|
|
1409
|
+
options: ContextOptions,
|
|
1410
|
+
loadOptions?: LoadOptions
|
|
1411
|
+
): Promise<SessionContext>;
|
|
1412
|
+
|
|
1413
|
+
/**
|
|
1414
|
+
* Load native binary for a specific GPU variant
|
|
1415
|
+
*
|
|
1416
|
+
* Loads the appropriate platform-specific binary with automatic fallback:
|
|
1417
|
+
* 1. Try requested GPU variant (if specified)
|
|
1418
|
+
* 2. Fall back to default (CPU) platform package
|
|
1419
|
+
* 3. Fall back to local build (development: build/Release/lloyal.node)
|
|
1420
|
+
*
|
|
1421
|
+
* Use this for advanced scenarios where you need direct binary access
|
|
1422
|
+
* or want to check variant availability before creating a context.
|
|
1423
|
+
*
|
|
1424
|
+
* @param variant GPU variant: 'cuda' or 'vulkan'; 'default' selects CPU. If omitted, the LLOYAL_GPU environment variable is used when set, otherwise CPU.
|
|
1425
|
+
* @returns Native binary module with createContext method
|
|
1426
|
+
* @throws Error if no binary is available for the current platform, or if the requested GPU variant fails to load while LLOYAL_NO_FALLBACK=1 is set
|
|
1427
|
+
*
|
|
1428
|
+
* @example
|
|
1429
|
+
* ```typescript
|
|
1430
|
+
* // Load default binary (CPU, unless the LLOYAL_GPU environment variable selects a GPU variant)
|
|
1431
|
+
* const binary = loadBinary();
|
|
1432
|
+
*
|
|
1433
|
+
* // Load CUDA binary (falls back to CPU if unavailable)
|
|
1434
|
+
* const cudaBinary = loadBinary('cuda');
|
|
1435
|
+
*
|
|
1436
|
+
* // Create context from loaded binary
|
|
1437
|
+
* const ctx = await binary.createContext({ modelPath: './model.gguf' });
|
|
1438
|
+
* ```
|
|
1325
1439
|
*/
|
|
1326
|
-
export function
|
|
1440
|
+
export function loadBinary(variant?: GpuVariant): {
|
|
1441
|
+
createContext(options: ContextOptions): Promise<SessionContext>;
|
|
1442
|
+
};
|
|
1327
1443
|
|
|
1328
1444
|
/**
|
|
1329
1445
|
* Safe logits access with automatic lifetime management
|
package/lib/index.js
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
const path = require('path');
|
|
2
|
-
const binary = require('node-gyp-build')(path.join(__dirname, '..'));
|
|
3
|
-
|
|
4
1
|
/**
|
|
5
2
|
* liblloyal-node - Thin N-API wrapper over liblloyal
|
|
6
3
|
*
|
|
@@ -9,7 +6,7 @@ const binary = require('node-gyp-build')(path.join(__dirname, '..'));
|
|
|
9
6
|
*
|
|
10
7
|
* @example
|
|
11
8
|
* ```js
|
|
12
|
-
* const { createContext, withLogits } = require('lloyal.node');
|
|
9
|
+
* const { createContext, withLogits } = require('@lloyal-labs/lloyal.node');
|
|
13
10
|
*
|
|
14
11
|
* const ctx = await createContext({
|
|
15
12
|
* modelPath: './model.gguf',
|
|
@@ -24,7 +21,7 @@ const binary = require('node-gyp-build')(path.join(__dirname, '..'));
|
|
|
24
21
|
* await ctx.decode(tokens, 0);
|
|
25
22
|
*
|
|
26
23
|
* // Safe logits access (Runtime Borrow Checker pattern)
|
|
27
|
-
* const entropy =
|
|
24
|
+
* const entropy = withLogits(ctx, (logits) => {
|
|
28
25
|
* // logits is valid here - use synchronously only!
|
|
29
26
|
* return computeEntropy(logits);
|
|
30
27
|
* });
|
|
@@ -36,7 +33,120 @@ const binary = require('node-gyp-build')(path.join(__dirname, '..'));
|
|
|
36
33
|
* // Cleanup
|
|
37
34
|
* ctx.dispose();
|
|
38
35
|
* ```
|
|
36
|
+
*
|
|
37
|
+
* @example GPU variant selection
|
|
38
|
+
* ```js
|
|
39
|
+
* // Option 1: Environment variable (affects all contexts)
|
|
40
|
+
* // Set LLOYAL_GPU=cuda before running
|
|
41
|
+
*
|
|
42
|
+
* // Option 2: Per-context selection (recommended)
|
|
43
|
+
* const ctx = await createContext(
|
|
44
|
+
* { modelPath: './model.gguf', nCtx: 4096 },
|
|
45
|
+
* { gpuVariant: 'cuda' } // Falls back to CPU if CUDA unavailable
|
|
46
|
+
* );
|
|
47
|
+
* ```
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Platform package naming: @lloyal-labs/lloyal.node-{platform}-{arch}[-{gpu}]
|
|
52
|
+
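* @param {string} [variant] - GPU variant: undefined, 'default', 'cpu', or 'metal' map to the unsuffixed package; any other value (e.g. 'cuda', 'vulkan') is appended as a `-{variant}` suffix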
|
|
53
|
+
* @returns {string} Platform package name
|
|
54
|
+
*/
|
|
55
|
+
const getPlatformPackageName = (variant) => {
|
|
56
|
+
const platform = process.platform;
|
|
57
|
+
const arch = process.arch;
|
|
58
|
+
// cpu/metal/default = no suffix, cuda/vulkan = suffix
|
|
59
|
+
const noSuffix = !variant || variant === 'default' || variant === 'cpu' || variant === 'metal';
|
|
60
|
+
const suffix = noSuffix ? '' : `-${variant}`;
|
|
61
|
+
return `@lloyal-labs/lloyal.node-${platform}-${arch}${suffix}`;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
/**
 * Try to load a platform package, return null on failure.
 *
 * Failures include: package not installed, missing GPU runtime libs (dlopen fails),
 * or module doesn't export expected interface. This function never throws;
 * every failure is reported as `null` (and optionally logged).
 *
 * @param {string} packageName - Package name to load
 * @param {boolean} [verbose=false] - Log failure reasons
 * @returns {object|null} The native binary module or null
 */
const tryLoadPackage = (packageName, verbose = false) => {
  try {
    const mod = require(packageName);

    // Validate it's actually a native module with expected exports
    if (mod && typeof mod.createContext === 'function') {
      return mod;
    }

    if (verbose) {
      console.warn(`[lloyal.node] ${packageName} loaded but missing createContext export`);
    }
    return null;
  } catch (e) {
    if (verbose) {
      // The thrown value is not guaranteed to be an Error (or even an object),
      // so derive the reason defensively instead of reading e.message blindly.
      const reason = e instanceof Error ? e.message : String(e);
      console.warn(`[lloyal.node] Failed to load ${packageName}: ${reason}`);
    }
    return null;
  }
};
|
|
90
|
+
|
|
91
|
+
/**
 * Load the native binary with automatic fallback.
 *
 * Loading priority:
 * 1. Requested variant package (if specified and not 'default')
 * 2. Default platform package (CPU) - skipped if step 1 already tried it
 * 3. Local build (development: build/Release/lloyal.node)
 *
 * Environment variables read:
 * - LLOYAL_GPU: variant used when the `variant` argument is null/undefined
 * - LLOYAL_NO_FALLBACK=1: throw instead of falling back when the requested
 *   variant package cannot be loaded (for CI testing specific packages)
 *
 * @param {string} [variant] - GPU variant: 'cuda', 'vulkan', or undefined for CPU
 * @returns {object} The native binary module
 * @throws {Error} If no binary can be loaded, or if the requested variant
 *   fails to load while LLOYAL_NO_FALLBACK=1
 */
const loadBinary = (variant) => {
  // Use env var if no variant specified
  const requested = variant ?? process.env.LLOYAL_GPU;
  // LLOYAL_NO_FALLBACK=1 disables fallback (for CI testing specific packages)
  const noFallback = process.env.LLOYAL_NO_FALLBACK === '1';

  const defaultPkg = getPlatformPackageName();
  // Every location attempted, in order, for the final error message.
  const tried = [];

  // 1. Try requested variant (if specified)
  if (requested && requested !== 'default') {
    const pkgName = getPlatformPackageName(requested);
    tried.push(pkgName);

    const binary = tryLoadPackage(pkgName, true); // verbose=true to see errors
    if (binary) return binary;

    if (noFallback) {
      throw new Error(
        `[lloyal.node] GPU variant "${requested}" failed to load. ` +
        `Package: ${pkgName}. Check that runtime libraries are available.`
      );
    }

    // 'cpu' and 'metal' resolve to the default package itself; announcing a
    // "fallback to CPU" is only meaningful when a different package failed.
    if (pkgName !== defaultPkg) {
      console.warn(`[lloyal.node] GPU variant "${requested}" unavailable, falling back to CPU`);
    }
  }

  // 2. Try default platform package (CPU), unless step 1 already tried it
  if (!tried.includes(defaultPkg)) {
    tried.push(defaultPkg);
    const binary = tryLoadPackage(defaultPkg, true); // verbose=true
    if (binary) return binary;
  }

  // 3. Try local build (development)
  const localBuild = '../build/Release/lloyal.node';
  tried.push(localBuild);
  let localBuildDetail = '';
  try {
    return require(localBuild);
  } catch (e) {
    // A missing local build is the normal case for installed packages and is
    // not worth reporting. A build that exists but fails to load (e.g. dlopen
    // error) is surfaced in the final error instead of being swallowed.
    if (!e || e.code !== 'MODULE_NOT_FOUND') {
      const reason = e instanceof Error ? e.message : String(e);
      localBuildDetail = ` Local build failed to load: ${reason}`;
    }
  }

  throw new Error(
    `No lloyal.node binary found for ${process.platform}-${process.arch}. ` +
    `Tried: ${tried.join(', ')}${localBuildDetail}`
  );
};
|
|
141
|
+
|
|
142
|
+
// Cached default binary; stays null until getBinary() is first called.
let _binary = null;

/**
 * Return the default native binary, loading it on first use.
 *
 * The variant is taken from the LLOYAL_GPU environment variable at the time
 * of the first call; later calls return the cached module.
 *
 * @returns {object} The native binary module
 */
const getBinary = () => {
  if (_binary) {
    return _binary;
  }
  _binary = loadBinary(process.env.LLOYAL_GPU);
  return _binary;
};
|
|
40
150
|
|
|
41
151
|
/**
|
|
42
152
|
* Safe logits access with Runtime Borrow Checker pattern
|
|
@@ -97,25 +207,54 @@ module.exports = {
|
|
|
97
207
|
/**
|
|
98
208
|
* Create a new inference context
|
|
99
209
|
*
|
|
100
|
-
* @param {
|
|
101
|
-
* @param {
|
|
102
|
-
* @
|
|
103
|
-
*
|
|
104
|
-
* @
|
|
210
|
+
* @param {ContextOptions} options - Context configuration
|
|
211
|
+
* @param {LoadOptions} [loadOptions] - Binary loading options
|
|
212
|
+
* @returns {Promise<SessionContext>} The inference context
|
|
213
|
+
*
|
|
214
|
+
* @example
|
|
215
|
+
* ```js
|
|
216
|
+
* // Basic usage
|
|
217
|
+
* const ctx = await createContext({
|
|
218
|
+
* modelPath: './model.gguf',
|
|
219
|
+
* nCtx: 2048,
|
|
220
|
+
* nThreads: 4
|
|
221
|
+
* });
|
|
222
|
+
*
|
|
223
|
+
* // With GPU variant
|
|
224
|
+
* const ctx = await createContext(
|
|
225
|
+
* { modelPath: './model.gguf' },
|
|
226
|
+
* { gpuVariant: 'cuda' }
|
|
227
|
+
* );
|
|
228
|
+
* ```
|
|
105
229
|
*/
|
|
106
|
-
createContext: async (options) => {
|
|
107
|
-
|
|
108
|
-
|
|
230
|
+
createContext: async (options, loadOptions) => {
|
|
231
|
+
const variant = loadOptions?.gpuVariant || process.env.LLOYAL_GPU;
|
|
232
|
+
const binary = variant ? loadBinary(variant) : getBinary();
|
|
109
233
|
return binary.createContext(options);
|
|
110
234
|
},
|
|
111
235
|
|
|
112
236
|
/**
|
|
113
|
-
*
|
|
237
|
+
* Load binary for a specific GPU variant.
|
|
238
|
+
* Useful for checking variant availability before creating context.
|
|
239
|
+
*
|
|
240
|
+
* @param {string} [variant] - 'cuda', 'vulkan', or undefined for CPU
|
|
241
|
+
* @returns {object} Native binary module
|
|
242
|
+
* @throws {Error} If no binary available for platform
|
|
243
|
+
*
|
|
244
|
+
* @example
|
|
245
|
+
* ```js
|
|
246
|
+
* // Load default (CPU) binary
|
|
247
|
+
* const binary = loadBinary();
|
|
114
248
|
*
|
|
115
|
-
*
|
|
249
|
+
* // Load CUDA binary (falls back to CPU if unavailable)
|
|
250
|
+
* const binary = loadBinary('cuda');
|
|
251
|
+
* ```
|
|
252
|
+
*/
|
|
253
|
+
loadBinary,
|
|
254
|
+
|
|
255
|
+
/**
|
|
256
|
+
* Safe logits access with Runtime Borrow Checker pattern.
|
|
116
257
|
* See function JSDoc for full documentation.
|
|
117
258
|
*/
|
|
118
259
|
withLogits,
|
|
119
|
-
|
|
120
|
-
SessionContext: binary.SessionContext
|
|
121
260
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lloyal-labs/lloyal.node",
|
|
3
|
-
"version": "1.0.4-alpha",
|
|
3
|
+
"version": "1.0.6-alpha",
|
|
4
4
|
"description": "Node.js client for liblloyal+llama.cpp",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"types": "lib/index.d.ts",
|
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
},
|
|
11
11
|
"scripts": {
|
|
12
12
|
"download-models": "bash scripts/download-test-models.sh",
|
|
13
|
-
"install": "node scripts/install.js",
|
|
14
13
|
"build": "node scripts/build.js",
|
|
15
14
|
"build:debug": "cmake-js compile --debug",
|
|
16
15
|
"rebuild": "cmake-js rebuild",
|
|
@@ -43,8 +42,8 @@
|
|
|
43
42
|
},
|
|
44
43
|
"homepage": "https://github.com/lloyal-ai/lloyal.node#readme",
|
|
45
44
|
"dependencies": {
|
|
46
|
-
"
|
|
47
|
-
"node-
|
|
45
|
+
"@lloyal-labs/tsampler": "^0.2.0",
|
|
46
|
+
"node-addon-api": "^8.5.0"
|
|
48
47
|
},
|
|
49
48
|
"devDependencies": {
|
|
50
49
|
"cmake-js": "^7.4.0",
|
|
@@ -52,19 +51,19 @@
|
|
|
52
51
|
"typedoc": "^0.27.5"
|
|
53
52
|
},
|
|
54
53
|
"optionalDependencies": {
|
|
55
|
-
"@lloyal-labs/lloyal.node-darwin-arm64": "1.0.
|
|
56
|
-
"@lloyal-labs/lloyal.node-darwin-x64": "1.0.
|
|
57
|
-
"@lloyal-labs/lloyal.node-linux-arm64": "1.0.
|
|
58
|
-
"@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.0.
|
|
59
|
-
"@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.0.
|
|
60
|
-
"@lloyal-labs/lloyal.node-linux-x64": "1.0.
|
|
61
|
-
"@lloyal-labs/lloyal.node-linux-x64-cuda": "1.0.
|
|
62
|
-
"@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.0.
|
|
63
|
-
"@lloyal-labs/lloyal.node-win32-arm64": "1.0.
|
|
64
|
-
"@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.0.
|
|
65
|
-
"@lloyal-labs/lloyal.node-win32-x64": "1.0.
|
|
66
|
-
"@lloyal-labs/lloyal.node-win32-x64-cuda": "1.0.
|
|
67
|
-
"@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.0.
|
|
54
|
+
"@lloyal-labs/lloyal.node-darwin-arm64": "1.0.6-alpha",
|
|
55
|
+
"@lloyal-labs/lloyal.node-darwin-x64": "1.0.6-alpha",
|
|
56
|
+
"@lloyal-labs/lloyal.node-linux-arm64": "1.0.6-alpha",
|
|
57
|
+
"@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.0.6-alpha",
|
|
58
|
+
"@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.0.6-alpha",
|
|
59
|
+
"@lloyal-labs/lloyal.node-linux-x64": "1.0.6-alpha",
|
|
60
|
+
"@lloyal-labs/lloyal.node-linux-x64-cuda": "1.0.6-alpha",
|
|
61
|
+
"@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.0.6-alpha",
|
|
62
|
+
"@lloyal-labs/lloyal.node-win32-arm64": "1.0.6-alpha",
|
|
63
|
+
"@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.0.6-alpha",
|
|
64
|
+
"@lloyal-labs/lloyal.node-win32-x64": "1.0.6-alpha",
|
|
65
|
+
"@lloyal-labs/lloyal.node-win32-x64-cuda": "1.0.6-alpha",
|
|
66
|
+
"@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.0.6-alpha"
|
|
68
67
|
},
|
|
69
68
|
"engines": {
|
|
70
69
|
"node": ">=22.0.0"
|
|
@@ -108,52 +108,31 @@ if (osName === 'darwin') {
|
|
|
108
108
|
// Create package.json from template
|
|
109
109
|
console.log('\nGenerating package.json...');
|
|
110
110
|
const mainPackageJson = require(path.join(ROOT, 'package.json'));
|
|
111
|
-
const templatePath = path.join(ROOT, 'packages', 'template', 'package.json');
|
|
112
|
-
|
|
113
|
-
let pkgJson;
|
|
114
|
-
if (fs.existsSync(templatePath)) {
|
|
115
|
-
pkgJson = require(templatePath);
|
|
116
|
-
} else {
|
|
117
|
-
// Fallback template if file doesn't exist yet
|
|
118
|
-
pkgJson = {
|
|
119
|
-
name: '@lloyal-labs/lloyal.node-PLATFORM',
|
|
120
|
-
version: '0.0.0',
|
|
121
|
-
description: 'Lloyal native binary for PLATFORM',
|
|
122
|
-
main: 'index.js',
|
|
123
|
-
files: ['bin/', 'index.js'],
|
|
124
|
-
repository: {
|
|
125
|
-
type: 'git',
|
|
126
|
-
url: 'git+https://github.com/lloyal-ai/lloyal.node.git'
|
|
127
|
-
},
|
|
128
|
-
license: 'Apache-2.0'
|
|
129
|
-
};
|
|
130
|
-
}
|
|
131
111
|
|
|
132
|
-
//
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
pkgJson
|
|
136
|
-
|
|
137
|
-
|
|
112
|
+
// Platform package exports the binary directly (no index.js wrapper)
|
|
113
|
+
// This enables runtime dynamic require with automatic fallback:
|
|
114
|
+
// require('@lloyal-labs/lloyal.node-linux-x64') → bin/lloyal.node
|
|
115
|
+
const pkgJson = {
|
|
116
|
+
name: `@lloyal-labs/lloyal.node-${packageName}`,
|
|
117
|
+
version: mainPackageJson.version,
|
|
118
|
+
description: `Lloyal native binary for ${packageName}`,
|
|
119
|
+
main: 'bin/lloyal.node',
|
|
120
|
+
os: [osName],
|
|
121
|
+
cpu: [arch],
|
|
122
|
+
files: ['bin/'],
|
|
123
|
+
repository: {
|
|
124
|
+
type: 'git',
|
|
125
|
+
url: 'git+https://github.com/lloyal-ai/lloyal.node.git'
|
|
126
|
+
},
|
|
127
|
+
author: 'lloyal.ai',
|
|
128
|
+
license: 'Apache-2.0'
|
|
129
|
+
};
|
|
138
130
|
|
|
139
131
|
fs.writeFileSync(
|
|
140
132
|
path.join(PKG_DIR, 'package.json'),
|
|
141
133
|
JSON.stringify(pkgJson, null, 2) + '\n'
|
|
142
134
|
);
|
|
143
|
-
console.log(` ✓ Created package.json`);
|
|
144
|
-
|
|
145
|
-
// Create index.js
|
|
146
|
-
console.log('\nGenerating index.js...');
|
|
147
|
-
const indexJs = `// Platform-specific binary package for ${packageName}
|
|
148
|
-
// This file resolves to the native binary in bin/
|
|
149
|
-
|
|
150
|
-
const path = require('path');
|
|
151
|
-
|
|
152
|
-
module.exports = path.join(__dirname, 'bin', 'lloyal.node');
|
|
153
|
-
`;
|
|
154
|
-
|
|
155
|
-
fs.writeFileSync(path.join(PKG_DIR, 'index.js'), indexJs);
|
|
156
|
-
console.log(` ✓ Created index.js`);
|
|
135
|
+
console.log(` ✓ Created package.json (main: bin/lloyal.node)`);
|
|
157
136
|
|
|
158
137
|
// Summary
|
|
159
138
|
console.log(`\n✅ Platform package created successfully!`);
|
package/scripts/install.js
DELETED
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Smart installer for lloyal.node
|
|
4
|
-
*
|
|
5
|
-
* Strategy:
|
|
6
|
-
* 1. Check if prebuilt binary exists for this platform
|
|
7
|
-
* 2. If yes, copy to build/Release/ and exit
|
|
8
|
-
* 3. If no, show helpful error with build-from-source instructions
|
|
9
|
-
*
|
|
10
|
-
* Respects LLOYAL_GPU environment variable for GPU variant selection
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
const fs = require('fs');
|
|
14
|
-
const path = require('path');
|
|
15
|
-
|
|
16
|
-
const PLATFORM = process.platform;
|
|
17
|
-
const ARCH = process.arch;
|
|
18
|
-
const ROOT = __dirname + '/..';
|
|
19
|
-
const BUILD_DIR = path.join(ROOT, 'build', 'Release');
|
|
20
|
-
|
|
21
|
-
// Logging helpers
|
|
22
|
-
const log = (msg) => console.log(`[lloyal.node] ${msg}`);
|
|
23
|
-
const error = (msg) => console.error(`[lloyal.node] ❌ ${msg}`);
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Check if a platform package is installed and has binaries
|
|
27
|
-
*/
|
|
28
|
-
function findPrebuilt(packageName) {
|
|
29
|
-
try {
|
|
30
|
-
const pkgPath = require.resolve(packageName);
|
|
31
|
-
const binPath = require(packageName); // index.js exports path to binary
|
|
32
|
-
|
|
33
|
-
if (fs.existsSync(binPath)) {
|
|
34
|
-
const binDir = path.dirname(binPath);
|
|
35
|
-
return binDir;
|
|
36
|
-
}
|
|
37
|
-
} catch (e) {
|
|
38
|
-
// Package not installed or doesn't export binary path
|
|
39
|
-
}
|
|
40
|
-
return null;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
/**
|
|
44
|
-
* Copy prebuilt binaries to build/Release/
|
|
45
|
-
*/
|
|
46
|
-
function installPrebuilt(binDir, packageName) {
|
|
47
|
-
log(`Found prebuilt binaries in ${packageName}`);
|
|
48
|
-
|
|
49
|
-
try {
|
|
50
|
-
// Create build/Release directory
|
|
51
|
-
fs.mkdirSync(BUILD_DIR, { recursive: true });
|
|
52
|
-
|
|
53
|
-
// Copy all files from bin directory
|
|
54
|
-
const files = fs.readdirSync(binDir);
|
|
55
|
-
files.forEach(file => {
|
|
56
|
-
const src = path.join(binDir, file);
|
|
57
|
-
const dest = path.join(BUILD_DIR, file);
|
|
58
|
-
|
|
59
|
-
if (fs.statSync(src).isFile()) {
|
|
60
|
-
fs.copyFileSync(src, dest);
|
|
61
|
-
log(` ✓ Copied ${file}`);
|
|
62
|
-
}
|
|
63
|
-
});
|
|
64
|
-
|
|
65
|
-
log(`✅ Installed prebuilt binaries successfully`);
|
|
66
|
-
process.exit(0);
|
|
67
|
-
} catch (e) {
|
|
68
|
-
error(`Failed to install prebuilt: ${e.message}`);
|
|
69
|
-
// Don't exit - fall through to source build
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
/**
|
|
74
|
-
* Main installation logic
|
|
75
|
-
*/
|
|
76
|
-
function main() {
|
|
77
|
-
log(`Platform: ${PLATFORM}-${ARCH}`);
|
|
78
|
-
|
|
79
|
-
// 1. Check for user-specified GPU variant via environment variable
|
|
80
|
-
if (process.env.LLOYAL_GPU) {
|
|
81
|
-
const gpu = process.env.LLOYAL_GPU.toLowerCase();
|
|
82
|
-
const packageName = `@lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}-${gpu}`;
|
|
83
|
-
|
|
84
|
-
log(`LLOYAL_GPU=${gpu}, looking for ${packageName}...`);
|
|
85
|
-
const binDir = findPrebuilt(packageName);
|
|
86
|
-
|
|
87
|
-
if (binDir) {
|
|
88
|
-
installPrebuilt(binDir, packageName);
|
|
89
|
-
return; // exit(0) called in installPrebuilt
|
|
90
|
-
} else {
|
|
91
|
-
log(` ⚠️ Package ${packageName} not found`);
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
// 2. Check for GPU variants in priority order
|
|
96
|
-
const gpuVariants = ['cuda', 'vulkan'];
|
|
97
|
-
for (const gpu of gpuVariants) {
|
|
98
|
-
const packageName = `@lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}-${gpu}`;
|
|
99
|
-
const binDir = findPrebuilt(packageName);
|
|
100
|
-
|
|
101
|
-
if (binDir) {
|
|
102
|
-
log(`Auto-detected GPU variant: ${gpu}`);
|
|
103
|
-
installPrebuilt(binDir, packageName);
|
|
104
|
-
return; // exit(0) called in installPrebuilt
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// 3. Check for default platform package (CPU or Metal on macOS)
|
|
109
|
-
const defaultPackage = `@lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}`;
|
|
110
|
-
const binDir = findPrebuilt(defaultPackage);
|
|
111
|
-
|
|
112
|
-
if (binDir) {
|
|
113
|
-
installPrebuilt(binDir, defaultPackage);
|
|
114
|
-
return; // exit(0) called in installPrebuilt
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
// 4. No prebuilt found - error with helpful message
|
|
118
|
-
log('');
|
|
119
|
-
error('No prebuilt binary found for your platform');
|
|
120
|
-
log('');
|
|
121
|
-
log(` Platform: ${PLATFORM}-${ARCH}`);
|
|
122
|
-
log('');
|
|
123
|
-
log(' Options:');
|
|
124
|
-
log(' 1. Install a platform-specific package:');
|
|
125
|
-
log(` npm install @lloyal-labs/lloyal.node-${PLATFORM}-${ARCH}`);
|
|
126
|
-
log('');
|
|
127
|
-
log(' 2. Build from source (requires C++20, CMake 3.18+):');
|
|
128
|
-
log(' git clone --recursive https://github.com/lloyal-ai/lloyal.node.git');
|
|
129
|
-
log(' cd lloyal.node && npm run build');
|
|
130
|
-
log('');
|
|
131
|
-
log(' See: https://github.com/lloyal-ai/lloyal.node#building');
|
|
132
|
-
log('');
|
|
133
|
-
|
|
134
|
-
process.exit(1);
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
// Run installer
|
|
138
|
-
main();
|