@lloyal-labs/lloyal.node 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +78 -163
- package/package.json +15 -14
- package/scripts/create-platform-package.js +3 -2
- package/scripts/sync-llama-cpp.js +117 -0
package/README.md
CHANGED
````diff
@@ -1,58 +1,12 @@
 # lloyal.node
 
-**
+**Covalent inference for Node.js**
 
-
+Forkable inference state for llama.cpp — Branch a generation into a tree — prefix sharing is the bond across branches while each owns its own machinery (sampler chain, seed, grammar, logits snapshot, perplexity tracker) enabling controlled divergence at decode time.
 
-
-npm install @lloyal-labs/lloyal.node
-```
-
-Prebuilt binaries for 13 platforms:
-
-| Platform | Arch  | Acceleration        |
-| -------- | ----- | ------------------- |
-| macOS    | arm64 | Metal               |
-| macOS    | x64   | CPU                 |
-| Linux    | x64   | CPU / CUDA / Vulkan |
-| Linux    | arm64 | CPU / CUDA / Vulkan |
-| Windows  | x64   | CPU / CUDA / Vulkan |
-| Windows  | arm64 | CPU / Vulkan        |
-
-GPU selection happens at runtime, not install time. See [distribution.md](docs/distribution.md) for details.
-
----
-
-## Examples
-
-Working examples demonstrate each capability:
+## Branch API
 
-
-| ----------------------------------------- | ----------------------------------------------------------------------------- |
-| [`best-of-n/`](./examples/best-of-n/)     | Branch API parallel generation, PPL selection, fork/produce/commit             |
-| [`speculative/`](./examples/speculative/) | Branch API fork/prune, draft/verify/accept/reject, bonus token sampling        |
-| [`entropy/`](./examples/entropy/)         | Entropy Decision Tree — `modelEntropy()` mid-generation as control signal      |
-| [`grammar/`](./examples/grammar/)         | Pull loop with generators, JSON schema constraints, KV + grammar branching     |
-| [`streaming/`](./examples/streaming/)     | Infinite context via BlinkKV, `clearAndReseed`, perplexity tracking            |
-| [`chat/`](./examples/chat/)               | Interactive streaming chat                                                     |
-| [`embed/`](./examples/embed/)             | Text embeddings extraction                                                     |
-
-```bash
-node examples/best-of-n/best-of-n.mjs
-node examples/speculative/speculative.mjs
-node examples/entropy/entropy.mjs
-node examples/grammar/grammar.mjs
-```
-
-Each example has a README explaining the pattern in depth.
-
----
-
-## Core Patterns
-
-### Branch API
-
-`Branch` is the primary API for parallel generation. Each branch owns a KV cache sequence, sampler chain, logits snapshot, and perplexity tracker. Fork a branch to explore alternatives, compare by perplexity, prune losers.
+Fork from root for best-of-N, fork from children for MCTS/beam search, fork from a draft for speculative decoding. The produce/commit protocol separates sampling from state advancement — sample without writing to KV, inspect the result, then decide whether to commit.
 
 ```javascript
 import { createContext, Branch } from '@lloyal-labs/lloyal.node';
@@ -61,54 +15,91 @@ const ctx = await createContext({ modelPath: './model.gguf', nSeqMax: 8 });
 const tokens = await ctx.tokenize('Once upon a time');
 await ctx.decode(tokens, 0, 0);
 
-// Create root branch,
+// Create root branch, freeze logits from prefill
 const root = Branch.create(ctx, 0, tokens.length, { temperature: 0.8 });
 root.captureLogits();
 
-// Fork N candidates —
+// Fork N candidates — KV prefix shared, sampler/grammar/logits/perplexity cloned
 const candidates = [1, 2, 3, 4, 5].map((seqId, i) => {
   const branch = root.fork(seqId);
-  branch.reseedSampler(1000 + i);
+  branch.reseedSampler(1000 + i);
   return branch;
 });
 
-// Generate
+// Generate (interleaved round-robin)
 for (let t = 0; t < 50; t++) {
   for (const branch of candidates) {
-    const { token, isStop } = branch.produce(); // Sample
+    const { token, isStop } = branch.produce(); // Sample, no KV write
     if (isStop) continue;
-    branch.commit(token); // Accept +
+    branch.commit(token); // Accept + forward pass + capture
   }
 }
 
-// Select
-const best = candidates.reduce((a, b) => a.perplexity < b.perplexity ? a : b);
-for (const c of candidates) {
+// Select by perplexity, prune losers
+const best = candidates.reduce((a, b) => (a.perplexity < b.perplexity ? a : b));
+for (const c of candidates) {
+  if (c !== best) c.prune();
+}
 ```
 
-**What `fork()`
+**What `fork()` shares:** KV cache prefix (metadata-only under unified KV — no tensor buffers copied).
 
-**
+**What `fork()` clones:** Logits snapshot, sampler chain (penalties + PRNG), grammar state, logit bias, perplexity tracker.
 
-
+**Key methods:**
 
-
+- `produce()` / `commit()` — two-phase: sample without KV write, then advance
+- `prune()` — discard loser and its divergent KV entries
+- `destroy()` — release handle, keep KV (for winners continuing with raw ops)
+- `reseedSampler()` — unique PRNG per fork for stochastic diversity
+- `perplexity` — rolling PPL per branch for quality-based selection
 
-
+---
 
-
-
-
-
````
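The produce/commit split called out in the new Branch API copy above can also drive a single trial fork: sample, inspect, then either advance or discard. A minimal sketch using only the methods shown in the diff (`fork`, `reseedSampler`, `produce`, `commit`, `prune`, `perplexity`); the sequence id, token budget, and acceptance threshold are illustrative, and `root` is assumed to be the committed branch from the README example.

```javascript
// Sketch: two-phase produce/commit on a single trial fork.
// Assumes `root` is the committed Branch from the README example above;
// seq id 6, the 16-token budget, and the PPL threshold are illustrative.
const trial = root.fork(6);
trial.reseedSampler(42);

for (let t = 0; t < 16; t++) {
  const { token, isStop } = trial.produce(); // sample only; KV untouched
  if (isStop) break;
  // Inspect the candidate before advancing state.
  console.log('candidate token id:', token);
  trial.commit(token); // accept: forward pass + logits capture
}

// Illustrative acceptance rule: keep the continuation only if its rolling
// perplexity stays under an arbitrary threshold; otherwise drop its KV entries.
if (trial.perplexity > 20) {
  trial.prune();
}
```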
````diff
+## Install
+
+```bash
+npm install @lloyal-labs/lloyal.node
+```
 
-
+Prebuilt binaries for 13 platform/GPU combinations. GPU selection at runtime, not install time.
 
-
-
-
-
+| Platform | Arch  | Acceleration        |
+| -------- | ----- | ------------------- |
+| macOS    | arm64 | Metal               |
+| macOS    | x64   | CPU                 |
+| Linux    | x64   | CPU / CUDA / Vulkan |
+| Linux    | arm64 | CPU / CUDA / Vulkan |
+| Windows  | x64   | CPU / CUDA / Vulkan |
+| Windows  | arm64 | CPU / Vulkan        |
+
+See [distribution.md](docs/distribution.md) for details.
+
+---
+
+## Examples
+
+| Example                                   | Pattern                                                                     |
+| ----------------------------------------- | -------------------------------------------------------------------------- |
+| [`best-of-n/`](./examples/best-of-n/)     | Branch API: fork, produce/commit, perplexity selection                      |
+| [`speculative/`](./examples/speculative/) | Branch API: draft/verify, fork/prune, bonus token sampling                  |
+| [`streaming/`](./examples/streaming/)     | Infinite context via BlinkKV reseeding with sidecar summarization           |
+| [`entropy/`](./examples/entropy/)         | `modelEntropy()` mid-generation as control signal                           |
+| [`grammar/`](./examples/grammar/)         | Pull loop with generators, JSON schema constraints, KV + grammar branching  |
+| [`chat/`](./examples/chat/)               | Interactive streaming chat                                                  |
+| [`embed/`](./examples/embed/)             | Text embeddings extraction                                                  |
+
+```bash
+node examples/best-of-n/best-of-n.mjs
+node examples/speculative/speculative.mjs
 ```
 
+Each example has a README explaining the pattern.
+
+---
+
+## Other Patterns
+
 ### Entropy as Control Signal
 
 Model uncertainty mid-generation enables dynamic behavior:
@@ -124,26 +115,21 @@ if (entropy > 4.0) {
 
 See [`examples/entropy/`](./examples/entropy/) for entropy-triggered sampling strategies.
 
````
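Only the `if (entropy > 4.0)` guard of the README's entropy example is visible in this hunk, so the following is a hedged sketch of the pattern it describes. It assumes `modelEntropy(base?)` is called on the context, as listed in the Metrics table removed later in this diff; the threshold, the alternative sampling parameters, and the position bookkeeping via `kvCacheSize()` are assumptions.

```javascript
// Sketch: entropy as a mid-generation control signal.
// Assumes ctx.modelEntropy(base?) reads entropy of the current logits
// (per the 1.0.7 Metrics table); the 4.0-bit threshold, temperatures,
// and decode position via kvCacheSize() are illustrative.
for (let t = 0; t < 64; t++) {
  const entropy = ctx.modelEntropy(2); // base 2 -> bits
  const params = entropy > 4.0
    ? { temperature: 0.3 }  // model is uncertain: sample conservatively
    : { temperature: 0.8 }; // model is confident: sample freely
  const token = ctx.sample(params);
  if (ctx.isStopToken(token)) break;
  process.stdout.write(ctx.tokenToText(token));
  await ctx.decode([token], ctx.kvCacheSize(), 0);
}
```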
````diff
-###
+### Low-Level KV Operations
 
-For
+For fine-grained control without Branch:
 
-
-
-
-
-    ctx.applySampler(grammarHandle, logits);
-    const token = ctx.sample({ temperature: 0.7 });
-    if (ctx.isStopToken(token)) return;
-    ctx.acceptSamplerToken(grammarHandle, token);
-    yield { token, text: ctx.tokenToText(token) };
-  }
-}
+| Approach             | Method                            | Use Case                                     |
+| -------------------- | --------------------------------- | -------------------------------------------- |
+| **Sequence copy**    | `kvSeqCopy(src, dst)`             | Share prefix across sequences                |
+| **Snapshot/restore** | `kvCacheSave()` / `kvCacheLoad()` | Sequential exploration, return to checkpoint |
 
-
-
-
-
````
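The snapshot/restore row in the table above maps onto the `kvCacheSave` / `kvCacheLoad` signatures from the 1.0.7 KV Cache table (removed further down in this diff). A brief sketch; the exploration step in the middle is left abstract:

```javascript
// Sketch: sequential exploration with a KV checkpoint on sequence 0.
// Signatures follow the 1.0.7 API tables; the exploration itself is omitted.
const checkpoint = await ctx.kvCacheSave(0); // Promise<Buffer> snapshot

// ...decode and sample a tentative continuation on sequence 0...

await ctx.kvCacheLoad(0, checkpoint);        // rewind sequence 0 to the checkpoint

// Or share the prefix into another sequence instead (O(1) tag copy):
ctx.kvSeqCopy(0, 1);
```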
````diff
+### Grammar-Constrained Generation
+
+```javascript
+const grammar = ctx.jsonSchemaToGrammar(schema);
+const handle = ctx.createSampler(grammar);
+// Pull loop — consumer controls pace, can branch at any point
 ```
 
 See [`examples/grammar/`](./examples/grammar/) for the full pull loop pattern.
````
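The new README keeps only the setup lines of the pull loop; the per-token calls removed earlier in this hunk (`applySampler`, `sample`, `isStopToken`, `acceptSamplerToken`, `tokenToText`) suggest a loop roughly like the sketch below. The generator wrapper, the `maxTokens` cap, the `decode` position bookkeeping, and the in-scope `schema` object are assumptions, not taken from the package docs.

```javascript
// Sketch of the full pull loop: setup from the new README, per-token calls
// from the code removed in this hunk. The generator wrapper, maxTokens, and
// the decode position bookkeeping are assumptions; `schema` is a JSON schema.
const grammar = ctx.jsonSchemaToGrammar(schema);
const handle = ctx.createSampler(grammar);

async function* pull(maxTokens = 256) {
  for (let i = 0; i < maxTokens; i++) {
    const logits = ctx.getLogits();          // zero-copy view of current logits
    ctx.applySampler(handle, logits);        // mask to grammar-legal tokens
    const token = ctx.sample({ temperature: 0.7 });
    if (ctx.isStopToken(token)) return;
    ctx.acceptSamplerToken(handle, token);   // advance parser state
    yield { token, text: ctx.tokenToText(token) };
    await ctx.decode([token], ctx.kvCacheSize(), 0);
  }
}

// The consumer controls the pace and can stop or branch at any point.
for await (const { text } of pull()) process.stdout.write(text);
ctx.freeSamplerHandle(handle);
```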
````diff
@@ -152,80 +138,9 @@ See [`examples/grammar/`](./examples/grammar/) for the full pull loop pattern.
 
 ## API Reference
 
-
-
-```typescript
-const ctx = await createContext({
-  modelPath: string,     // Path to .gguf file (required)
-  nCtx?: number,         // Context size (default: 2048)
-  nThreads?: number,     // CPU threads (default: 4)
-  embeddings?: boolean,  // Enable embedding mode (default: false)
-  poolingType?: number,  // 0=NONE, 1=MEAN, 2=CLS, 3=LAST
-  nSeqMax?: number,      // Max parallel sequences (default: 1)
-});
-```
+Full API documentation: **[lloyal-ai.github.io/lloyal.node](https://lloyal-ai.github.io/lloyal.node/)**
 
-
-
-| Method                        | Returns             | Description                     |
-| ----------------------------- | ------------------- | ------------------------------- |
-| `tokenize(text)`              | `Promise<number[]>` | Text → token IDs                |
-| `detokenize(tokens)`          | `Promise<string>`   | Token IDs → text                |
-| `tokenToText(token)`          | `string`            | Single token → text (streaming) |
-| `decode(tokens, pos, seqId?)` | `Promise<void>`     | Forward pass, updates KV cache  |
-| `sample(params?)`             | `number`            | Sample next token               |
-| `isStopToken(token)`          | `boolean`           | Check for EOS token             |
-| `getLogits()`                 | `Float32Array`      | Raw logits (zero-copy view)     |
-
-### KV Cache
-
-| Method                             | Returns           | Description                    |
-| ---------------------------------- | ----------------- | ------------------------------ |
-| `kvCacheSize(seqId?)`              | `number`          | Tokens in cache                |
-| `kvCacheClear()`                   | `Promise<void>`   | Clear all sequences            |
-| `kvCacheRemove(seqId, start, end)` | `Promise<void>`   | Remove token range             |
-| `kvCacheSave(seqId?)`              | `Promise<Buffer>` | Snapshot state                 |
-| `kvCacheLoad(seqId, state)`        | `Promise<void>`   | Restore state                  |
-| `kvSeqCopy(src, dst)`              | `void`            | Copy sequence (tag copy, O(1)) |
-| `kvSeqKeep(seqId)`                 | `void`            | Keep only one sequence         |
-| `clearAndReseed(sinks, tail)`      | `Promise<void>`   | BlinkKV pattern                |
-
-### Grammar (Handle-Based)
-
-| Method                           | Returns  | Description                 |
-| -------------------------------- | -------- | --------------------------- |
-| `jsonSchemaToGrammar(schema)`    | `string` | Schema → GBNF               |
-| `createSampler(grammarStr)`      | `number` | Create grammar handle       |
-| `cloneSampler(handle)`           | `number` | Clone grammar state         |
-| `applySampler(handle, logits)`   | `void`   | Apply constraints to logits |
-| `acceptSamplerToken(handle, id)` | `void`   | Advance parser state        |
-| `freeSamplerHandle(handle)`      | `void`   | Release grammar handle      |
-
-### Metrics
-
-| Method                                  | Returns         | Description                                |
-| --------------------------------------- | --------------- | ------------------------------------------ |
-| `modelEntropy(base?, logits?)`          | `number`        | Distribution entropy (bits/nats)           |
-| `modelSurprisal(token, base?, logits?)` | `number`        | Token surprisal (supports captured logits) |
-| `createPerplexityTracker()`             | `TrackerHandle` | Create tracker (forkable)                  |
-| `clonePerplexityTracker(handle)`        | `TrackerHandle` | Clone tracker state                        |
-| `addSurprisal(handle, value)`           | `void`          | Add to tracker                             |
-| `getPerplexity(handle)`                 | `number`        | Get current PPL                            |
-| `freePerplexityTracker(handle)`         | `void`          | Release tracker                            |
-
-### Embeddings
-
-| Method                      | Returns         | Description                 |
-| --------------------------- | --------------- | --------------------------- |
-| `encode(tokens)`            | `Promise<void>` | Forward pass for embeddings |
-| `getEmbeddings(normalize?)` | `Float32Array`  | Extract embedding vector    |
-| `getEmbeddingDimension()`   | `number`        | Vector dimension            |
-
-### Lifecycle
-
-| Method      | Description                          |
-| ----------- | ------------------------------------ |
-| `dispose()` | Free native resources (**required**) |
+Generated from [`lib/index.d.ts`](./lib/index.d.ts) with TypeDoc.
 
 ---
 
````
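Since the option list and Lifecycle table only survive on the TypeDoc site after this change, here is a small sketch assembled from the 1.0.7 reference removed above; the option values and model path are placeholders.

```javascript
// Sketch assembled from the createContext options and Lifecycle table removed
// above; the values and './model.gguf' are placeholders.
import { createContext } from '@lloyal-labs/lloyal.node';

const ctx = await createContext({
  modelPath: './model.gguf', // required
  nCtx: 4096,                // context size (default 2048)
  nThreads: 8,               // CPU threads (default 4)
  nSeqMax: 4,                // max parallel sequences (default 1)
});

try {
  const ids = await ctx.tokenize('hello world');
  console.log(await ctx.detokenize(ids));
} finally {
  ctx.dispose(); // free native resources (required per the removed Lifecycle table)
}
```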
package/package.json
CHANGED
````diff
@@ -1,6 +1,6 @@
 {
   "name": "@lloyal-labs/lloyal.node",
-  "version": "1.0.
+  "version": "1.0.8",
   "description": "Node.js client for liblloyal+llama.cpp",
   "main": "lib/index.js",
   "types": "lib/index.d.ts",
@@ -20,6 +20,7 @@
     "test:api": "node test/api.js",
     "test:e2e": "node test/e2e.js",
     "test:examples": "node test/examples.js",
+    "sync:llama-cpp": "node scripts/sync-llama-cpp.js",
     "example": "node examples/chat/chat.mjs"
   },
   "repository": {
@@ -53,19 +54,19 @@
     "typedoc-rhineai-theme": "^1.2.0"
   },
   "optionalDependencies": {
-    "@lloyal-labs/lloyal.node-darwin-arm64": "1.0.
-    "@lloyal-labs/lloyal.node-darwin-x64": "1.0.
-    "@lloyal-labs/lloyal.node-linux-arm64": "1.0.
-    "@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.0.
-    "@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.0.
-    "@lloyal-labs/lloyal.node-linux-x64": "1.0.
-    "@lloyal-labs/lloyal.node-linux-x64-cuda": "1.0.
-    "@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.0.
-    "@lloyal-labs/lloyal.node-win32-arm64": "1.0.
-    "@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.0.
-    "@lloyal-labs/lloyal.node-win32-x64": "1.0.
-    "@lloyal-labs/lloyal.node-win32-x64-cuda": "1.0.
-    "@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.0.
+    "@lloyal-labs/lloyal.node-darwin-arm64": "1.0.8",
+    "@lloyal-labs/lloyal.node-darwin-x64": "1.0.8",
+    "@lloyal-labs/lloyal.node-linux-arm64": "1.0.8",
+    "@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.0.8",
+    "@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.0.8",
+    "@lloyal-labs/lloyal.node-linux-x64": "1.0.8",
+    "@lloyal-labs/lloyal.node-linux-x64-cuda": "1.0.8",
+    "@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.0.8",
+    "@lloyal-labs/lloyal.node-win32-arm64": "1.0.8",
+    "@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.0.8",
+    "@lloyal-labs/lloyal.node-win32-x64": "1.0.8",
+    "@lloyal-labs/lloyal.node-win32-x64-cuda": "1.0.8",
+    "@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.0.8"
   },
   "engines": {
     "node": ">=22.0.0"
````
package/scripts/create-platform-package.js
CHANGED
````diff
@@ -79,8 +79,9 @@ if (osName === 'darwin') {
   });
 
 } else if (osName === 'linux') {
-  // Copy all .so files
-
+  // Copy all .so files including versioned variants (e.g., libllama.so.0, libllama.so.0.0.X)
+  // llama.cpp sets SOVERSION, producing versioned names that the binary references at runtime
+  const sos = fs.readdirSync(BUILD_DIR).filter(f => /\.so(\.\d+)*$/.test(f));
   if (sos.length > 0) {
     sos.forEach(so => {
       fs.copyFileSync(path.join(BUILD_DIR, so), path.join(BIN_DIR, so));
````
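The widened filter in this hunk accepts both bare and SOVERSION-suffixed shared objects. A quick check of what `/\.so(\.\d+)*$/` matches; the file names are illustrative.

```javascript
// Which names the /\.so(\.\d+)*$/ filter from the hunk above matches;
// the file names are illustrative, not taken from the package build.
const pattern = /\.so(\.\d+)*$/;
['libllama.so', 'libllama.so.0', 'libllama.so.0.0.8', 'libggml.so.1', 'libllama.dylib', 'notes.so.txt']
  .forEach(name => console.log(name, pattern.test(name)));
// libllama.so       true
// libllama.so.0     true
// libllama.so.0.0.8 true
// libggml.so.1      true
// libllama.dylib    false
// notes.so.txt      false
```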
package/scripts/sync-llama-cpp.js
ADDED
````diff
@@ -0,0 +1,117 @@
+#!/usr/bin/env node
+/**
+ * Sync llama.cpp submodule to match liblloyal's .llama-cpp-version
+ *
+ * Single source of truth: liblloyal/.llama-cpp-version contains the tag
+ * that the llama.cpp submodule should be checked out at.
+ *
+ * Usage:
+ *   node scripts/sync-llama-cpp.js          # Sync submodule to target tag
+ *   node scripts/sync-llama-cpp.js --check  # Validate match (CI mode)
+ */
+
+const { execSync } = require('child_process');
+const fs = require('fs');
+const path = require('path');
+
+const ROOT = path.join(__dirname, '..');
+const VERSION_FILE = path.join(ROOT, 'liblloyal', '.llama-cpp-version');
+const LLAMA_CPP_DIR = path.join(ROOT, 'llama.cpp');
+
+const CHECK_ONLY = process.argv.includes('--check');
+
+// --- Read target version ---
+
+if (!fs.existsSync(VERSION_FILE)) {
+  console.error('[sync-llama-cpp] Error: liblloyal/.llama-cpp-version not found.');
+  console.error('[sync-llama-cpp] Make sure liblloyal submodule is initialized:');
+  console.error('[sync-llama-cpp]   git submodule update --init --recursive');
+  process.exit(1);
+}
+
+const versionFileContent = fs.readFileSync(VERSION_FILE, 'utf8');
+const targetVersion = versionFileContent
+  .split('\n')
+  .filter(line => !line.startsWith('#') && line.trim().length > 0)
+  [0]
+  ?.trim();
+
+if (!targetVersion) {
+  console.error('[sync-llama-cpp] Error: Could not parse version from liblloyal/.llama-cpp-version');
+  process.exit(1);
+}
+
+console.log(`[sync-llama-cpp] Target llama.cpp version: ${targetVersion}`);
+
+// --- Check llama.cpp submodule exists ---
+
+if (!fs.existsSync(path.join(LLAMA_CPP_DIR, '.git'))) {
+  console.error('[sync-llama-cpp] Error: llama.cpp submodule not initialized.');
+  console.error('[sync-llama-cpp] Run: git submodule update --init --recursive');
+  process.exit(1);
+}
+
+// --- Helper ---
+
+function exec(cmd, opts = {}) {
+  return execSync(cmd, { cwd: LLAMA_CPP_DIR, encoding: 'utf8', stdio: 'pipe', ...opts }).trim();
+}
+
+// --- Get current llama.cpp state ---
+
+const currentSha = exec('git rev-parse HEAD');
+
+// Resolve target tag to SHA (may need to fetch in shallow clones)
+let targetSha;
+try {
+  targetSha = exec(`git rev-parse ${targetVersion}`);
+} catch {
+  // Tag not available locally — fetch it
+  console.log(`[sync-llama-cpp] Tag ${targetVersion} not found locally, fetching...`);
+  try {
+    exec(`git fetch origin tag ${targetVersion} --no-tags --depth 1`);
+    targetSha = exec(`git rev-parse ${targetVersion}`);
+  } catch (e) {
+    console.error(`[sync-llama-cpp] Error: Tag ${targetVersion} not found in remote.`);
+    console.error(`[sync-llama-cpp] Verify tag exists: https://github.com/ggml-org/llama.cpp/releases/tag/${targetVersion}`);
+    process.exit(1);
+  }
+}
+
+const currentShort = currentSha.slice(0, 7);
+const targetShort = targetSha.slice(0, 7);
+
+console.log(`[sync-llama-cpp] Current: ${currentShort} (${currentSha})`);
+console.log(`[sync-llama-cpp] Target:  ${targetShort} (${targetVersion})`);
+
+if (currentSha === targetSha) {
+  console.log(`[sync-llama-cpp] llama.cpp submodule matches ${targetVersion}.`);
+  process.exit(0);
+}
+
+// --- Mismatch ---
+
+if (CHECK_ONLY) {
+  console.error(`\n[sync-llama-cpp] MISMATCH: llama.cpp submodule is at ${currentShort}, expected ${targetVersion} (${targetShort})`);
+  console.error(`[sync-llama-cpp] Fix: npm run sync:llama-cpp`);
+  process.exit(1);
+}
+
+// --- Sync ---
+
+console.log(`[sync-llama-cpp] Checking out ${targetVersion}...`);
+
+try {
+  exec(`git checkout ${targetVersion}`);
+} catch {
+  exec(`git fetch origin tag ${targetVersion} --no-tags --depth 1`);
+  exec(`git checkout ${targetVersion}`);
+}
+
+const newShort = exec('git rev-parse --short HEAD');
+console.log(`[sync-llama-cpp] llama.cpp now at: ${newShort} (${targetVersion})`);
+console.log('');
+console.log('[sync-llama-cpp] Next steps:');
+console.log(' 1. Build and test: npm run build && npm test');
+console.log(' 2. Stage changes: git add llama.cpp');
+console.log(' 3. Commit: git commit -m "chore(deps): sync llama.cpp to ' + targetVersion + '"');
````