@lloyal-labs/lloyal.node 1.0.9 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/Branch.js +33 -2
- package/lib/index.d.ts +28 -2
- package/package.json +14 -14
package/lib/Branch.js
CHANGED
|
@@ -65,10 +65,15 @@ class Branch {
|
|
|
65
65
|
* @param {number} seqId - Sequence ID for this branch
|
|
66
66
|
* @param {number} position - Starting position (typically prompt token count)
|
|
67
67
|
* @param {SamplingParams} [params] - Sampling parameters (temperature, topP, etc.)
|
|
68
|
+
* @param {number} [nBatch] - Per-branch batch size override (defaults to context nBatch).
|
|
69
|
+
* Controls chunk size for prefill() (decode_and_capture_batch). Has no effect on
|
|
70
|
+
* single-token commit() which uses a zero-allocation fast path. Useful for tuning
|
|
71
|
+
* memory/throughput tradeoff on bulk token decode — e.g. smaller nBatch for cheap
|
|
72
|
+
* exploration branches, larger for the trunk.
|
|
68
73
|
* @returns {Branch} New Branch instance
|
|
69
74
|
*/
|
|
70
|
-
static create(ctx, seqId, position, params) {
|
|
71
|
-
const handle = ctx._branchCreate(seqId, position, params);
|
|
75
|
+
static create(ctx, seqId, position, params, nBatch) {
|
|
76
|
+
const handle = ctx._branchCreate(seqId, position, params, nBatch);
|
|
72
77
|
return new Branch(ctx, handle);
|
|
73
78
|
}
|
|
74
79
|
|
|
@@ -116,6 +121,32 @@ class Branch {
|
|
|
116
121
|
this._ctx._branchDecodeAndCaptureOne(this._handle, token);
|
|
117
122
|
}
|
|
118
123
|
|
|
124
|
+
/**
|
|
125
|
+
* Bulk-decode tokens into the branch's KV cache and capture logits
|
|
126
|
+
*
|
|
127
|
+
* Feeds an array of tokens through the model. tokens.length is the total
|
|
128
|
+
* count to process; the branch's nBatch (set at Branch.create) controls
|
|
129
|
+
* how many are sent per llama_decode call. For example, 500 tokens with
|
|
130
|
+
* nBatch=64 makes 8 llama_decode calls (7×64 + 1×52). With nBatch=512
|
|
131
|
+
* it makes 1.
|
|
132
|
+
*
|
|
133
|
+
* Advances position by tokens.length and stores the final logits into
|
|
134
|
+
* the branch's internal snapshot. The next produce()/sample() call reads
|
|
135
|
+
* from that snapshot — logits never cross the JS boundary.
|
|
136
|
+
*
|
|
137
|
+
* Does NOT accept tokens into the sampler's repeat-penalty window — use
|
|
138
|
+
* this for external tokens (user input between turns), not model-generated
|
|
139
|
+
* tokens. For model output, use commit() which does accept + decode.
|
|
140
|
+
*
|
|
141
|
+
* This is the branch-level equivalent of ctx.decode().
|
|
142
|
+
*
|
|
143
|
+
* @param {number[]} tokens - Token IDs to decode
|
|
144
|
+
*/
|
|
145
|
+
prefill(tokens) {
|
|
146
|
+
this._ensureNotDisposed();
|
|
147
|
+
this._ctx._branchDecodeAndCaptureBatch(this._handle, tokens);
|
|
148
|
+
}
|
|
149
|
+
|
|
119
150
|
/**
|
|
120
151
|
* Sample next token from branch's logits snapshot
|
|
121
152
|
*
|
package/lib/index.d.ts
CHANGED
|
@@ -1225,7 +1225,7 @@ export interface SessionContext {
|
|
|
1225
1225
|
// ===== BRANCH API (internal, wrapped by Branch class) =====
|
|
1226
1226
|
|
|
1227
1227
|
/** @internal Create a new branch for parallel generation */
|
|
1228
|
-
_branchCreate(seqId: number, position: number, params?: SamplingParams): number;
|
|
1228
|
+
_branchCreate(seqId: number, position: number, params?: SamplingParams, nBatch?: number): number;
|
|
1229
1229
|
|
|
1230
1230
|
/** @internal Fork a branch to a new sequence */
|
|
1231
1231
|
_branchFork(handle: number, newSeqId: number): number;
|
|
@@ -1236,6 +1236,9 @@ export interface SessionContext {
|
|
|
1236
1236
|
/** @internal Decode a single token and capture logits */
|
|
1237
1237
|
_branchDecodeAndCaptureOne(handle: number, token: number): void;
|
|
1238
1238
|
|
|
1239
|
+
/** @internal Decode multiple tokens in n_batch-sized chunks and capture logits */
|
|
1240
|
+
_branchDecodeAndCaptureBatch(handle: number, tokens: number[]): void;
|
|
1241
|
+
|
|
1239
1242
|
/** @internal Sample next token from branch's logits snapshot */
|
|
1240
1243
|
_branchSample(handle: number): number;
|
|
1241
1244
|
|
|
@@ -1467,12 +1470,14 @@ export class Branch {
|
|
|
1467
1470
|
* @param seqId Sequence ID for this branch
|
|
1468
1471
|
* @param position Starting position (typically prompt token count)
|
|
1469
1472
|
* @param params Sampling parameters (temperature, topP, etc.)
|
|
1473
|
+
* @param nBatch Per-branch batch size override (defaults to context nBatch)
|
|
1470
1474
|
*/
|
|
1471
1475
|
static create(
|
|
1472
1476
|
ctx: SessionContext,
|
|
1473
1477
|
seqId: number,
|
|
1474
1478
|
position: number,
|
|
1475
|
-
params?: SamplingParams
|
|
1479
|
+
params?: SamplingParams,
|
|
1480
|
+
nBatch?: number
|
|
1476
1481
|
): Branch;
|
|
1477
1482
|
|
|
1478
1483
|
/**
|
|
@@ -1493,6 +1498,27 @@ export class Branch {
|
|
|
1493
1498
|
/** Decode a single token, write to KV, and capture resulting logits */
|
|
1494
1499
|
decodeAndCaptureOne(token: number): void;
|
|
1495
1500
|
|
|
1501
|
+
/**
|
|
1502
|
+
* Bulk-decode tokens into the branch's KV cache and capture logits.
|
|
1503
|
+
*
|
|
1504
|
+
* `tokens.length` is the total count to process; the branch's `nBatch`
|
|
1505
|
+
* (set at `Branch.create`) controls how many are sent per `llama_decode`
|
|
1506
|
+
* call. E.g. 500 tokens with `nBatch=64` → 8 calls (7×64 + 1×52).
|
|
1507
|
+
*
|
|
1508
|
+
* Advances `position` by `tokens.length`. Stores final logits into the
|
|
1509
|
+
* branch's internal snapshot — the next `produce()`/`sample()` reads
|
|
1510
|
+
* from it.
|
|
1511
|
+
*
|
|
1512
|
+
* Does NOT accept tokens into the repeat-penalty window — for external
|
|
1513
|
+
* tokens (user input between turns), not model-generated tokens.
|
|
1514
|
+
* For model output, use `commit()` which does accept + decode.
|
|
1515
|
+
*
|
|
1516
|
+
* Branch-level equivalent of `ctx.decode()`.
|
|
1517
|
+
*
|
|
1518
|
+
* @param tokens - Token IDs to decode
|
|
1519
|
+
*/
|
|
1520
|
+
prefill(tokens: number[]): void;
|
|
1521
|
+
|
|
1496
1522
|
/** Sample next token from branch's frozen logits snapshot */
|
|
1497
1523
|
sample(): number;
|
|
1498
1524
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lloyal-labs/lloyal.node",
|
|
3
|
-
"version": "1.0.9",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Node.js client for liblloyal+llama.cpp",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"types": "lib/index.d.ts",
|
|
@@ -54,19 +54,19 @@
|
|
|
54
54
|
"typedoc-rhineai-theme": "^1.2.0"
|
|
55
55
|
},
|
|
56
56
|
"optionalDependencies": {
|
|
57
|
-
"@lloyal-labs/lloyal.node-darwin-arm64": "1.0.9",
|
|
58
|
-
"@lloyal-labs/lloyal.node-darwin-x64": "1.0.9",
|
|
59
|
-
"@lloyal-labs/lloyal.node-linux-arm64": "1.0.9",
|
|
60
|
-
"@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.0.9",
|
|
61
|
-
"@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.0.9",
|
|
62
|
-
"@lloyal-labs/lloyal.node-linux-x64": "1.0.9",
|
|
63
|
-
"@lloyal-labs/lloyal.node-linux-x64-cuda": "1.0.9",
|
|
64
|
-
"@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.0.9",
|
|
65
|
-
"@lloyal-labs/lloyal.node-win32-arm64": "1.0.9",
|
|
66
|
-
"@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.0.9",
|
|
67
|
-
"@lloyal-labs/lloyal.node-win32-x64": "1.0.9",
|
|
68
|
-
"@lloyal-labs/lloyal.node-win32-x64-cuda": "1.0.9",
|
|
69
|
-
"@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.0.9"
|
|
57
|
+
"@lloyal-labs/lloyal.node-darwin-arm64": "1.1.0",
|
|
58
|
+
"@lloyal-labs/lloyal.node-darwin-x64": "1.1.0",
|
|
59
|
+
"@lloyal-labs/lloyal.node-linux-arm64": "1.1.0",
|
|
60
|
+
"@lloyal-labs/lloyal.node-linux-arm64-cuda": "1.1.0",
|
|
61
|
+
"@lloyal-labs/lloyal.node-linux-arm64-vulkan": "1.1.0",
|
|
62
|
+
"@lloyal-labs/lloyal.node-linux-x64": "1.1.0",
|
|
63
|
+
"@lloyal-labs/lloyal.node-linux-x64-cuda": "1.1.0",
|
|
64
|
+
"@lloyal-labs/lloyal.node-linux-x64-vulkan": "1.1.0",
|
|
65
|
+
"@lloyal-labs/lloyal.node-win32-arm64": "1.1.0",
|
|
66
|
+
"@lloyal-labs/lloyal.node-win32-arm64-vulkan": "1.1.0",
|
|
67
|
+
"@lloyal-labs/lloyal.node-win32-x64": "1.1.0",
|
|
68
|
+
"@lloyal-labs/lloyal.node-win32-x64-cuda": "1.1.0",
|
|
69
|
+
"@lloyal-labs/lloyal.node-win32-x64-vulkan": "1.1.0"
|
|
70
70
|
},
|
|
71
71
|
"engines": {
|
|
72
72
|
"node": ">=22.0.0"
|