@lloyal-labs/sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +162 -0
- package/dist/Branch.d.ts +463 -0
- package/dist/Branch.d.ts.map +1 -0
- package/dist/Branch.js +608 -0
- package/dist/Branch.js.map +1 -0
- package/dist/BranchStore.d.ts +125 -0
- package/dist/BranchStore.d.ts.map +1 -0
- package/dist/BranchStore.js +155 -0
- package/dist/BranchStore.js.map +1 -0
- package/dist/Rerank.d.ts +38 -0
- package/dist/Rerank.d.ts.map +1 -0
- package/dist/Rerank.js +220 -0
- package/dist/Rerank.js.map +1 -0
- package/dist/Session.d.ts +74 -0
- package/dist/Session.d.ts.map +1 -0
- package/dist/Session.js +93 -0
- package/dist/Session.js.map +1 -0
- package/dist/deltas.d.ts +37 -0
- package/dist/deltas.d.ts.map +1 -0
- package/dist/deltas.js +52 -0
- package/dist/deltas.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +22 -0
- package/dist/index.js.map +1 -0
- package/dist/types.d.ts +1365 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +85 -0
- package/dist/types.js.map +1 -0
- package/package.json +35 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import type { Branch } from './Branch';
|
|
2
|
+
import type { SessionContext } from './types';
|
|
3
|
+
/**
|
|
4
|
+
* High-throughput multi-branch decode operations
|
|
5
|
+
*
|
|
6
|
+
* The naive approach to N-branch generation is N sequential llama_decode()
|
|
7
|
+
* calls — each paying full GPU kernel launch overhead, memory barrier, and
|
|
8
|
+
* PCIe round-trip. BranchStore eliminates this by packing all branches into
|
|
9
|
+
* a single llama_batch and dispatching once: O(1) GPU round-trips regardless
|
|
10
|
+
* of branch count. The GPU parallelizes across sequences within the batch,
|
|
11
|
+
* so N branches approach the wall-time cost of 1.
|
|
12
|
+
*
|
|
13
|
+
* Two operations, two packing strategies:
|
|
14
|
+
*
|
|
15
|
+
* **commit()** — Generation step. Each branch contributes exactly 1 token.
|
|
16
|
+
* Packs N tokens into a single batch via `decode_each` (one row per sequence,
|
|
17
|
+
* all at their respective positions). Single `llama_decode()` call. Logits
|
|
18
|
+
* captured per-branch at batch index `i`. O(N) total work, O(1) GPU
|
|
19
|
+
* dispatches, O(1) amortized dispatch overhead per branch. Accept-first
|
|
20
|
+
* ordering with rollback: accepts each token into its branch's repeat-penalty
|
|
21
|
+
* window before decode, restores from clones if decode throws.
|
|
22
|
+
*
|
|
23
|
+
* **prefill()** — Bulk token injection. Each branch contributes a
|
|
24
|
+
* variable-length token array. Uses a two-pass bin-packing algorithm:
|
|
25
|
+
*
|
|
26
|
+
* - *Pass 1 (planning)*: Greedy first-fit packs items into chunks ≤ nBatch.
|
|
27
|
+
* Items larger than nBatch get a dedicated chunk and fall through to
|
|
28
|
+
* decode_many's internal auto-chunking (ceil(nTokens / nBatch) calls).
|
|
29
|
+
* - *Pass 2 (dispatch)*: Normal chunks dispatch via `decode_scatter` (one
|
|
30
|
+
* `llama_decode` per chunk). Logits are indexed by flattened cursor
|
|
31
|
+
* position: for item k in a chunk, logits live at `cursor + nTokens[k] - 1`.
|
|
32
|
+
*
|
|
33
|
+
* For T total tokens across N branches with batch capacity B:
|
|
34
|
+
* - Best case (T ≤ B): 1 GPU dispatch, all branches in one batch.
|
|
35
|
+
* - Worst case: ceil(T / B) dispatches. Each dispatch is fully packed.
|
|
36
|
+
* - Amortized per-token GPU overhead: O(1/B) — vanishes as batch fills.
|
|
37
|
+
*
|
|
38
|
+
* Does NOT accept tokens into the sampler penalty window — use for
|
|
39
|
+
* external/replayed tokens where repeat-penalty tracking is unwanted.
|
|
40
|
+
* For model-generated tokens, use {@link commit} instead.
|
|
41
|
+
*
|
|
42
|
+
* Both methods take `[branch, token(s)]` tuples — the branch-to-token
|
|
43
|
+
* binding is structural, not positional. After either call, each branch's
|
|
44
|
+
* logits snapshot is updated with the output distribution from its decoded
|
|
45
|
+
* token(s), ready for the next `produce()`/`sample()` call.
|
|
46
|
+
*
|
|
47
|
+
* @example 32-branch generation step — one GPU dispatch
|
|
48
|
+
* ```typescript
|
|
49
|
+
* const store = new BranchStore(ctx);
|
|
50
|
+
* const entries = await Promise.all(branches.map(async b => [b, (await b.produce()).token] as [Branch, number]));
|
|
51
|
+
* await store.commit(entries); // 32 tokens, 1 llama_decode()
|
|
52
|
+
* ```
|
|
53
|
+
*
|
|
54
|
+
* @example Best-of-N with batched commit
|
|
55
|
+
* ```typescript
|
|
56
|
+
* const store = new BranchStore(ctx);
|
|
57
|
+
* const branches = [];
|
|
58
|
+
* for (const _ of [1, 2, 3]) branches.push(await root.fork());
|
|
59
|
+
*
|
|
60
|
+
* for (let step = 0; step < 50; step++) {
|
|
61
|
+
* const produced = await Promise.all(branches.map(async b => [b, await b.produce()] as const));
|
|
62
|
+
* const live = produced.filter(([, p]) => !p.isStop);
|
|
63
|
+
* if (!live.length) break;
|
|
64
|
+
* await store.commit(live.map(([b, p]) => [b, p.token]));
|
|
65
|
+
* }
|
|
66
|
+
* ```
|
|
67
|
+
*
|
|
68
|
+
* @example Asymmetric prefill — variable-length injections, auto-chunked
|
|
69
|
+
* ```typescript
|
|
70
|
+
* await store.prefill([
|
|
71
|
+
* [branchA, systemPromptTokens], // 200 tokens
|
|
72
|
+
* [branchB, shortQueryTokens], // 12 tokens
|
|
73
|
+
* [branchC, longDocumentTokens], // 800 tokens
|
|
74
|
+
* ]);
|
|
75
|
+
* // Bin-packed into ceil(1012 / nBatch) GPU dispatches
|
|
76
|
+
* ```
|
|
77
|
+
*
|
|
78
|
+
* @category Branching
|
|
79
|
+
*/
|
|
80
|
+
export declare class BranchStore {
|
|
81
|
+
/** Session context that every operation below is delegated to. */
private _ctx;
|
|
82
|
+
/**
 * Wraps an existing session context; the constructor only stores the reference.
 *
 * @param ctx - Session context whose store operations this instance calls
 */
constructor(ctx: SessionContext);
|
|
83
|
+
/**
|
|
84
|
+
* Batched single-token commit for model-generated tokens
|
|
85
|
+
*
|
|
86
|
+
* Each tuple `[branch, token]` binds one token to one branch.
|
|
87
|
+
* Accepts each token into its branch's repeat-penalty window (for correct
|
|
88
|
+
* PPL measurement), then decodes all N tokens in a single llama_decode()
|
|
89
|
+
* call via decode_each and captures logits per-branch. Accept-first
|
|
90
|
+
* ordering with rollback: if decode throws, sampler/grammar/metrics are
|
|
91
|
+
* restored from clones taken before the accept.
|
|
92
|
+
*
|
|
93
|
+
* @param entries - Array of `[branch, token]` tuples (branches must not be disposed)
|
|
94
|
+
* @throws If any branch is disposed
* (all entries are validated before anything is handed to the context, so the
* returned promise rejects without dispatching a partial batch)
* @returns Promise that resolves once the context's batched commit has completed
|
|
95
|
+
*/
|
|
96
|
+
commit(entries: [Branch, number][]): Promise<void>;
|
|
97
|
+
/**
|
|
98
|
+
* Batched variable-length prefill for external tokens
|
|
99
|
+
*
|
|
100
|
+
* Each tuple `[branch, tokens]` binds a token array to one branch.
|
|
101
|
+
* Each branch can receive a different number of tokens — decode_scatter
|
|
102
|
+
* handles variable-length runs and auto-chunks to fit nBatch.
|
|
103
|
+
*
|
|
104
|
+
* Does NOT call accept_token — use for external/replayed tokens where
|
|
105
|
+
* repeat-penalty tracking is unwanted. For model-generated tokens,
|
|
106
|
+
* use {@link commit} instead.
|
|
107
|
+
*
|
|
108
|
+
* @param entries - Array of `[branch, tokens]` tuples (branches must not be disposed)
|
|
109
|
+
* @throws If any branch is disposed
* (all entries are validated before anything is handed to the context)
* @returns Promise that resolves once the context's batched prefill has completed
|
|
110
|
+
*/
|
|
111
|
+
prefill(entries: [Branch, number[]][]): Promise<void>;
|
|
112
|
+
/**
|
|
113
|
+
* Retain only the winner branch — evict all other leases and free their slots.
|
|
114
|
+
*
|
|
115
|
+
* Nuclear operation: calls `kv::seq_keep` on the winner's seq_id (stripping all
|
|
116
|
+
* other sequences from KV cache in a single pass), then frees all loser slots
|
|
117
|
+
* and rebuilds the vacancy list. The winner's topology is reset (no parent, no children).
|
|
118
|
+
*
|
|
119
|
+
* @param winner - The branch to keep (must not be disposed, must hold a lease)
|
|
120
|
+
* @throws If winner is disposed or has no lease
* NOTE(review): the JavaScript wrapper itself only checks `winner.disposed`;
* the lease check presumably happens inside the session context — confirm.
|
|
121
|
+
*/
|
|
122
|
+
retainOnly(winner: Branch): Promise<void>;
|
|
123
|
+
/**
 * Value reported by the session context's `_storeAvailable()`.
 * NOTE(review): presumably the number of branch slots still free — confirm
 * against the SessionContext documentation.
 */
get available(): number;
|
|
124
|
+
}
|
|
125
|
+
//# sourceMappingURL=BranchStore.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BranchStore.d.ts","sourceRoot":"","sources":["../src/BranchStore.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,IAAI,CAAiB;gBAEjB,GAAG,EAAE,cAAc;IAI/B;;;;;;;;;;;;OAYG;IACG,MAAM,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAWxD;;;;;;;;;;;;;OAaG;IACG,OAAO,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAW3D;;;;;;;;;OASG;IACG,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK/C,IAAI,SAAS,IAAI,MAAM,CAEtB;CACF"}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.BranchStore = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* High-throughput multi-branch decode operations
|
|
6
|
+
*
|
|
7
|
+
* The naive approach to N-branch generation is N sequential llama_decode()
|
|
8
|
+
* calls — each paying full GPU kernel launch overhead, memory barrier, and
|
|
9
|
+
* PCIe round-trip. BranchStore eliminates this by packing all branches into
|
|
10
|
+
* a single llama_batch and dispatching once: O(1) GPU round-trips regardless
|
|
11
|
+
* of branch count. The GPU parallelizes across sequences within the batch,
|
|
12
|
+
* so N branches approach the wall-time cost of 1.
|
|
13
|
+
*
|
|
14
|
+
* Two operations, two packing strategies:
|
|
15
|
+
*
|
|
16
|
+
* **commit()** — Generation step. Each branch contributes exactly 1 token.
|
|
17
|
+
* Packs N tokens into a single batch via `decode_each` (one row per sequence,
|
|
18
|
+
* all at their respective positions). Single `llama_decode()` call. Logits
|
|
19
|
+
* captured per-branch at batch index `i`. O(N) total work, O(1) GPU
|
|
20
|
+
* dispatches, O(1) amortized dispatch overhead per branch. Accept-first
|
|
21
|
+
* ordering with rollback: accepts each token into its branch's repeat-penalty
|
|
22
|
+
* window before decode, restores from clones if decode throws.
|
|
23
|
+
*
|
|
24
|
+
* **prefill()** — Bulk token injection. Each branch contributes a
|
|
25
|
+
* variable-length token array. Uses a two-pass bin-packing algorithm:
|
|
26
|
+
*
|
|
27
|
+
* - *Pass 1 (planning)*: Greedy first-fit packs items into chunks ≤ nBatch.
|
|
28
|
+
* Items larger than nBatch get a dedicated chunk and fall through to
|
|
29
|
+
* decode_many's internal auto-chunking (ceil(nTokens / nBatch) calls).
|
|
30
|
+
* - *Pass 2 (dispatch)*: Normal chunks dispatch via `decode_scatter` (one
|
|
31
|
+
* `llama_decode` per chunk). Logits are indexed by flattened cursor
|
|
32
|
+
* position: for item k in a chunk, logits live at `cursor + nTokens[k] - 1`.
|
|
33
|
+
*
|
|
34
|
+
* For T total tokens across N branches with batch capacity B:
|
|
35
|
+
* - Best case (T ≤ B): 1 GPU dispatch, all branches in one batch.
|
|
36
|
+
* - Worst case: ceil(T / B) dispatches. Each dispatch is fully packed.
|
|
37
|
+
* - Amortized per-token GPU overhead: O(1/B) — vanishes as batch fills.
|
|
38
|
+
*
|
|
39
|
+
* Does NOT accept tokens into the sampler penalty window — use for
|
|
40
|
+
* external/replayed tokens where repeat-penalty tracking is unwanted.
|
|
41
|
+
* For model-generated tokens, use {@link commit} instead.
|
|
42
|
+
*
|
|
43
|
+
* Both methods take `[branch, token(s)]` tuples — the branch-to-token
|
|
44
|
+
* binding is structural, not positional. After either call, each branch's
|
|
45
|
+
* logits snapshot is updated with the output distribution from its decoded
|
|
46
|
+
* token(s), ready for the next `produce()`/`sample()` call.
|
|
47
|
+
*
|
|
48
|
+
* @example 32-branch generation step — one GPU dispatch
|
|
49
|
+
* ```typescript
|
|
50
|
+
* const store = new BranchStore(ctx);
|
|
51
|
+
* const entries = await Promise.all(branches.map(async b => [b, (await b.produce()).token] as [Branch, number]));
|
|
52
|
+
* await store.commit(entries); // 32 tokens, 1 llama_decode()
|
|
53
|
+
* ```
|
|
54
|
+
*
|
|
55
|
+
* @example Best-of-N with batched commit
|
|
56
|
+
* ```typescript
|
|
57
|
+
* const store = new BranchStore(ctx);
|
|
58
|
+
* const branches = [];
|
|
59
|
+
* for (const _ of [1, 2, 3]) branches.push(await root.fork());
|
|
60
|
+
*
|
|
61
|
+
* for (let step = 0; step < 50; step++) {
|
|
62
|
+
* const produced = await Promise.all(branches.map(async b => [b, await b.produce()] as const));
|
|
63
|
+
* const live = produced.filter(([, p]) => !p.isStop);
|
|
64
|
+
* if (!live.length) break;
|
|
65
|
+
* await store.commit(live.map(([b, p]) => [b, p.token]));
|
|
66
|
+
* }
|
|
67
|
+
* ```
|
|
68
|
+
*
|
|
69
|
+
* @example Asymmetric prefill — variable-length injections, auto-chunked
|
|
70
|
+
* ```typescript
|
|
71
|
+
* await store.prefill([
|
|
72
|
+
* [branchA, systemPromptTokens], // 200 tokens
|
|
73
|
+
* [branchB, shortQueryTokens], // 12 tokens
|
|
74
|
+
* [branchC, longDocumentTokens], // 800 tokens
|
|
75
|
+
* ]);
|
|
76
|
+
* // Bin-packed into ceil(1012 / nBatch) GPU dispatches
|
|
77
|
+
* ```
|
|
78
|
+
*
|
|
79
|
+
* @category Branching
|
|
80
|
+
*/
|
|
81
|
+
class BranchStore {
    /** Session context that performs the actual batched store operations. */
    _ctx;
    /**
     * @param ctx - Session context every call below is delegated to.
     */
    constructor(ctx) {
        this._ctx = ctx;
    }
    /**
     * Batched single-token commit for model-generated tokens
     *
     * Each tuple `[branch, token]` binds one token to one branch.
     * Accepts each token into its branch's repeat-penalty window (for correct
     * PPL measurement), then decodes all N tokens in a single llama_decode()
     * call via decode_each and captures logits per-branch. Accept-first
     * ordering with rollback: if decode throws, sampler/grammar/metrics are
     * restored from clones taken before the accept.
     *
     * This wrapper validates every entry first, then forwards parallel
     * handle/token arrays to the context in one call.
     *
     * @param entries - Array of `[branch, token]` tuples (branches must not be disposed)
     * @returns Promise that resolves once the context's commit has completed
     * @throws If any branch is disposed (nothing is dispatched in that case)
     */
    async commit(entries) {
        const handles = [];
        const tokens = [];
        for (const [branch, token] of entries) {
            if (branch.disposed)
                throw new Error('BranchStore.commit: branch is disposed');
            handles.push(branch.handle);
            tokens.push(token);
        }
        await this._ctx._storeCommit(handles, tokens);
    }
    /**
     * Batched variable-length prefill for external tokens
     *
     * Each tuple `[branch, tokens]` binds a token array to one branch.
     * Each branch can receive a different number of tokens — decode_scatter
     * handles variable-length runs and auto-chunks to fit nBatch.
     *
     * Does NOT call accept_token — use for external/replayed tokens where
     * repeat-penalty tracking is unwanted. For model-generated tokens,
     * use {@link commit} instead.
     *
     * @param entries - Array of `[branch, tokens]` tuples (branches must not be disposed)
     * @returns Promise that resolves once the context's prefill has completed
     * @throws If any branch is disposed (nothing is dispatched in that case)
     */
    async prefill(entries) {
        const handles = [];
        const tokenArrays = [];
        for (const [branch, tokens] of entries) {
            if (branch.disposed)
                throw new Error('BranchStore.prefill: branch is disposed');
            handles.push(branch.handle);
            tokenArrays.push(tokens);
        }
        await this._ctx._storePrefill(handles, tokenArrays);
    }
    /**
     * Retain only the winner branch — evict all other leases and free their slots.
     *
     * Nuclear operation: calls `kv::seq_keep` on the winner's seq_id (stripping all
     * other sequences from KV cache in a single pass), then frees all loser slots
     * and rebuilds the vacancy list. The winner's topology is reset (no parent, no children).
     *
     * @param winner - The branch to keep (must not be disposed, must hold a lease)
     * @returns Promise that resolves once the context call has completed
     * @throws If winner is disposed, or if the context rejects the request
     */
    async retainOnly(winner) {
        if (winner.disposed)
            throw new Error('BranchStore.retainOnly: winner is disposed');
        // Awaited like commit()/prefill(): if the context returns a promise, its
        // completion and any failure now propagate to the caller instead of
        // becoming an unhandled rejection. Awaiting a plain value is harmless.
        await this._ctx._storeRetainOnly(winner.handle);
    }
    /**
     * Value reported by the context's `_storeAvailable()`.
     * NOTE(review): presumably the number of free branch slots — confirm.
     */
    get available() {
        return this._ctx._storeAvailable();
    }
}
|
|
154
|
+
exports.BranchStore = BranchStore;
|
|
155
|
+
//# sourceMappingURL=BranchStore.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BranchStore.js","sourceRoot":"","sources":["../src/BranchStore.ts"],"names":[],"mappings":";;;AAGA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAa,WAAW;IACd,IAAI,CAAiB;IAE7B,YAAY,GAAmB;QAC7B,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;IAClB,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,MAAM,CAAC,OAA2B;QACtC,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,OAAO,EAAE,CAAC;YACtC,IAAI,MAAM,CAAC,QAAQ;gBAAE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;YAC/E,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC5B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,OAAO,CAAC,OAA6B;QACzC,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,WAAW,GAAe,EAAE,CAAC;QACnC,KAAK,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACvC,IAAI,MAAM,CAAC,QAAQ;gBAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;YAChF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC5B,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IACtD,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,UAAU,CAAC,MAAc;QAC7B,IAAI,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QACnF,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;IACrC,CAAC;CACF;AA1ED,kCA0EC"}
|
package/dist/Rerank.d.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { SessionContext, RerankProgress } from './types';
|
|
2
|
+
/**
 * Yes/no document reranker on top of a SessionContext.
 *
 * Each document is placed into a fixed chat prompt together with the query and
 * scored as the softmax probability of the "yes" token against the "no" token.
 * Prompts from all outstanding `score()` calls are scored in groups of at most
 * `nSeqMax`.
 */
export declare class Rerank {
|
|
3
|
+
/** Context used for tokenizing, chat formatting and group scoring; disposed by `dispose()`. */
private _ctx;
|
|
4
|
+
/** Maximum number of prompts per scoring group; also divides `_nCtx` into per-prompt budgets. */
private _nSeqMax;
|
|
5
|
+
/** Context size used to derive the per-document token budget. */
private _nCtx;
|
|
6
|
+
/** First token id returned when tokenizing the text "yes". */
private _yesId;
|
|
7
|
+
/** First token id returned when tokenizing the text "no". */
private _noId;
|
|
8
|
+
/** Tokens of the formatted prompt that precede the query. */
private _prefixTokens;
|
|
9
|
+
/** Tokens of the formatted prompt between the query and the document. */
private _midTokens;
|
|
10
|
+
/** Tokens of the formatted prompt that follow the document. */
private _suffixTokens;
|
|
11
|
+
/** Queue of score requests that still have unscored documents. */
private _pending;
|
|
12
|
+
/** True while the drain loop is running, so only one loop is active at a time. */
private _draining;
|
|
13
|
+
/** Set by `dispose()`; `score()` throws once it is true. */
private _disposed;
|
|
14
|
+
private constructor();
|
|
15
|
+
/**
|
|
16
|
+
* Create a Rerank instance from a pre-created SessionContext
|
|
17
|
+
*
|
|
18
|
+
* The caller is responsible for creating the context with appropriate
|
|
19
|
+
* settings (nSeqMax, nCtx, typeK, typeV). Rerank takes ownership of
|
|
20
|
+
* the context and will dispose it on `dispose()`.
|
|
21
|
+
*
|
|
22
|
+
* @param ctx - SessionContext configured for reranking
|
|
23
|
+
* @param opts - Capacity hints (nSeqMax, nCtx) — must match context creation params.
* `nSeqMax` defaults to 8; `nCtx` defaults to the `nCtx` reported by
* `ctx._storeKvPressure()`.
* @returns Promise for the ready-to-use instance (the prompt template is
* formatted and tokenized once, up front)
|
|
24
|
+
*/
|
|
25
|
+
static create(ctx: SessionContext, opts?: {
|
|
26
|
+
nSeqMax?: number;
|
|
27
|
+
|
|
+
nCtx?: number;
|
|
28
|
+
}): Promise<Rerank>;
|
|
29
|
+
/**
 * Score pre-tokenized documents against a query.
 *
 * Documents longer than the per-prompt budget
 * (`floor(nCtx / nSeqMax)` minus the template and query tokens) are truncated.
 * The returned iterable yields a progress object `{ filled, total, results }`
 * each time a scored group contained documents of this request; `results` is
 * sorted by descending score (rounded to 3 decimals) and cut to `topK` when given.
 *
 * @param query - Query text; tokenized internally
 * @param documents - One token array per document (see {@link tokenize})
 * @param topK - Optional maximum number of results per progress object
 * @returns Async iterable of progress objects; iteration throws if scoring fails
 * or the instance is disposed while the request is pending
 * @throws Synchronously, if the instance is already disposed
 */
score(query: string, documents: number[][], topK?: number): AsyncIterable<RerankProgress>;
|
|
30
|
+
/**
 * Tokenize text with the underlying context (`ctx.tokenize(text, false)`).
 * NOTE(review): the `false` flag presumably disables special/BOS tokens — confirm.
 *
 * @param text - Text to tokenize
 * @returns Promise for the token ids
 */
tokenize(text: string): Promise<number[]>;
|
|
31
|
+
/**
 * Mark the instance disposed, fail every pending request with
 * `Error('Rerank disposed')`, and dispose the underlying context.
 */
dispose(): void;
|
|
32
|
+
private _sortResults;
|
|
33
|
+
private _enqueue;
|
|
34
|
+
private _fillGroup;
|
|
35
|
+
private _drain;
|
|
36
|
+
private _rerankScore;
|
|
37
|
+
}
|
|
38
|
+
//# sourceMappingURL=Rerank.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Rerank.d.ts","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAgB,cAAc,EAAE,MAAM,SAAS,CAAC;AAmE5E,qBAAa,MAAM;IACjB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,UAAU,CAAW;IAC7B,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,QAAQ,CAAwB;IACxC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAE1B,OAAO;IAoBP;;;;;;;;;OASG;WACU,MAAM,CAAC,GAAG,EAAE,cAAc,EAAE,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAwBrG,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC;IA0BnF,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAI/C,OAAO,IAAI,IAAI;IAUf,OAAO,CAAC,YAAY;IAOpB,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,UAAU;YAiBJ,MAAM;IA+CpB,OAAO,CAAC,YAAY;CAMrB"}
|
package/dist/Rerank.js
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Rerank = void 0;
|
|
4
|
+
// System message of the rerank prompt: restricts the model's answer to "yes"/"no".
// Used by Rerank.create() when it formats the prompt template.
const SYSTEM_PROMPT = 'Judge whether the Document meets the requirements based on the Query ' +
|
|
5
|
+
'and the Instruct provided. Note that the answer can only be "yes" or "no".';
|
|
6
|
+
// Start of the user message: the retrieval instruction followed by the
// "<Query>: " label. Rerank.create() appends the query, then "\n\n<Document>: "
// and the document after this prefix.
const USER_PREFIX = '<Instruct>: Given a web search query, retrieve relevant passages that answer the query\n\n' +
|
|
7
|
+
'<Query>: ';
|
|
8
|
+
/**
 * Simple async channel — the producer pushes, the consumer pulls via for-await.
 *
 * Returns an object with:
 * - `push(value)`  — buffer a value and wake waiting consumers
 * - `finish()`     — mark the stream complete; buffered values are still delivered
 * - `error(e)`     — fail the stream; the next `next()` call throws `e`
 *                    (values still in the buffer are not delivered)
 * - `iterable`     — async iterable over the pushed values
 *
 * Every parked `next()` call is tracked, so calling `next()` several times
 * before a value arrives no longer strands the earlier callers.
 */
function channel() {
    const buffer = [];
    // Resolvers of next() calls that are currently parked waiting for data.
    const waiters = [];
    let done = false;
    let err = null;
    // Wake every parked consumer; each one re-checks the loop condition itself
    // and parks again if another consumer already took the value.
    const wakeAll = () => {
        const parked = waiters.splice(0, waiters.length);
        for (const resume of parked)
            resume();
    };
    const wait = () => new Promise((resolve) => { waiters.push(resolve); });
    return {
        push(value) {
            buffer.push(value);
            wakeAll();
        },
        finish() {
            done = true;
            wakeAll();
        },
        error(e) {
            err = e;
            wakeAll();
        },
        iterable: {
            [Symbol.asyncIterator]() {
                return {
                    async next() {
                        while (buffer.length === 0 && !done && !err)
                            await wait();
                        // An error takes precedence over anything still buffered.
                        if (err)
                            throw err;
                        if (buffer.length > 0)
                            return { value: buffer.shift(), done: false };
                        return { value: undefined, done: true };
                    },
                };
            },
        },
    };
}
|
|
48
|
+
/**
 * Yes/no document reranker on top of a SessionContext.
 *
 * Each document is wrapped in a fixed chat prompt together with the query and
 * scored as the softmax probability of the "yes" token against the "no" token.
 * Prompts from all outstanding score() calls share one queue and are scored in
 * groups of at most `nSeqMax`.
 */
class Rerank {
    /** Context used for tokenizing, chat formatting and group scoring. */
    _ctx;
    /** Maximum prompts per scoring group; also divides `_nCtx` into per-prompt budgets. */
    _nSeqMax;
    /** Context size used to derive the per-document token budget. */
    _nCtx;
    /** First token id of the text "yes". */
    _yesId;
    /** First token id of the text "no". */
    _noId;
    /** Prompt tokens preceding the query. */
    _prefixTokens;
    /** Prompt tokens between the query and the document. */
    _midTokens;
    /** Prompt tokens following the document. */
    _suffixTokens;
    /** Requests that still have unscored documents. */
    _pending = [];
    /** True while the drain loop runs, so only one loop is active. */
    _draining = false;
    /** Set by dispose(). */
    _disposed = false;
    constructor(ctx, nSeqMax, nCtx, yesId, noId, prefixTokens, midTokens, suffixTokens) {
        this._ctx = ctx;
        this._nSeqMax = nSeqMax;
        this._nCtx = nCtx;
        this._yesId = yesId;
        this._noId = noId;
        this._prefixTokens = prefixTokens;
        this._midTokens = midTokens;
        this._suffixTokens = suffixTokens;
    }
    /**
     * Create a Rerank instance from a pre-created SessionContext
     *
     * The caller is responsible for creating the context with appropriate
     * settings (nSeqMax, nCtx, typeK, typeV). Rerank takes ownership of
     * the context and will dispose it on `dispose()`.
     *
     * @param ctx - SessionContext configured for reranking
     * @param opts - Capacity hints (nSeqMax, nCtx) — must match context creation params.
     *   `nSeqMax` defaults to 8, `nCtx` to the value reported by `ctx._storeKvPressure()`.
     * @returns The ready-to-use instance
     * @throws If the formatted chat prompt does not contain the query and
     *   document placeholders in that order
     */
    static async create(ctx, opts) {
        const nSeqMax = opts?.nSeqMax ?? 8;
        const nCtx = opts?.nCtx ?? ctx._storeKvPressure().nCtx;
        const [yesId] = await ctx.tokenize('yes', false);
        const [noId] = await ctx.tokenize('no', false);
        // Format the prompt once with placeholder markers, then cut it at the
        // markers to obtain the constant token runs around query and document.
        const SENTINEL_Q = '\x00QUERY\x00';
        const SENTINEL_D = '\x00DOC\x00';
        const probe = await ctx.formatChat(JSON.stringify([
            { role: 'system', content: SYSTEM_PROMPT },
            { role: 'user', content: `${USER_PREFIX}${SENTINEL_Q}\n\n<Document>: ${SENTINEL_D}` },
        ]), { addGenerationPrompt: true, enableThinking: false });
        const p = probe.prompt;
        const qi = p.indexOf(SENTINEL_Q);
        const di = p.indexOf(SENTINEL_D);
        // A missing marker would make the slices below silently produce a
        // corrupted template, so fail loudly instead.
        if (qi < 0 || di < qi + SENTINEL_Q.length)
            throw new Error('Rerank.create: chat template did not preserve the query/document placeholders');
        const prefixTokens = await ctx.tokenize(p.slice(0, qi), true);
        const midTokens = await ctx.tokenize(p.slice(qi + SENTINEL_Q.length, di), false);
        const suffixTokens = await ctx.tokenize(p.slice(di + SENTINEL_D.length), false);
        return new Rerank(ctx, nSeqMax, nCtx, yesId, noId, prefixTokens, midTokens, suffixTokens);
    }
    /**
     * Score pre-tokenized documents against a query.
     *
     * Yields a progress object `{ filled, total, results }` each time a scored
     * group contained documents of this request. An empty `documents` array
     * yields a single `{ filled: 0, total: 0, results: [] }` and completes.
     *
     * @param query - Query text; tokenized internally
     * @param documents - One token array per document
     * @param topK - Optional maximum number of results per progress object
     * @returns Async iterable of progress objects; iteration throws if scoring
     *   fails or the instance is disposed while the request is pending
     * @throws Synchronously, if the instance is already disposed
     */
    score(query, documents, topK) {
        if (this._disposed)
            throw new Error('Rerank disposed');
        const ch = channel();
        (async () => {
            try {
                const queryTokens = await this._ctx.tokenize(query, false);
                // dispose() may have run while the query was being tokenized.
                if (this._disposed)
                    throw new Error('Rerank disposed');
                const shared = [...this._prefixTokens, ...queryTokens, ...this._midTokens];
                // Clamp at zero: a negative budget would make slice(0, maxDoc)
                // drop tokens from the END of the document instead of truncating it.
                const maxDoc = Math.max(0, Math.floor(this._nCtx / this._nSeqMax) - shared.length - this._suffixTokens.length);
                const tokenArrays = documents.map((doc) => {
                    const trimmed = doc.length > maxDoc ? doc.slice(0, maxDoc) : doc;
                    return [...shared, ...trimmed, ...this._suffixTokens];
                });
                this._enqueue(tokenArrays, topK, ch.push, ch.finish, ch.error);
            }
            catch (err) {
                ch.error(err instanceof Error ? err : new Error(String(err)));
            }
        })();
        return ch.iterable;
    }
    /**
     * Tokenize text with the underlying context (`ctx.tokenize(text, false)`).
     *
     * @param text - Text to tokenize
     * @returns Promise for the token ids
     */
    async tokenize(text) {
        return this._ctx.tokenize(text, false);
    }
    /**
     * Fail every pending request with `Error('Rerank disposed')` and dispose the
     * underlying context. Calling it again is a no-op.
     */
    dispose() {
        if (this._disposed)
            return;
        this._disposed = true;
        this._failPending(new Error('Rerank disposed'));
        this._ctx.dispose();
    }
    // ── Queue internals ──────────────────────────────────────────
    /** Report `error` to every pending request and empty the queue. */
    _failPending(error) {
        const failed = this._pending.splice(0, this._pending.length);
        for (const req of failed)
            req.error(error);
    }
    /**
     * Build the result list: scores rounded to 3 decimals, sorted descending,
     * cut to `topK` when given.
     * NOTE(review): `scores` is sparse until every document is scored, so partial
     * progress results can end with empty slots — confirm whether consumers rely
     * on that before changing it.
     */
    _sortResults(scores, topK) {
        const sorted = scores
            .map((score, index) => ({ score: Math.round(score * 1000) / 1000, index }))
            .sort((a, b) => b.score - a.score);
        return topK != null ? sorted.slice(0, topK) : sorted;
    }
    /** Queue one score() request and make sure the drain loop is running. */
    _enqueue(tokenArrays, topK, push, finish, error) {
        if (tokenArrays.length === 0) {
            // Nothing to score: the drain loop would never touch this request,
            // so complete it here instead of leaving the consumer waiting forever.
            push({ filled: 0, total: 0, results: [] });
            finish();
            return;
        }
        this._pending.push({
            tokenArrays, cursor: 0,
            scores: new Array(tokenArrays.length),
            filled: 0,
            topK,
            total: tokenArrays.length,
            push, finish, error,
        });
        this._drain();
    }
    /**
     * Take up to `_nSeqMax` unscored prompts, visiting pending requests in
     * round-robin order so every request advances.
     */
    _fillGroup() {
        const group = [];
        let added = true;
        while (group.length < this._nSeqMax && added) {
            added = false;
            for (let r = 0; r < this._pending.length && group.length < this._nSeqMax; r++) {
                const req = this._pending[r];
                if (req.cursor < req.tokenArrays.length) {
                    // Keep the request object itself: queue indices can become
                    // stale while the group is being scored.
                    group.push({ req, reqIdx: r, promptIdx: req.cursor, tokens: req.tokenArrays[req.cursor] });
                    req.cursor++;
                    added = true;
                }
            }
        }
        return group;
    }
    /**
     * Score groups until the queue is empty. Never rejects: any failure is
     * reported to the pending requests instead.
     */
    async _drain() {
        if (this._draining)
            return;
        this._draining = true;
        try {
            while (this._pending.length > 0) {
                const group = this._fillGroup();
                if (group.length === 0)
                    break;
                const logits = await this._ctx._scoreGroup(group.map((g) => g.tokens));
                // dispose() ran while the group was in flight; it has already
                // failed and removed every request.
                if (this._disposed)
                    return;
                // Track which requests got new scores this group
                const touched = new Set();
                for (let i = 0; i < group.length; i++) {
                    const entry = group[i];
                    entry.req.scores[entry.promptIdx] = this._rerankScore(logits[i]);
                    entry.req.filled++;
                    touched.add(entry.req);
                }
                // Push progress for each request that advanced, finish completed ones.
                // Reverse order keeps indices valid while splicing.
                for (let r = this._pending.length - 1; r >= 0; r--) {
                    const req = this._pending[r];
                    if (!touched.has(req))
                        continue;
                    const results = this._sortResults(req.scores, req.topK);
                    req.push({ filled: req.filled, total: req.total, results });
                    if (req.filled === req.total) {
                        req.finish();
                        this._pending.splice(r, 1);
                    }
                }
            }
        }
        catch (err) {
            this._failPending(err instanceof Error ? err : new Error(String(err)));
        }
        finally {
            this._draining = false;
        }
    }
    /** Two-way softmax: probability of "yes" given only the "yes" and "no" logits. */
    _rerankScore(logits) {
        // Subtract the larger logit before exponentiating for numerical stability.
        const max = Math.max(logits[this._yesId], logits[this._noId]);
        const yesExp = Math.exp(logits[this._yesId] - max);
        const noExp = Math.exp(logits[this._noId] - max);
        return yesExp / (yesExp + noExp);
    }
}
|
|
219
|
+
exports.Rerank = Rerank;
|
|
220
|
+
//# sourceMappingURL=Rerank.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Rerank.js","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":";;;AAEA,MAAM,aAAa,GACjB,uEAAuE;IACvE,4EAA4E,CAAC;AAE/E,MAAM,WAAW,GACf,4FAA4F;IAC5F,WAAW,CAAC;AAcd,yEAAyE;AACzE,SAAS,OAAO;IAMd,MAAM,MAAM,GAAQ,EAAE,CAAC;IACvB,IAAI,IAAI,GAAG,KAAK,CAAC;IACjB,IAAI,GAAG,GAAiB,IAAI,CAAC;IAC7B,IAAI,MAAM,GAAwB,IAAI,CAAC;IAEvC,MAAM,IAAI,GAAG,GAAG,EAAE,CAAC,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,EAAE,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE7D,OAAO;QACL,IAAI,CAAC,KAAQ;YACX,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,MAAM,EAAE,EAAE,CAAC;YACX,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,MAAM;YACJ,IAAI,GAAG,IAAI,CAAC;YACZ,MAAM,EAAE,EAAE,CAAC;YACX,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,KAAK,CAAC,CAAQ;YACZ,GAAG,GAAG,CAAC,CAAC;YACR,MAAM,EAAE,EAAE,CAAC;YACX,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,QAAQ,EAAE;YACR,CAAC,MAAM,CAAC,aAAa,CAAC;gBACpB,OAAO;oBACL,KAAK,CAAC,IAAI;wBACR,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,GAAG;4BAAE,MAAM,IAAI,EAAE,CAAC;wBAC1D,IAAI,GAAG;4BAAE,MAAM,GAAG,CAAC;wBACnB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;4BAAE,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAG,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;wBACtE,OAAO,EAAE,KAAK,EAAE,SAAyB,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;oBAC1D,CAAC;iBACF,CAAC;YACJ,CAAC;SACF;KACF,CAAC;AACJ,CAAC;AAED,MAAa,MAAM;IACT,IAAI,CAAiB;IACrB,QAAQ,CAAS;IACjB,KAAK,CAAS;IACd,MAAM,CAAS;IACf,KAAK,CAAS;IACd,aAAa,CAAW;IACxB,UAAU,CAAW;IACrB,aAAa,CAAW;IACxB,QAAQ,GAAqB,EAAE,CAAC;IAChC,SAAS,GAAG,KAAK,CAAC;IAClB,SAAS,GAAG,KAAK,CAAC;IAE1B,YACE,GAAmB,EACnB,OAAe,EACf,IAAY,EACZ,KAAa,EACb,IAAY,EACZ,YAAsB,EACtB,SAAmB,EACnB,YAAsB;QAEtB,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACpC,CAAC;IAED;;;;;;;;;OASG;IACH,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAmB,EAAE,IAA0C;QACjF,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,CAAC,CAAC;QACnC,MAAM,IAAI,GAAG,IAAI,E
AAE,IAAI,IAAI,GAAG,CAAC,gBAAgB,EAAE,CAAC,IAAI,CAAC;QAEvD,MAAM,CAAC,KAAK,CAAC,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACjD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAE/C,MAAM,UAAU,GAAG,eAAe,CAAC;QACnC,MAAM,UAAU,GAAG,aAAa,CAAC;QACjC,MAAM,KAAK,GAAG,MAAM,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAChD,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;YAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,WAAW,GAAG,UAAU,mBAAmB,UAAU,EAAE,EAAE;SACtF,CAAC,EAAE,EAAE,mBAAmB,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;QAE1D,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;QACvB,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACjC,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACjC,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC9D,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,GAAG,UAAU,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;QACjF,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,EAAE,KAAK,CAAC,CAAC;QAEhF,OAAO,IAAI,MAAM,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;IAC5F,CAAC;IAED,KAAK,CAAC,KAAa,EAAE,SAAqB,EAAE,IAAa;QACvD,IAAI,IAAI,CAAC,SAAS;YAAE,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QAEvD,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,MAAM,EAAE,GAAG,OAAO,EAAkB,CAAC;QAErC,CAAC,KAAK,IAAI,EAAE;YACV,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;gBAC3D,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,aAAa,EAAE,GAAG,WAAW,EAAE,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;gBAC3E,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;gBAElG,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACxC,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBACjE,OAAO,CAAC,GAAG,MAAM,EAAE,GAAG,OAAO,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC;gBACxD,CAAC,CAAC,CAAC;gBAEH,IA
AI,CAAC,QAAQ,CAAC,WAAW,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;YACjE,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,EAAE,CAAC,KAAK,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAChE,CAAC;QACH,CAAC,CAAC,EAAE,CAAC;QAEL,OAAO,EAAE,CAAC,QAAQ,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY;QACzB,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IACzC,CAAC;IAED,OAAO;QACL,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACzC,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,QAAQ;YAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAChD,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;IACtB,CAAC;IAED,gEAAgE;IAExD,YAAY,CAAC,MAAgB,EAAE,IAAwB;QAC7D,MAAM,MAAM,GAAG,MAAM;aAClB,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;aAC1E,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QACrC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACvD,CAAC;IAEO,QAAQ,CACd,WAAuB,EACvB,IAAwB,EACxB,IAAwC,EACxC,MAAkB,EAClB,KAA2B;QAE3B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;YACjB,WAAW,EAAE,MAAM,EAAE,CAAC;YACtB,MAAM,EAAE,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC;YACrC,MAAM,EAAE,CAAC;YACT,IAAI;YACJ,KAAK,EAAE,WAAW,CAAC,MAAM;YACzB,IAAI,EAAE,MAAM,EAAE,KAAK;SACpB,CAAC,CAAC;QACH,IAAI,CAAC,MAAM,EAAE,CAAC;IAChB,CAAC;IAEO,UAAU;QAChB,MAAM,KAAK,GAA8D,EAAE,CAAC;QAC5E,IAAI,KAAK,GAAG,IAAI,CAAC;QACjB,OAAO,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,IAAI,KAAK,EAAE,CAAC;YAC7C,KAAK,GAAG,KAAK,CAAC;YACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9E,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;oBACxC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MA
AM,EAAE,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBACtF,GAAG,CAAC,MAAM,EAAE,CAAC;oBACb,KAAK,GAAG,IAAI,CAAC;gBACf,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,KAAK,CAAC,MAAM;QAClB,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO;QAC3B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAEtB,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChC,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;oBAAE,MAAM;gBAE9B,IAAI,MAAsB,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBACnE,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACb,MAAM,KAAK,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;oBAClE,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,QAAQ;wBAAE,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;oBAClD,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;oBACzB,OAAO;gBACT,CAAC;gBAED,iDAAiD;gBACjD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;gBAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACtC,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;oBAC3C,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC9D,GAAG,CAAC,MAAM,EAAE,CAAC;oBACb,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC/B,CAAC;gBAED,sEAAsE;gBACtE,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnD,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;oBAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;wBAAE,SAAS;oBAE9B,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;oBACxD,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;oBAE5D,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,CAAC;wBAC7B,GAAG,CAAC,MAAM,EAAE,CAAC;wBACb,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;oBAC7B,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;gBAAS,CAA
C;YACT,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAEO,YAAY,CAAC,MAAoB;QACvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;QACjD,OAAO,MAAM,GAAG,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;IACnC,CAAC;CACF;AA1MD,wBA0MC"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import type { Branch } from './Branch';
|
|
2
|
+
import type { BranchStore } from './BranchStore';
|
|
3
|
+
import type { SessionContext } from './types';
|
|
4
|
+
/**
|
|
5
|
+
* Session - Trunk lifecycle + conversation delta helpers
|
|
6
|
+
*
|
|
7
|
+
* Owns the current "trunk" branch and provides promote() to crown a winner,
|
|
8
|
+
* plus delta helpers that centralize the sep + formatChat + tokenize + prefill
|
|
9
|
+
* pattern for injecting new turns into an ongoing conversation.
|
|
10
|
+
*
|
|
11
|
+
* Session does NOT own the SessionContext or BranchStore — the consumer
|
|
12
|
+
* creates those and passes them in. dispose() prunes trunk only.
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* const session = new Session({ ctx, store });
|
|
17
|
+
* session.trunk = initialBranch;
|
|
18
|
+
*
|
|
19
|
+
* // After verification, promote the best attempt
|
|
20
|
+
* await session.promote(bestAttempt.branch);
|
|
21
|
+
*
|
|
22
|
+
* // Inject a user turn and generate
|
|
23
|
+
* await session.prefillUser('What about X?');
|
|
24
|
+
* for await (const { text } of session.trunk) {
|
|
25
|
+
* process.stdout.write(text);
|
|
26
|
+
* }
|
|
27
|
+
*
|
|
28
|
+
* // Cleanup
|
|
29
|
+
* await session.dispose();
|
|
30
|
+
* ctx.dispose();
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* @category Branching
|
|
34
|
+
*/
|
|
35
|
+
export declare class Session {
|
|
36
|
+
private _ctx;
|
|
37
|
+
private _store;
|
|
38
|
+
private _trunk;
|
|
39
|
+
constructor({ ctx, store }: {
|
|
40
|
+
ctx: SessionContext;
|
|
41
|
+
store: BranchStore;
|
|
42
|
+
});
|
|
43
|
+
/** Current trunk branch */
|
|
44
|
+
get trunk(): Branch | null;
|
|
45
|
+
/** Assign initial trunk (no promote) */
|
|
46
|
+
set trunk(branch: Branch | null);
|
|
47
|
+
/**
|
|
48
|
+
* Promote a winner to trunk — retainOnly + reassign
|
|
49
|
+
*
|
|
50
|
+
* Safe even if winner is the only branch (resets topology, no-op on KV).
|
|
51
|
+
*/
|
|
52
|
+
promote(winner: Branch): Promise<void>;
|
|
53
|
+
/**
|
|
54
|
+
* Dispose trunk only — consumer owns ctx and other resources
|
|
55
|
+
*/
|
|
56
|
+
dispose(): Promise<void>;
|
|
57
|
+
/**
|
|
58
|
+
* Prefill a user turn into trunk
|
|
59
|
+
*
|
|
60
|
+
* @param content - User message content
|
|
61
|
+
* @param opts - Optional settings object; `opts.tools` is an optional tools JSON string
|
|
62
|
+
*/
|
|
63
|
+
prefillUser(content: string, opts?: {
|
|
64
|
+
tools?: string;
|
|
65
|
+
}): Promise<void>;
|
|
66
|
+
/**
|
|
67
|
+
* Prefill a tool result turn into trunk
|
|
68
|
+
*
|
|
69
|
+
* @param resultStr - JSON-stringified tool result
|
|
70
|
+
* @param callId - Tool call ID
|
|
71
|
+
*/
|
|
72
|
+
prefillToolResult(resultStr: string, callId: string): Promise<void>;
|
|
73
|
+
}
|
|
74
|
+
//# sourceMappingURL=Session.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Session.d.ts","sourceRoot":"","sources":["../src/Session.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAG9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,qBAAa,OAAO;IAClB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,MAAM,CAAgB;gBAElB,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE;QAAE,GAAG,EAAE,cAAc,CAAC;QAAC,KAAK,EAAE,WAAW,CAAA;KAAE;IAMvE,2BAA2B;IAC3B,IAAI,KAAK,IAAI,MAAM,GAAG,IAAI,CAEzB;IAED,wCAAwC;IACxC,IAAI,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,EAE9B;IAED;;;;OAIG;IACG,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK5C;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAO9B;;;;;OAKG;IACG,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,GAAE;QAAE,KAAK,CAAC,EAAE,MAAM,CAAA;KAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAKhF;;;;;OAKG;IACG,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAI1E"}
|