@lloyal-labs/sdk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ import type { Branch } from './Branch';
2
+ import type { SessionContext } from './types';
3
+ /**
4
+ * High-throughput multi-branch decode operations
5
+ *
6
+ * The naive approach to N-branch generation is N sequential llama_decode()
7
+ * calls — each paying full GPU kernel launch overhead, memory barrier, and
8
+ * PCIe round-trip. BranchStore eliminates this by packing all branches into
9
+ * a single llama_batch and dispatching once: O(1) GPU round-trips regardless
10
+ * of branch count. The GPU parallelizes across sequences within the batch,
11
+ * so N branches approach the wall-time cost of 1.
12
+ *
13
+ * Two operations, two packing strategies:
14
+ *
15
+ * **commit()** — Generation step. Each branch contributes exactly 1 token.
16
+ * Packs N tokens into a single batch via `decode_each` (one row per sequence,
17
+ * all at their respective positions). Single `llama_decode()` call. Logits
18
+ * captured per-branch at batch index `i`. O(N) total work, O(1) GPU
19
+ * dispatches, O(1) amortized dispatch overhead per branch. Accept-first
20
+ * ordering with rollback: accepts each token into its branch's repeat-penalty
21
+ * window before decode, restores from clones if decode throws.
22
+ *
23
+ * **prefill()** — Bulk token injection. Each branch contributes a
24
+ * variable-length token array. Uses a two-pass bin-packing algorithm:
25
+ *
26
+ * - *Pass 1 (planning)*: Greedy first-fit packs items into chunks ≤ nBatch.
27
+ * Items larger than nBatch get a dedicated chunk and fall through to
28
+ * decode_many's internal auto-chunking (ceil(nTokens / nBatch) calls).
29
+ * - *Pass 2 (dispatch)*: Normal chunks dispatch via `decode_scatter` (one
30
+ * `llama_decode` per chunk). Logits are indexed by flattened cursor
31
+ * position: for item k in a chunk, logits live at `cursor + nTokens[k] - 1`.
32
+ *
33
+ * For T total tokens across N branches with batch capacity B:
34
+ * - Best case (T ≤ B): 1 GPU dispatch, all branches in one batch.
35
+ * - Worst case: ceil(T / B) dispatches. Each dispatch is fully packed.
36
+ * - Amortized per-token GPU overhead: O(1/B) — vanishes as batch fills.
37
+ *
38
+ * Does NOT accept tokens into the sampler penalty window — use for
39
+ * external/replayed tokens where repeat-penalty tracking is unwanted.
40
+ * For model-generated tokens, use {@link commit} instead.
41
+ *
42
+ * Both methods take `[branch, token(s)]` tuples — the branch-to-token
43
+ * binding is structural, not positional. After either call, each branch's
44
+ * logits snapshot is updated with the output distribution from its decoded
45
+ * token(s), ready for the next `produce()`/`sample()` call.
46
+ *
47
+ * @example 32-branch generation step — one GPU dispatch
48
+ * ```typescript
49
+ * const store = new BranchStore(ctx);
50
+ * const entries = await Promise.all(branches.map(async b => [b, (await b.produce()).token] as [Branch, number]));
51
+ * await store.commit(entries); // 32 tokens, 1 llama_decode()
52
+ * ```
53
+ *
54
+ * @example Best-of-N with batched commit
55
+ * ```typescript
56
+ * const store = new BranchStore(ctx);
57
+ * const branches = [];
58
+ * for (const _ of [1, 2, 3]) branches.push(await root.fork());
59
+ *
60
+ * for (let step = 0; step < 50; step++) {
61
+ * const produced = await Promise.all(branches.map(async b => [b, await b.produce()] as const));
62
+ * const live = produced.filter(([, p]) => !p.isStop);
63
+ * if (!live.length) break;
64
+ * await store.commit(live.map(([b, p]) => [b, p.token]));
65
+ * }
66
+ * ```
67
+ *
68
+ * @example Asymmetric prefill — variable-length injections, auto-chunked
69
+ * ```typescript
70
+ * await store.prefill([
71
+ * [branchA, systemPromptTokens], // 200 tokens
72
+ * [branchB, shortQueryTokens], // 12 tokens
73
+ * [branchC, longDocumentTokens], // 800 tokens
74
+ * ]);
75
+ * // Bin-packed into ceil(1012 / nBatch) GPU dispatches
76
+ * ```
77
+ *
78
+ * @category Branching
79
+ */
80
export declare class BranchStore {
    /** Session context that every store operation is delegated to. */
    private _ctx;
    /**
     * @param ctx - Session context the batched operations are dispatched through
     */
    constructor(ctx: SessionContext);
    /**
     * Batched single-token commit for model-generated tokens
     *
     * Each tuple `[branch, token]` binds one token to one branch.
     * Accepts each token into its branch's repeat-penalty window (for correct
     * PPL measurement), then decodes all N tokens in a single llama_decode()
     * call via decode_each and captures logits per-branch. Accept-first
     * ordering with rollback: if decode throws, sampler/grammar/metrics are
     * restored from clones taken before the accept.
     *
     * @param entries - Array of `[branch, token]` tuples (branches must not be disposed)
     * @throws If any branch is disposed
     */
    commit(entries: [Branch, number][]): Promise<void>;
    /**
     * Batched variable-length prefill for external tokens
     *
     * Each tuple `[branch, tokens]` binds a token array to one branch.
     * Each branch can receive a different number of tokens — decode_scatter
     * handles variable-length runs and auto-chunks to fit nBatch.
     *
     * Does NOT call accept_token — use for external/replayed tokens where
     * repeat-penalty tracking is unwanted. For model-generated tokens,
     * use {@link commit} instead.
     *
     * @param entries - Array of `[branch, tokens]` tuples (branches must not be disposed)
     * @throws If any branch is disposed
     */
    prefill(entries: [Branch, number[]][]): Promise<void>;
    /**
     * Retain only the winner branch — evict all other leases and free their slots.
     *
     * Nuclear operation: calls `kv::seq_keep` on the winner's seq_id (stripping all
     * other sequences from KV cache in a single pass), then frees all loser slots
     * and rebuilds the vacancy list. The winner's topology is reset (no parent, no children).
     *
     * @param winner - The branch to keep (must not be disposed, must hold a lease)
     * @throws If winner is disposed or has no lease
     */
    retainOnly(winner: Branch): Promise<void>;
    /**
     * Value reported by the session context's `_storeAvailable()` call.
     *
     * NOTE(review): presumably the number of branch slots still free to
     * lease — confirm against the native binding.
     */
    get available(): number;
}
125
+ //# sourceMappingURL=BranchStore.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BranchStore.d.ts","sourceRoot":"","sources":["../src/BranchStore.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,IAAI,CAAiB;gBAEjB,GAAG,EAAE,cAAc;IAI/B;;;;;;;;;;;;OAYG;IACG,MAAM,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAWxD;;;;;;;;;;;;;OAaG;IACG,OAAO,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAW3D;;;;;;;;;OASG;IACG,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK/C,IAAI,SAAS,IAAI,MAAM,CAEtB;CACF"}
@@ -0,0 +1,155 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.BranchStore = void 0;
4
+ /**
5
+ * High-throughput multi-branch decode operations
6
+ *
7
+ * The naive approach to N-branch generation is N sequential llama_decode()
8
+ * calls — each paying full GPU kernel launch overhead, memory barrier, and
9
+ * PCIe round-trip. BranchStore eliminates this by packing all branches into
10
+ * a single llama_batch and dispatching once: O(1) GPU round-trips regardless
11
+ * of branch count. The GPU parallelizes across sequences within the batch,
12
+ * so N branches approach the wall-time cost of 1.
13
+ *
14
+ * Two operations, two packing strategies:
15
+ *
16
+ * **commit()** — Generation step. Each branch contributes exactly 1 token.
17
+ * Packs N tokens into a single batch via `decode_each` (one row per sequence,
18
+ * all at their respective positions). Single `llama_decode()` call. Logits
19
+ * captured per-branch at batch index `i`. O(N) total work, O(1) GPU
20
+ * dispatches, O(1) amortized dispatch overhead per branch. Accept-first
21
+ * ordering with rollback: accepts each token into its branch's repeat-penalty
22
+ * window before decode, restores from clones if decode throws.
23
+ *
24
+ * **prefill()** — Bulk token injection. Each branch contributes a
25
+ * variable-length token array. Uses a two-pass bin-packing algorithm:
26
+ *
27
+ * - *Pass 1 (planning)*: Greedy first-fit packs items into chunks ≤ nBatch.
28
+ * Items larger than nBatch get a dedicated chunk and fall through to
29
+ * decode_many's internal auto-chunking (ceil(nTokens / nBatch) calls).
30
+ * - *Pass 2 (dispatch)*: Normal chunks dispatch via `decode_scatter` (one
31
+ * `llama_decode` per chunk). Logits are indexed by flattened cursor
32
+ * position: for item k in a chunk, logits live at `cursor + nTokens[k] - 1`.
33
+ *
34
+ * For T total tokens across N branches with batch capacity B:
35
+ * - Best case (T ≤ B): 1 GPU dispatch, all branches in one batch.
36
+ * - Worst case: ceil(T / B) dispatches. Each dispatch is fully packed.
37
+ * - Amortized per-token GPU overhead: O(1/B) — vanishes as batch fills.
38
+ *
39
+ * Does NOT accept tokens into the sampler penalty window — use for
40
+ * external/replayed tokens where repeat-penalty tracking is unwanted.
41
+ * For model-generated tokens, use {@link commit} instead.
42
+ *
43
+ * Both methods take `[branch, token(s)]` tuples — the branch-to-token
44
+ * binding is structural, not positional. After either call, each branch's
45
+ * logits snapshot is updated with the output distribution from its decoded
46
+ * token(s), ready for the next `produce()`/`sample()` call.
47
+ *
48
+ * @example 32-branch generation step — one GPU dispatch
49
+ * ```typescript
50
+ * const store = new BranchStore(ctx);
51
+ * const entries = await Promise.all(branches.map(async b => [b, (await b.produce()).token] as [Branch, number]));
52
+ * await store.commit(entries); // 32 tokens, 1 llama_decode()
53
+ * ```
54
+ *
55
+ * @example Best-of-N with batched commit
56
+ * ```typescript
57
+ * const store = new BranchStore(ctx);
58
+ * const branches = [];
59
+ * for (const _ of [1, 2, 3]) branches.push(await root.fork());
60
+ *
61
+ * for (let step = 0; step < 50; step++) {
62
+ * const produced = await Promise.all(branches.map(async b => [b, await b.produce()] as const));
63
+ * const live = produced.filter(([, p]) => !p.isStop);
64
+ * if (!live.length) break;
65
+ * await store.commit(live.map(([b, p]) => [b, p.token]));
66
+ * }
67
+ * ```
68
+ *
69
+ * @example Asymmetric prefill — variable-length injections, auto-chunked
70
+ * ```typescript
71
+ * await store.prefill([
72
+ * [branchA, systemPromptTokens], // 200 tokens
73
+ * [branchB, shortQueryTokens], // 12 tokens
74
+ * [branchC, longDocumentTokens], // 800 tokens
75
+ * ]);
76
+ * // Bin-packed into ceil(1012 / nBatch) GPU dispatches
77
+ * ```
78
+ *
79
+ * @category Branching
80
+ */
81
class BranchStore {
    /** Session context that performs the underlying store operations. */
    _ctx;
    /**
     * @param ctx - Session context the batched operations are dispatched through
     */
    constructor(ctx) {
        this._ctx = ctx;
    }
    /**
     * Batched single-token commit for model-generated tokens
     *
     * Each tuple `[branch, token]` binds one token to one branch.
     * Accepts each token into its branch's repeat-penalty window (for correct
     * PPL measurement), then decodes all N tokens in a single llama_decode()
     * call via decode_each and captures logits per-branch. Accept-first
     * ordering with rollback: if decode throws, sampler/grammar/metrics are
     * restored from clones taken before the accept.
     *
     * @param entries - Array of `[branch, token]` tuples (branches must not be disposed)
     * @returns Promise that settles when the context's `_storeCommit` call settles
     * @throws If any branch is disposed
     */
    async commit(entries) {
        const handles = [];
        const tokens = [];
        // Every entry is validated before anything is dispatched, so a single
        // disposed branch rejects the whole batch without touching the context.
        for (const [branch, token] of entries) {
            if (branch.disposed)
                throw new Error('BranchStore.commit: branch is disposed');
            handles.push(branch.handle);
            tokens.push(token);
        }
        await this._ctx._storeCommit(handles, tokens);
    }
    /**
     * Batched variable-length prefill for external tokens
     *
     * Each tuple `[branch, tokens]` binds a token array to one branch.
     * Each branch can receive a different number of tokens — decode_scatter
     * handles variable-length runs and auto-chunks to fit nBatch.
     *
     * Does NOT call accept_token — use for external/replayed tokens where
     * repeat-penalty tracking is unwanted. For model-generated tokens,
     * use {@link commit} instead.
     *
     * @param entries - Array of `[branch, tokens]` tuples (branches must not be disposed)
     * @returns Promise that settles when the context's `_storePrefill` call settles
     * @throws If any branch is disposed
     */
    async prefill(entries) {
        const handles = [];
        const tokenArrays = [];
        // Same validate-then-dispatch shape as commit(): handles[i] pairs with tokenArrays[i].
        for (const [branch, tokens] of entries) {
            if (branch.disposed)
                throw new Error('BranchStore.prefill: branch is disposed');
            handles.push(branch.handle);
            tokenArrays.push(tokens);
        }
        await this._ctx._storePrefill(handles, tokenArrays);
    }
    /**
     * Retain only the winner branch — evict all other leases and free their slots.
     *
     * Nuclear operation: calls `kv::seq_keep` on the winner's seq_id (stripping all
     * other sequences from KV cache in a single pass), then frees all loser slots
     * and rebuilds the vacancy list. The winner's topology is reset (no parent, no children).
     *
     * @param winner - The branch to keep (must not be disposed, must hold a lease)
     * @returns Promise that settles once the context's `_storeRetainOnly` call has completed
     * @throws If winner is disposed or has no lease
     */
    async retainOnly(winner) {
        if (winner.disposed)
            throw new Error('BranchStore.retainOnly: winner is disposed');
        // Awaited for parity with commit()/prefill(): if the binding returns a
        // promise, its failure must reject this call instead of surfacing as an
        // unhandled rejection. Awaiting a plain (synchronous) result is harmless.
        await this._ctx._storeRetainOnly(winner.handle);
    }
    /**
     * Value reported by the session context's `_storeAvailable()` call.
     *
     * NOTE(review): presumably the number of branch slots still free to
     * lease — confirm against the native binding.
     */
    get available() {
        return this._ctx._storeAvailable();
    }
}
154
+ exports.BranchStore = BranchStore;
155
+ //# sourceMappingURL=BranchStore.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BranchStore.js","sourceRoot":"","sources":["../src/BranchStore.ts"],"names":[],"mappings":";;;AAGA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4EG;AACH,MAAa,WAAW;IACd,IAAI,CAAiB;IAE7B,YAAY,GAAmB;QAC7B,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;IAClB,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,MAAM,CAAC,OAA2B;QACtC,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,KAAK,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,OAAO,EAAE,CAAC;YACtC,IAAI,MAAM,CAAC,QAAQ;gBAAE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;YAC/E,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC5B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,OAAO,CAAC,OAA6B;QACzC,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,WAAW,GAAe,EAAE,CAAC;QACnC,KAAK,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACvC,IAAI,MAAM,CAAC,QAAQ;gBAAE,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;YAChF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC5B,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IACtD,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,UAAU,CAAC,MAAc;QAC7B,IAAI,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QACnF,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;IACrC,CAAC;CACF;AA1ED,kCA0EC"}
@@ -0,0 +1,38 @@
1
+ import type { SessionContext, RerankProgress } from './types';
2
export declare class Rerank {
    /** Session context used for tokenizing, chat formatting and scoring; disposed by `dispose()`. */
    private _ctx;
    /** Maximum number of prompts sent to the context in one scoring group. */
    private _nSeqMax;
    /** Context size used to derive the per-document token budget (`floor(nCtx / nSeqMax)`). */
    private _nCtx;
    /** First token id produced by tokenizing `"yes"`. */
    private _yesId;
    /** First token id produced by tokenizing `"no"`. */
    private _noId;
    /** Tokens of the formatted chat prompt that precede the query. */
    private _prefixTokens;
    /** Tokens of the formatted chat prompt between the query and the document. */
    private _midTokens;
    /** Tokens of the formatted chat prompt that follow the document. */
    private _suffixTokens;
    /** Queue of score requests that still have unscored documents. */
    private _pending;
    /** True while the drain loop is running, so only one loop is active at a time. */
    private _draining;
    /** Set by `dispose()`; `score()` throws once this is true. */
    private _disposed;
    private constructor();
    /**
     * Create a Rerank instance from a pre-created SessionContext
     *
     * The caller is responsible for creating the context with appropriate
     * settings (nSeqMax, nCtx, typeK, typeV). Rerank takes ownership of
     * the context and will dispose it on `dispose()`.
     *
     * @param ctx - SessionContext configured for reranking
     * @param opts - Capacity hints (nSeqMax, nCtx) — must match context creation params
     */
    static create(ctx: SessionContext, opts?: {
        nSeqMax?: number;
        nCtx?: number;
    }): Promise<Rerank>;
    /**
     * Score pre-tokenized documents against a query, streaming progress.
     *
     * The query is tokenized, each document is wrapped in the prompt template
     * (and truncated to the per-sequence budget), and the prompts are scored in
     * groups of at most `nSeqMax`. After each group a progress object
     * (`filled`, `total`, `results`) is yielded, with `results` sorted by
     * descending score (scores rounded to three decimals).
     *
     * @param query - Query text
     * @param documents - One token-id array per document (see {@link tokenize})
     * @param topK - When given, `results` is cut to the first `topK` entries
     * @returns Async iterable of progress updates; it ends after the update in which `filled === total`
     * @throws Synchronously if the instance has been disposed
     */
    score(query: string, documents: number[][], topK?: number): AsyncIterable<RerankProgress>;
    /**
     * Tokenize text with the underlying context (second `tokenize` argument is `false`).
     *
     * NOTE(review): the `false` flag presumably disables special/BOS tokens — confirm against SessionContext.
     *
     * @param text - Text to tokenize
     * @returns Token ids
     */
    tokenize(text: string): Promise<number[]>;
    /**
     * Mark the instance disposed, fail every pending score request with
     * `Error('Rerank disposed')`, and dispose the owned context.
     */
    dispose(): void;
    private _sortResults;
    private _enqueue;
    private _fillGroup;
    private _drain;
    private _rerankScore;
}
38
+ //# sourceMappingURL=Rerank.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Rerank.d.ts","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAgB,cAAc,EAAE,MAAM,SAAS,CAAC;AAmE5E,qBAAa,MAAM;IACjB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,UAAU,CAAW;IAC7B,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,QAAQ,CAAwB;IACxC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAE1B,OAAO;IAoBP;;;;;;;;;OASG;WACU,MAAM,CAAC,GAAG,EAAE,cAAc,EAAE,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAwBrG,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC;IA0BnF,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAI/C,OAAO,IAAI,IAAI;IAUf,OAAO,CAAC,YAAY;IAOpB,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,UAAU;YAiBJ,MAAM;IA+CpB,OAAO,CAAC,YAAY;CAMrB"}
package/dist/Rerank.js ADDED
@@ -0,0 +1,220 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Rerank = void 0;
4
/** System message that constrains the judge model to a yes/no answer. */
const SYSTEM_PROMPT = [
    'Judge whether the Document meets the requirements based on the Query',
    'and the Instruct provided. Note that the answer can only be "yes" or "no".',
].join(' ');
/** Start of the user message; the query text is appended directly after it. */
const USER_PREFIX = [
    '<Instruct>: Given a web search query, retrieve relevant passages that answer the query',
    '<Query>: ',
].join('\n\n');
8
/**
 * Minimal async queue: a producer calls push/finish/error, a consumer pulls
 * values with for-await over `iterable`.
 *
 * Only one parked consumer is tracked at a time. A recorded error takes
 * priority over values that are still buffered.
 */
function channel() {
    const queue = [];
    let finished = false;
    let failure = null;
    let wake = null;
    // Release the parked consumer (if there is one) and clear the slot.
    const signal = () => {
        if (wake)
            wake();
        wake = null;
    };
    // Park the consumer until the next push/finish/error.
    const park = () => new Promise((resolve) => { wake = resolve; });
    const pull = async () => {
        for (;;) {
            if (failure)
                throw failure;
            if (queue.length > 0)
                return { value: queue.shift(), done: false };
            if (finished)
                return { value: undefined, done: true };
            await park();
        }
    };
    return {
        push(value) {
            queue.push(value);
            signal();
        },
        finish() {
            finished = true;
            signal();
        },
        error(e) {
            failure = e;
            signal();
        },
        iterable: {
            [Symbol.asyncIterator]() {
                return { next: () => pull() };
            },
        },
    };
}
48
/**
 * Yes/no reranker on top of a SessionContext.
 *
 * Each document is wrapped in a fixed chat prompt and scored by comparing the
 * logits of the "yes" and "no" tokens. Concurrent score() calls share one
 * queue; prompts are scored in groups of at most `nSeqMax`.
 */
class Rerank {
    /** Session context used for tokenizing, chat formatting and scoring. */
    _ctx;
    /** Maximum number of prompts per scoring group. */
    _nSeqMax;
    /** Context size used to derive the per-document token budget. */
    _nCtx;
    /** First token id of "yes". */
    _yesId;
    /** First token id of "no". */
    _noId;
    /** Prompt tokens before the query. */
    _prefixTokens;
    /** Prompt tokens between query and document. */
    _midTokens;
    /** Prompt tokens after the document. */
    _suffixTokens;
    /** Requests that still have unscored documents. */
    _pending = [];
    /** True while a drain loop is running. */
    _draining = false;
    /** True once dispose() has run. */
    _disposed = false;
    constructor(ctx, nSeqMax, nCtx, yesId, noId, prefixTokens, midTokens, suffixTokens) {
        this._ctx = ctx;
        this._nSeqMax = nSeqMax;
        this._nCtx = nCtx;
        this._yesId = yesId;
        this._noId = noId;
        this._prefixTokens = prefixTokens;
        this._midTokens = midTokens;
        this._suffixTokens = suffixTokens;
    }
    /**
     * Create a Rerank instance from a pre-created SessionContext
     *
     * The caller is responsible for creating the context with appropriate
     * settings (nSeqMax, nCtx, typeK, typeV). Rerank takes ownership of
     * the context and will dispose it on `dispose()`.
     *
     * @param ctx - SessionContext configured for reranking
     * @param opts - Capacity hints (nSeqMax, nCtx) — must match context creation params
     * @throws If "yes"/"no" tokenize to nothing, or the chat template does not
     *   preserve the probe sentinels in query-then-document order
     */
    static async create(ctx, opts) {
        const nSeqMax = opts?.nSeqMax ?? 8;
        const nCtx = opts?.nCtx ?? ctx._storeKvPressure().nCtx;
        const [yesId] = await ctx.tokenize('yes', false);
        const [noId] = await ctx.tokenize('no', false);
        if (yesId === undefined || noId === undefined)
            throw new Error('Rerank.create: tokenizer produced no token for "yes"/"no"');
        // Format the prompt once with sentinels in place of the query and the
        // document, then cut it at the sentinels to get the fixed template parts.
        const SENTINEL_Q = '\x00QUERY\x00';
        const SENTINEL_D = '\x00DOC\x00';
        const probe = await ctx.formatChat(JSON.stringify([
            { role: 'system', content: SYSTEM_PROMPT },
            { role: 'user', content: `${USER_PREFIX}${SENTINEL_Q}\n\n<Document>: ${SENTINEL_D}` },
        ]), { addGenerationPrompt: true, enableThinking: false });
        const p = probe.prompt;
        const qi = p.indexOf(SENTINEL_Q);
        const di = p.indexOf(SENTINEL_D);
        // A template that strips or reorders the sentinels would otherwise yield
        // silently corrupt prefix/mid/suffix slices (indexOf returns -1).
        if (qi === -1 || di === -1 || di < qi + SENTINEL_Q.length)
            throw new Error('Rerank.create: chat template did not preserve the query/document sentinels');
        const prefixTokens = await ctx.tokenize(p.slice(0, qi), true);
        const midTokens = await ctx.tokenize(p.slice(qi + SENTINEL_Q.length, di), false);
        const suffixTokens = await ctx.tokenize(p.slice(di + SENTINEL_D.length), false);
        return new Rerank(ctx, nSeqMax, nCtx, yesId, noId, prefixTokens, midTokens, suffixTokens);
    }
    /**
     * Score pre-tokenized documents against a query, streaming progress.
     *
     * @param query - Query text (tokenized here)
     * @param documents - One token-id array per document
     * @param topK - When given, each progress `results` list is cut to `topK` entries
     * @returns Async iterable of `{ filled, total, results }` updates. An empty
     *   `documents` array yields a single `{ filled: 0, total: 0, results: [] }`.
     * @throws Synchronously if the instance has been disposed; later failures
     *   are raised from the iterable.
     */
    score(query, documents, topK) {
        if (this._disposed)
            throw new Error('Rerank disposed');
        const ch = channel();
        (async () => {
            try {
                const queryTokens = await this._ctx.tokenize(query, false);
                // dispose() may have run while the query was being tokenized.
                if (this._disposed)
                    throw new Error('Rerank disposed');
                const tokenArrays = this._buildPrompts(queryTokens, documents);
                this._enqueue(tokenArrays, topK, ch.push, ch.finish, ch.error);
            }
            catch (err) {
                ch.error(err instanceof Error ? err : new Error(String(err)));
            }
        })();
        return ch.iterable;
    }
    /**
     * Tokenize text with the underlying context (second argument `false`).
     *
     * @param text - Text to tokenize
     * @returns Token ids
     */
    async tokenize(text) {
        return this._ctx.tokenize(text, false);
    }
    /**
     * Fail all pending requests with `Error('Rerank disposed')` and dispose the
     * owned context. Calling it again is a no-op.
     */
    dispose() {
        if (this._disposed)
            return;
        this._disposed = true;
        const err = new Error('Rerank disposed');
        for (const req of this._pending)
            req.error(err);
        this._pending.length = 0;
        this._ctx.dispose();
    }
    // ── Queue internals ──────────────────────────────────────────
    /**
     * Wrap every document in the prompt template: prefix + query + mid + doc + suffix.
     * Documents are truncated to the per-sequence budget
     * `floor(nCtx / nSeqMax) - template tokens`, clamped at zero so that an
     * over-long query drops the document body instead of slicing from its end.
     */
    _buildPrompts(queryTokens, documents) {
        const shared = [...this._prefixTokens, ...queryTokens, ...this._midTokens];
        const budget = Math.floor(this._nCtx / this._nSeqMax) - shared.length - this._suffixTokens.length;
        const maxDoc = Math.max(0, budget);
        return documents.map((doc) => {
            const trimmed = doc.length > maxDoc ? doc.slice(0, maxDoc) : doc;
            return [...shared, ...trimmed, ...this._suffixTokens];
        });
    }
    /**
     * Turn the score slots filled so far into `{ score, index }` entries sorted
     * by descending score (rounded to 3 decimals), optionally cut to `topK`.
     * Slots not scored yet are left out.
     */
    _sortResults(scores, topK) {
        const ranked = [];
        // forEach skips the empty slots of the pre-sized scores array, so
        // unscored documents never appear as holes in partial results.
        scores.forEach((score, index) => {
            ranked.push({ score: Math.round(score * 1000) / 1000, index });
        });
        ranked.sort((a, b) => b.score - a.score);
        return topK != null ? ranked.slice(0, topK) : ranked;
    }
    /**
     * Queue one score request and make sure the drain loop is running.
     * A request without prompts completes immediately: it would never be
     * touched by the drain loop and its consumer would wait forever.
     */
    _enqueue(tokenArrays, topK, push, finish, error) {
        if (tokenArrays.length === 0) {
            push({ filled: 0, total: 0, results: [] });
            finish();
            return;
        }
        this._pending.push({
            tokenArrays, cursor: 0,
            scores: new Array(tokenArrays.length),
            filled: 0,
            topK,
            total: tokenArrays.length,
            push, finish, error,
        });
        // _drain handles its own failures, so this promise never rejects.
        this._drain();
    }
    /**
     * Pick up to nSeqMax prompts for the next scoring call, taking one prompt
     * per pending request per pass so concurrent requests advance together.
     */
    _fillGroup() {
        const group = [];
        let added = true;
        while (group.length < this._nSeqMax && added) {
            added = false;
            for (let r = 0; r < this._pending.length && group.length < this._nSeqMax; r++) {
                const req = this._pending[r];
                if (req.cursor < req.tokenArrays.length) {
                    // Keep the request object itself: queue indices are not
                    // stable across the await in _drain.
                    group.push({ req, promptIdx: req.cursor, tokens: req.tokenArrays[req.cursor] });
                    req.cursor++;
                    added = true;
                }
            }
        }
        return group;
    }
    /**
     * Single drain loop: score groups until no request is pending. Any failure
     * is delivered to every pending request; the returned promise never rejects.
     */
    async _drain() {
        if (this._draining)
            return;
        this._draining = true;
        try {
            while (this._pending.length > 0) {
                const group = this._fillGroup();
                if (group.length === 0)
                    break;
                const logits = await this._ctx._scoreGroup(group.map((g) => g.tokens));
                // dispose() ran while scoring: it already failed and cleared
                // every request, so there is nobody left to report to.
                if (this._disposed)
                    return;
                // Track which requests got new scores this group
                const touched = new Set();
                for (let i = 0; i < group.length; i++) {
                    const { req, promptIdx } = group[i];
                    req.scores[promptIdx] = this._rerankScore(logits[i]);
                    req.filled++;
                    touched.add(req);
                }
                // Push progress for each request that advanced, finish completed ones.
                // Iterating backwards keeps indices valid while splicing.
                for (let r = this._pending.length - 1; r >= 0; r--) {
                    const req = this._pending[r];
                    if (!touched.has(req))
                        continue;
                    const results = this._sortResults(req.scores, req.topK);
                    req.push({ filled: req.filled, total: req.total, results });
                    if (req.filled === req.total) {
                        req.finish();
                        this._pending.splice(r, 1);
                    }
                }
            }
        }
        catch (err) {
            const error = err instanceof Error ? err : new Error(String(err));
            for (const req of this._pending)
                req.error(error);
            this._pending.length = 0;
        }
        finally {
            this._draining = false;
        }
    }
    /**
     * Two-way softmax over the "yes" and "no" logits; returns P(yes).
     * The maximum is subtracted first for numerical stability.
     */
    _rerankScore(logits) {
        const max = Math.max(logits[this._yesId], logits[this._noId]);
        const yesExp = Math.exp(logits[this._yesId] - max);
        const noExp = Math.exp(logits[this._noId] - max);
        return yesExp / (yesExp + noExp);
    }
}
219
+ exports.Rerank = Rerank;
220
+ //# sourceMappingURL=Rerank.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Rerank.js","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":";;;AAEA,MAAM,aAAa,GACjB,uEAAuE;IACvE,4EAA4E,CAAC;AAE/E,MAAM,WAAW,GACf,4FAA4F;IAC5F,WAAW,CAAC;AAcd,yEAAyE;AACzE,SAAS,OAAO;IAMd,MAAM,MAAM,GAAQ,EAAE,CAAC;IACvB,IAAI,IAAI,GAAG,KAAK,CAAC;IACjB,IAAI,GAAG,GAAiB,IAAI,CAAC;IAC7B,IAAI,MAAM,GAAwB,IAAI,CAAC;IAEvC,MAAM,IAAI,GAAG,GAAG,EAAE,CAAC,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,EAAE,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE7D,OAAO;QACL,IAAI,CAAC,KAAQ;YACX,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,MAAM,EAAE,EAAE,CAAC;YACX,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,MAAM;YACJ,IAAI,GAAG,IAAI,CAAC;YACZ,MAAM,EAAE,EAAE,CAAC;YACX,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,KAAK,CAAC,CAAQ;YACZ,GAAG,GAAG,CAAC,CAAC;YACR,MAAM,EAAE,EAAE,CAAC;YACX,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,QAAQ,EAAE;YACR,CAAC,MAAM,CAAC,aAAa,CAAC;gBACpB,OAAO;oBACL,KAAK,CAAC,IAAI;wBACR,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,GAAG;4BAAE,MAAM,IAAI,EAAE,CAAC;wBAC1D,IAAI,GAAG;4BAAE,MAAM,GAAG,CAAC;wBACnB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;4BAAE,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAG,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;wBACtE,OAAO,EAAE,KAAK,EAAE,SAAyB,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;oBAC1D,CAAC;iBACF,CAAC;YACJ,CAAC;SACF;KACF,CAAC;AACJ,CAAC;AAED,MAAa,MAAM;IACT,IAAI,CAAiB;IACrB,QAAQ,CAAS;IACjB,KAAK,CAAS;IACd,MAAM,CAAS;IACf,KAAK,CAAS;IACd,aAAa,CAAW;IACxB,UAAU,CAAW;IACrB,aAAa,CAAW;IACxB,QAAQ,GAAqB,EAAE,CAAC;IAChC,SAAS,GAAG,KAAK,CAAC;IAClB,SAAS,GAAG,KAAK,CAAC;IAE1B,YACE,GAAmB,EACnB,OAAe,EACf,IAAY,EACZ,KAAa,EACb,IAAY,EACZ,YAAsB,EACtB,SAAmB,EACnB,YAAsB;QAEtB,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACpC,CAAC;IAED;;;;;;;;;OASG;IACH,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAmB,EAAE,IAA0C;QACjF,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,CAAC,CAAC;QACnC,MAAM,IAAI,GAAG,IAAI
,EAAE,IAAI,IAAI,GAAG,CAAC,gBAAgB,EAAE,CAAC,IAAI,CAAC;QAEvD,MAAM,CAAC,KAAK,CAAC,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACjD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAE/C,MAAM,UAAU,GAAG,eAAe,CAAC;QACnC,MAAM,UAAU,GAAG,aAAa,CAAC;QACjC,MAAM,KAAK,GAAG,MAAM,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAChD,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;YAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,WAAW,GAAG,UAAU,mBAAmB,UAAU,EAAE,EAAE;SACtF,CAAC,EAAE,EAAE,mBAAmB,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;QAE1D,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;QACvB,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACjC,MAAM,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACjC,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC9D,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,GAAG,UAAU,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;QACjF,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,EAAE,KAAK,CAAC,CAAC;QAEhF,OAAO,IAAI,MAAM,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;IAC5F,CAAC;IAED,KAAK,CAAC,KAAa,EAAE,SAAqB,EAAE,IAAa;QACvD,IAAI,IAAI,CAAC,SAAS;YAAE,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QAEvD,MAAM,IAAI,GAAG,IAAI,CAAC;QAClB,MAAM,EAAE,GAAG,OAAO,EAAkB,CAAC;QAErC,CAAC,KAAK,IAAI,EAAE;YACV,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;gBAC3D,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,aAAa,EAAE,GAAG,WAAW,EAAE,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;gBAC3E,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;gBAElG,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACxC,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBACjE,OAAO,CAAC,GAAG,MAAM,EAAE,GAAG,OAAO,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC;gBACxD,CAAC,CAAC,CAAC;gBAEH,
IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;YACjE,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,EAAE,CAAC,KAAK,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAChE,CAAC;QACH,CAAC,CAAC,EAAE,CAAC;QAEL,OAAO,EAAE,CAAC,QAAQ,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY;QACzB,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IACzC,CAAC;IAED,OAAO;QACL,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACzC,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,QAAQ;YAAE,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAChD,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;IACtB,CAAC;IAED,gEAAgE;IAExD,YAAY,CAAC,MAAgB,EAAE,IAAwB;QAC7D,MAAM,MAAM,GAAG,MAAM;aAClB,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;aAC1E,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QACrC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACvD,CAAC;IAEO,QAAQ,CACd,WAAuB,EACvB,IAAwB,EACxB,IAAwC,EACxC,MAAkB,EAClB,KAA2B;QAE3B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;YACjB,WAAW,EAAE,MAAM,EAAE,CAAC;YACtB,MAAM,EAAE,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC;YACrC,MAAM,EAAE,CAAC;YACT,IAAI;YACJ,KAAK,EAAE,WAAW,CAAC,MAAM;YACzB,IAAI,EAAE,MAAM,EAAE,KAAK;SACpB,CAAC,CAAC;QACH,IAAI,CAAC,MAAM,EAAE,CAAC;IAChB,CAAC;IAEO,UAAU;QAChB,MAAM,KAAK,GAA8D,EAAE,CAAC;QAC5E,IAAI,KAAK,GAAG,IAAI,CAAC;QACjB,OAAO,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,IAAI,KAAK,EAAE,CAAC;YAC7C,KAAK,GAAG,KAAK,CAAC;YACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9E,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC;oBACxC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,
MAAM,EAAE,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBACtF,GAAG,CAAC,MAAM,EAAE,CAAC;oBACb,KAAK,GAAG,IAAI,CAAC;gBACf,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,KAAK,CAAC,MAAM;QAClB,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO;QAC3B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAEtB,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChC,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;oBAAE,MAAM;gBAE9B,IAAI,MAAsB,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBACnE,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACb,MAAM,KAAK,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;oBAClE,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,QAAQ;wBAAE,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;oBAClD,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;oBACzB,OAAO;gBACT,CAAC;gBAED,iDAAiD;gBACjD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;gBAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACtC,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;oBAC3C,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC9D,GAAG,CAAC,MAAM,EAAE,CAAC;oBACb,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBAC/B,CAAC;gBAED,sEAAsE;gBACtE,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnD,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;oBAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;wBAAE,SAAS;oBAE9B,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;oBACxD,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;oBAE5D,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,CAAC;wBAC7B,GAAG,CAAC,MAAM,EAAE,CAAC;wBACb,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;oBAC7B,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;gBAAS,C
AAC;YACT,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAEO,YAAY,CAAC,MAAoB;QACvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;QACjD,OAAO,MAAM,GAAG,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;IACnC,CAAC;CACF;AA1MD,wBA0MC"}
@@ -0,0 +1,74 @@
1
+ import type { Branch } from './Branch';
2
+ import type { BranchStore } from './BranchStore';
3
+ import type { SessionContext } from './types';
4
+ /**
5
+ * Session - Trunk lifecycle + conversation delta helpers
6
+ *
7
+ * Owns the current "trunk" branch and provides promote() to crown a winner,
8
+ * plus delta helpers that centralize the sep + formatChat + tokenize + prefill
9
+ * pattern for injecting new turns into an ongoing conversation.
10
+ *
11
+ * Session does NOT own the SessionContext or BranchStore — the consumer
12
+ * creates those and passes them in. dispose() prunes trunk only.
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * const session = new Session({ ctx, store });
17
+ * session.trunk = initialBranch;
18
+ *
19
+ * // After verification, promote the best attempt
20
+ * await session.promote(bestAttempt.branch);
21
+ *
22
+ * // Inject a user turn and generate
23
+ * await session.prefillUser('What about X?');
24
+ * for await (const { text } of session.trunk) {
25
+ * process.stdout.write(text);
26
+ * }
27
+ *
28
+ * // Cleanup
29
+ * await session.dispose();
30
+ * ctx.dispose();
31
+ * ```
32
+ *
33
+ * @category Branching
34
+ */
35
+ export declare class Session {
36
+ private _ctx;
37
+ private _store;
38
+ private _trunk;
39
+ constructor({ ctx, store }: {
40
+ ctx: SessionContext;
41
+ store: BranchStore;
42
+ });
43
+ /** Current trunk branch (may be `null`) */
44
+ get trunk(): Branch | null;
45
+ /** Assign the trunk directly, without going through promote(); accepts `null` */
46
+ set trunk(branch: Branch | null);
47
+ /**
48
+ * Promote a winner to trunk — retainOnly + reassign
49
+ *
50
+ * Safe even if winner is the only branch (resets topology, no-op on KV).
51
+ */
52
+ promote(winner: Branch): Promise<void>;
53
+ /**
54
+ * Dispose the trunk only — the consumer owns ctx, the store, and other resources
55
+ */
56
+ dispose(): Promise<void>;
57
+ /**
58
+ * Prefill a user turn into trunk
59
+ *
60
+ * @param content - User message content
61
+ * @param opts - Optional settings object; `opts.tools` is an optional tools JSON string
62
+ */
63
+ prefillUser(content: string, opts?: {
64
+ tools?: string;
65
+ }): Promise<void>;
66
+ /**
67
+ * Prefill a tool result turn into trunk
68
+ *
69
+ * @param resultStr - JSON-stringified tool result
70
+ * @param callId - Tool call ID
71
+ */
72
+ prefillToolResult(resultStr: string, callId: string): Promise<void>;
73
+ }
74
+ //# sourceMappingURL=Session.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Session.d.ts","sourceRoot":"","sources":["../src/Session.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AACvC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAG9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,qBAAa,OAAO;IAClB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,MAAM,CAAgB;gBAElB,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE;QAAE,GAAG,EAAE,cAAc,CAAC;QAAC,KAAK,EAAE,WAAW,CAAA;KAAE;IAMvE,2BAA2B;IAC3B,IAAI,KAAK,IAAI,MAAM,GAAG,IAAI,CAEzB;IAED,wCAAwC;IACxC,IAAI,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,EAE9B;IAED;;;;OAIG;IACG,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK5C;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAO9B;;;;;OAKG;IACG,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,GAAE;QAAE,KAAK,CAAC,EAAE,MAAM,CAAA;KAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAKhF;;;;;OAKG;IACG,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAI1E"}