@lloyal-labs/sdk 2.1.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +107 -0
- package/LICENSE-FAQ.md +256 -0
- package/README.md +3 -3
- package/dist/Branch.d.ts +36 -2
- package/dist/Branch.d.ts.map +1 -1
- package/dist/Branch.js +35 -6
- package/dist/Branch.js.map +1 -1
- package/dist/Rerank.d.ts +160 -26
- package/dist/Rerank.d.ts.map +1 -1
- package/dist/Rerank.js +460 -128
- package/dist/Rerank.js.map +1 -1
- package/dist/Session.d.ts +18 -0
- package/dist/Session.d.ts.map +1 -1
- package/dist/Session.js +19 -0
- package/dist/Session.js.map +1 -1
- package/dist/deltas.d.ts +21 -0
- package/dist/deltas.d.ts.map +1 -1
- package/dist/deltas.js +30 -0
- package/dist/deltas.js.map +1 -1
- package/dist/index.d.ts +5 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +3 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +5 -3
package/dist/Rerank.d.ts
CHANGED
|
@@ -1,50 +1,184 @@
|
|
|
1
1
|
import type { SessionContext, RerankProgress } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Thrown by {@link Rerank.create} when calibration gates fail (single-token
|
|
4
|
+
* yes/no, BPE-boundary invariance, boot canary score gap). Each instance includes
|
|
5
|
+
* the specific gate and the empirical evidence so callers can fix at the
|
|
6
|
+
* right layer (model swap, sentinel change, template drift).
|
|
7
|
+
*/
|
|
8
|
+
export declare class RerankCalibrationError extends Error {
|
|
9
|
+
readonly name = "RerankCalibrationError";
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Thrown when Rerank's internal invariants are violated (lease exhaustion
|
|
13
|
+
* mid-query, fork returning a disposed handle, etc.). These represent state
|
|
14
|
+
* the consumer cannot fix; the diagnostic exists for framework-side triage.
|
|
15
|
+
*/
|
|
16
|
+
export declare class RerankInternalError extends Error {
|
|
17
|
+
readonly name = "RerankInternalError";
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Truncation event emitted via {@link RerankOpts.onTruncate} when a document's
|
|
21
|
+
* token count exceeds the per-leaf budget; only the leading `maxLen` tokens are kept.
|
|
22
|
+
*/
|
|
23
|
+
export interface RerankTruncation {
|
|
24
|
+
/** Position of the truncated document in the score() / scoreBatch() input array. */
|
|
25
|
+
docIndex: number;
|
|
26
|
+
/** Original token count of the document. */
|
|
27
|
+
origLen: number;
|
|
28
|
+
/** Number of tokens kept: the document is sliced to its first `maxLen` tokens. */
|
|
29
|
+
maxLen: number;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Construction options for {@link Rerank.create}.
|
|
33
|
+
*/
|
|
34
|
+
export interface RerankOpts {
|
|
35
|
+
/**
|
|
36
|
+
* Maximum parallel scoring sequences in the underlying BranchStore.
|
|
37
|
+
* Effective per-group leaf budget is `nSeqMax - 2` because the warm trunk
|
|
38
|
+
* and per-query branch each hold one lease. Default 10 (= 8 leaves), which
|
|
39
|
+
* preserves the prior default leaf width while introducing the warm-trunk
|
|
40
|
+
* lease.
|
|
41
|
+
*/
|
|
42
|
+
nSeqMax?: number;
|
|
43
|
+
/**
|
|
44
|
+
* Per-context KV budget. Defaults to the value reported by the underlying
|
|
45
|
+
* SessionContext. Per-sequence token budget is `floor(nCtx / nSeqMax)`.
|
|
46
|
+
*/
|
|
47
|
+
nCtx?: number;
|
|
48
|
+
/**
|
|
49
|
+
* Optional callback invoked once per document whose tokens exceeded the
|
|
50
|
+
* per-leaf budget. Consumers can forward to a trace surface
|
|
51
|
+
* (`rerank:truncate`) or to a metric. Silent in the SDK by default.
|
|
52
|
+
*/
|
|
53
|
+
onTruncate?: (event: RerankTruncation) => void;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Cross-encoder reranker composed over the SDK's Branch / BranchStore primitives.
|
|
57
|
+
*
|
|
58
|
+
* # Lifetime + concurrency contract
|
|
59
|
+
*
|
|
60
|
+
* Rerank takes **exclusive ownership** of its SessionContext. Routing any other
|
|
61
|
+
* decode through the same context concurrently is undefined behavior — the
|
|
62
|
+
* kernel's `llama_context::decode` carries no internal mutex (verified at
|
|
63
|
+
* llama.cpp b9581). Enforcement is at construction: `Rerank.create()` marks
|
|
64
|
+
* the context with a `__decodeOwner` flag and refuses a second instance.
|
|
65
|
+
* The flag is cleared by `dispose()`, so test/REPL re-creation works.
|
|
66
|
+
*
|
|
67
|
+
* Concurrent `score()` / `scoreBatch()` calls **on the same Rerank instance**
|
|
68
|
+
* are serialized by a per-instance Promise chain (~10 LOC). The kernel sees
|
|
69
|
+
* them in arrival order; consumers still get a concurrent-looking API.
|
|
70
|
+
*
|
|
71
|
+
* # Architecture
|
|
72
|
+
*
|
|
73
|
+
* [SYSTEM][USER_PREFIX][QUERY][MID][DOC_i][SUFFIX][GEN_PROMPT]
|
|
74
|
+
* └── permanent trunk ─┘ └── per-query branch ──┘ └─── per-chunk leaves ─┘
|
|
75
|
+
*
|
|
76
|
+
* - **trunk**: prefilled with the static [SYSTEM][USER_PREFIX] segment ONCE
|
|
77
|
+
* at `Rerank.create()`; lives for the instance lifetime. Warm KV is
|
|
78
|
+
* amortized across every score() via multi-tag KV survival.
|
|
79
|
+
* - **queryBranch**: forked from trunk per score() call, prefilled with
|
|
80
|
+
* `[query, ...midTokens]`. Forked with `cloneLogits: false` because we
|
|
81
|
+
* immediately overwrite the logits with the prefill.
|
|
82
|
+
* - **leaves**: forked from queryBranch in groups of `BranchStore.available`,
|
|
83
|
+
* scatter-prefilled with `[doc_i, ...suffixTokens]` via `BranchStore.prefill`
|
|
84
|
+
* (one `llama_decode` per group), scored via `_branchLogitsAt` reading
|
|
85
|
+
* exactly two floats per leaf, pruned.
|
|
86
|
+
*
|
|
87
|
+
* # Calibration gates (fail-loud at create() time)
|
|
88
|
+
*
|
|
89
|
+
* 1. `yes` and `no` must tokenize as single tokens (the score formula
|
|
90
|
+
* `logit("yes") − logit("no")` assumes this; broader support requires
|
|
91
|
+
* log-sum-exp over label sequences, a 3.x ticket).
|
|
92
|
+
* 2. BPE-boundary invariance — tokenizing a canary full prompt must equal
|
|
93
|
+
* the concat of (prefix, query, mid, doc, suffix) tokenized separately,
|
|
94
|
+
* so segment seams don't silently shift the leaf prompts.
|
|
95
|
+
* 3. Boot canary — score a known relevant + irrelevant pair; relevant
|
|
96
|
+
* must outscore irrelevant by > 1.0 logit unit. Asserts the *gap*,
|
|
97
|
+
* NOT absolute signs — quantization shifts calibration enough that
|
|
98
|
+
* sign assertions are brittle, while the ordering gap still catches
|
|
99
|
+
* yes/no token swap, model swap, and template drift.
|
|
100
|
+
*
|
|
101
|
+
* # Score formula
|
|
102
|
+
*
|
|
103
|
+
* score = `logit("yes") − logit("no")` (unbounded).
|
|
104
|
+
*
|
|
105
|
+
* **This is the log-odds of an absolute yes/no relevance judgment.** The
|
|
106
|
+
* model is a pointwise binary cross-encoder; the official Qwen3-Reranker
|
|
107
|
+
* score is the two-token softmax over {yes,no} — i.e. `sigmoid(score)` =
|
|
108
|
+
* P(yes) ∈ [0,1] — and our log-odds is its monotone equivalent (identical
|
|
109
|
+
* rankings, full dynamic range). Scores ARE thresholdable (0 ≡ P 0.5) and
|
|
110
|
+
* comparable across queries to the extent of the model's calibration;
|
|
111
|
+
* quantization adds noise at the extremes. Top-1 routinely goes negative
|
|
112
|
+
* on real corpora when no document is strongly relevant — an honest
|
|
113
|
+
* "probably not", with the ranking still useful. Production traces show
|
|
114
|
+
* top-1 ranging from +10 (P≈.9999) to -3 (P≈.05, weak best match).
|
|
115
|
+
*
|
|
116
|
+
* The previous softmax form compressed small logit gaps into extreme
|
|
117
|
+
* probabilities (gap of 5 → 0.993; gap of 10 → 0.99995), saturating top-K
|
|
118
|
+
* ordering. Logit-diff preserves the full dynamic range. See
|
|
119
|
+
* `reasoning.run/scripts/inspect-rerank.mjs` for empirical evidence.
|
|
120
|
+
*
|
|
121
|
+
* Consumers that want a confidence threshold should calibrate against their
|
|
122
|
+
* own corpus rather than assuming `> 0` means "relevant" — see SearchTool's
|
|
123
|
+
* threshold envelope.
|
|
124
|
+
*/
|
|
2
125
|
export declare class Rerank {
|
|
3
126
|
private _ctx;
|
|
127
|
+
private _store;
|
|
128
|
+
private _trunk;
|
|
4
129
|
private _nSeqMax;
|
|
5
130
|
private _nCtx;
|
|
6
131
|
private _yesId;
|
|
7
132
|
private _noId;
|
|
8
|
-
private _prefixTokens;
|
|
9
133
|
private _midTokens;
|
|
10
134
|
private _suffixTokens;
|
|
11
|
-
private
|
|
12
|
-
private
|
|
135
|
+
private _staticPrefix;
|
|
136
|
+
private _onTruncate?;
|
|
137
|
+
private _inflight;
|
|
13
138
|
private _disposed;
|
|
14
139
|
private constructor();
|
|
15
140
|
/**
|
|
16
|
-
* Create a Rerank instance
|
|
141
|
+
* Create a Rerank instance bound to a pre-created SessionContext.
|
|
17
142
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
143
|
+
* Rerank takes exclusive ownership of `ctx` (see class docstring). The
|
|
144
|
+
* caller must construct `ctx` with `nSeqMax` ≥ 3 (one slot each for trunk
|
|
145
|
+
* + queryBranch + at least one leaf).
|
|
21
146
|
*
|
|
22
|
-
*
|
|
23
|
-
* @
|
|
147
|
+
* Fires three calibration gates at boot. If any gate fails, throws
|
|
148
|
+
* {@link RerankCalibrationError} with a diagnostic naming the failure and
|
|
149
|
+
* cleans up partial state (no ctx leak).
|
|
24
150
|
*/
|
|
25
|
-
static create(ctx: SessionContext, opts?:
|
|
26
|
-
nSeqMax?: number;
|
|
27
|
-
nCtx?: number;
|
|
28
|
-
}): Promise<Rerank>;
|
|
29
|
-
score(query: string, documents: number[][], topK?: number): AsyncIterable<RerankProgress>;
|
|
151
|
+
static create(ctx: SessionContext, opts?: RerankOpts): Promise<Rerank>;
|
|
30
152
|
/**
|
|
31
|
-
*
|
|
153
|
+
* Stream progressive ranking results for `documents` against `query`.
|
|
32
154
|
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
* Up to `nSeqMax` texts are scored per batch call.
|
|
155
|
+
* Pre-tokenized documents must come from {@link tokenize} or a reranker-
|
|
156
|
+
* compatible tokenizer; mismatched tokenizers silently produce wrong scores.
|
|
36
157
|
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
39
|
-
*
|
|
158
|
+
* Consumers may cancel by calling `iterator.return()` directly or by
|
|
159
|
+
* breaking out of a `for await` loop. Cancellation bounds the post-cancel cost at the one
|
|
160
|
+
* leaf group already in flight; subsequent groups are skipped.
|
|
161
|
+
*/
|
|
162
|
+
score(query: string, documents: number[][], topK?: number): AsyncIterable<RerankProgress>;
|
|
163
|
+
/**
|
|
164
|
+
* Batch-score raw text strings against a query. Returns logit-diff scores
|
|
165
|
+
* (unbounded; positive = "yes", negative = "no", magnitude = confidence) in
|
|
166
|
+
* input order.
|
|
40
167
|
*/
|
|
41
168
|
scoreBatch(query: string, texts: string[]): Promise<number[]>;
|
|
169
|
+
/** Tokenize text using the reranker's underlying tokenizer. */
|
|
42
170
|
tokenize(text: string): Promise<number[]>;
|
|
171
|
+
/** Release Rerank state, clear ctx ownership, dispose ctx. Idempotent. */
|
|
43
172
|
dispose(): void;
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
private
|
|
173
|
+
/**
|
|
174
|
+
* The shared scoring driver. Both `score()` (async-iterable) and
|
|
175
|
+
* `scoreBatch()` (Promise) call into this once the serializer is held.
|
|
176
|
+
*/
|
|
177
|
+
private _scoreInternal;
|
|
178
|
+
/**
|
|
179
|
+
* Sort scores descending, raw (unrounded). Consumers that want display
|
|
180
|
+
* rounding apply `Math.round(score * 1000) / 1000` themselves.
|
|
181
|
+
*/
|
|
182
|
+
private _sortRaw;
|
|
49
183
|
}
|
|
50
184
|
//# sourceMappingURL=Rerank.d.ts.map
|
package/dist/Rerank.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Rerank.d.ts","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAgB,cAAc,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"Rerank.d.ts","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAgB,cAAc,EAAE,MAAM,SAAS,CAAC;AA2B5E;;;;;GAKG;AACH,qBAAa,sBAAuB,SAAQ,KAAK;IAC/C,QAAQ,CAAC,IAAI,4BAA4B;CAC1C;AAED;;;;GAIG;AACH,qBAAa,mBAAoB,SAAQ,KAAK;IAC5C,QAAQ,CAAC,IAAI,yBAAyB;CACvC;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,oFAAoF;IACpF,QAAQ,EAAE,MAAM,CAAC;IACjB,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAChB,gDAAgD;IAChD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;OAIG;IACH,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;CAChD;AAyED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AACH,qBAAa,MAAM;IACjB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,UAAU,CAAW;IAC7B,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,WAAW,CAAC,CAAoC;IACxD,OAAO,CAAC,SAAS,CAAoC;IACrD,OAAO,CAAC,SAAS,CAAS;IAE1B,OAAO;IA0BP;;;;;;;;;;OAUG;WACU,MAAM,CAAC,GAAG,EAAE,cAAc,EAAE,IAAI,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IA2K5E;;;;;;;;;OASG;IACH,KAAK,CACH,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,EAAE,EAAE,EACrB,IAAI,CAAC,EAAE,MAAM,GACZ,aAAa,CAAC,cAAc,CAAC;IAgChC;;;;OAIG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA0CnE,+DAA+D;IACzD,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAI/C,0EAA0E;IAC1E,OAAO,IAAI,IAAI;IAcf;;;OAGG;YACW,cAAc;IAoJ5B;;;OAGG;IACH,OAAO,CAAC,QAAQ;CAMjB"}
|