@lloyal-labs/sdk 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/Rerank.d.ts CHANGED
@@ -1,50 +1,184 @@
1
1
  import type { SessionContext, RerankProgress } from './types';
2
+ /**
3
+ * Thrown by {@link Rerank.create} when calibration gates fail (single-token
4
+ * yes/no, BPE-boundary invariance, boot canary gap). Each instance includes
5
+ * the specific gate and the empirical evidence so callers can fix at the
6
+ * right layer (model swap, sentinel change, template drift).
7
+ */
8
+ export declare class RerankCalibrationError extends Error {
9
+ readonly name = "RerankCalibrationError";
10
+ }
11
+ /**
12
+ * Thrown when Rerank's internal invariants are violated (lease exhaustion
13
+ * mid-query, fork returning a disposed handle, etc.). These represent state
14
+ * the consumer cannot fix; the diagnostic exists for framework-side triage.
15
+ */
16
+ export declare class RerankInternalError extends Error {
17
+ readonly name = "RerankInternalError";
18
+ }
19
+ /**
20
+ * Truncation event emitted via {@link RerankOpts.onTruncate} when a document's
21
+ * token count exceeds the per-leaf budget and is cut down to its leading tokens.
22
+ */
23
+ export interface RerankTruncation {
24
+ /** Position of the truncated document in the score() / scoreBatch() input array. */
25
+ docIndex: number;
26
+ /** Original token count of the document. */
27
+ origLen: number;
28
+ /** Tokens kept (slice from start to maxLen). */
29
+ maxLen: number;
30
+ }
31
+ /**
32
+ * Construction options for {@link Rerank.create}.
33
+ */
34
+ export interface RerankOpts {
35
+ /**
36
+ * Maximum parallel scoring sequences in the underlying BranchStore.
37
+ * Effective per-group leaf budget is `nSeqMax - 2` because the warm trunk
38
+ * and per-query branch each hold one lease. Default 10 (= 8 leaves), which
39
+ * preserves the prior default leaf width while introducing the warm-trunk
40
+ * lease.
41
+ */
42
+ nSeqMax?: number;
43
+ /**
44
+ * Per-context KV budget. Defaults to the value reported by the underlying
45
+ * SessionContext. Per-sequence token budget is `floor(nCtx / nSeqMax)`.
46
+ */
47
+ nCtx?: number;
48
+ /**
49
+ * Optional callback invoked once per document whose tokens exceeded the
50
+ * per-leaf budget. Consumers can forward to a trace surface
51
+ * (`rerank:truncate`) or to a metric. Silent in the SDK by default.
52
+ */
53
+ onTruncate?: (event: RerankTruncation) => void;
54
+ }
55
+ /**
56
+ * Cross-encoder reranker composed over the SDK's Branch / BranchStore primitives.
57
+ *
58
+ * # Lifetime + concurrency contract
59
+ *
60
+ * Rerank takes **exclusive ownership** of its SessionContext. Routing any other
61
+ * decode through the same context concurrently is undefined behavior — the
62
+ * kernel's `llama_context::decode` carries no internal mutex (verified at
63
+ * llama.cpp b9581). Enforcement is at construction: `Rerank.create()` marks
64
+ * the context with a `__decodeOwner` flag and refuses a second instance.
65
+ * The flag is cleared by `dispose()`, so test/REPL re-creation works.
66
+ *
67
+ * Concurrent `score()` / `scoreBatch()` calls **on the same Rerank instance**
68
+ * are serialized by a per-instance Promise chain (~10 LOC). The kernel sees
69
+ * them in arrival order; consumers still get a concurrent-looking API.
70
+ *
71
+ * # Architecture
72
+ *
73
+ * [SYSTEM][USER_PREFIX][QUERY][MID][DOC_i][SUFFIX][GEN_PROMPT]
74
+ * └── permanent trunk ─┘ └── per-query branch ──┘ └─── per-chunk leaves ─┘
75
+ *
76
+ * - **trunk**: prefilled with the static [SYSTEM][USER_PREFIX] segment ONCE
77
+ * at `Rerank.create()`; lives for the instance lifetime. Warm KV is
78
+ * amortized across every score() via multi-tag KV survival.
79
+ * - **queryBranch**: forked from trunk per score() call, prefilled with
80
+ * `[query, ...midTokens]`. Forked with `cloneLogits: false` because we
81
+ * immediately overwrite the logits with the prefill.
82
+ * - **leaves**: forked from queryBranch in groups of `BranchStore.available`,
83
+ * scatter-prefilled with `[doc_i, ...suffixTokens]` via `BranchStore.prefill`
84
+ * (one `llama_decode` per group), scored via `_branchLogitsAt` reading
85
+ * exactly two floats per leaf, pruned.
86
+ *
87
+ * # Calibration gates (fail-loud at create() time)
88
+ *
89
+ * 1. `yes` and `no` must tokenize as single tokens (the score formula
90
+ * `logit("yes") − logit("no")` assumes this; broader support requires
91
+ * log-sum-exp over label sequences, a 3.x ticket).
92
+ * 2. BPE-boundary invariance — tokenizing a canary full prompt must equal
93
+ * the concat of (prefix, query, mid, doc, suffix) tokenized separately,
94
+ * so segment seams don't silently shift the leaf prompts.
95
+ * 3. Boot canary — score a known relevant + irrelevant pair; relevant
96
+ * must outscore irrelevant by > 1.0 logit unit. Asserts the *gap*,
97
+ * NOT absolute signs — quantization shifts calibration enough that
98
+ * sign assertions are brittle, while the ordering gap still catches
99
+ * yes/no token swap, model swap, and template drift.
100
+ *
101
+ * # Score formula
102
+ *
103
+ * score = `logit("yes") − logit("no")` (unbounded).
104
+ *
105
+ * **This is the log-odds of an absolute yes/no relevance judgment.** The
106
+ * model is a pointwise binary cross-encoder; the official Qwen3-Reranker
107
+ * score is the two-token softmax over {yes,no} — i.e. `sigmoid(score)` =
108
+ * P(yes) ∈ [0,1] — and our log-odds is its monotone equivalent (identical
109
+ * rankings, full dynamic range). Scores ARE thresholdable (0 ≡ P = 0.5) and
110
+ * comparable across queries to the extent of the model's calibration;
111
+ * quantization adds noise at the extremes. Top-1 routinely goes negative
112
+ * on real corpora when no document is strongly relevant — an honest
113
+ * "probably not", with the ranking still useful. Production traces show
114
+ * top-1 ranging from +10 (P≈.9999) to -3 (P≈.05, weak best match).
115
+ *
116
+ * The previous softmax form compressed small logit gaps into extreme
117
+ * probabilities (gap of 5 → 0.993; gap of 10 → 0.99995), saturating top-K
118
+ * ordering. Logit-diff preserves the full dynamic range. See
119
+ * `reasoning.run/scripts/inspect-rerank.mjs` for empirical evidence.
120
+ *
121
+ * Consumers that want a confidence threshold should calibrate against their
122
+ * own corpus rather than assuming `> 0` means "relevant" — see SearchTool's
123
+ * threshold envelope.
124
+ */
2
125
  export declare class Rerank {
3
126
  private _ctx;
127
+ private _store;
128
+ private _trunk;
4
129
  private _nSeqMax;
5
130
  private _nCtx;
6
131
  private _yesId;
7
132
  private _noId;
8
- private _prefixTokens;
9
133
  private _midTokens;
10
134
  private _suffixTokens;
11
- private _pending;
12
- private _draining;
135
+ private _staticPrefix;
136
+ private _onTruncate?;
137
+ private _inflight;
13
138
  private _disposed;
14
139
  private constructor();
15
140
  /**
16
- * Create a Rerank instance from a pre-created SessionContext
141
+ * Create a Rerank instance bound to a pre-created SessionContext.
17
142
  *
18
- * The caller is responsible for creating the context with appropriate
19
- * settings (nSeqMax, nCtx, typeK, typeV). Rerank takes ownership of
20
- * the context and will dispose it on `dispose()`.
143
+ * Rerank takes exclusive ownership of `ctx` (see class docstring). The
144
+ * caller must construct `ctx` with `nSeqMax` ≥ 3 (one slot each for trunk
145
+ * + queryBranch + at least one leaf).
21
146
  *
22
- * @param ctx - SessionContext configured for reranking
23
- * @param opts - Capacity hints (nSeqMax, nCtx) must match context creation params
147
+ * Fires three calibration gates at boot. If any gate fails, throws
148
+ * {@link RerankCalibrationError} with a diagnostic naming the failure and
149
+ * cleans up partial state (no ctx leak).
24
150
  */
25
- static create(ctx: SessionContext, opts?: {
26
- nSeqMax?: number;
27
- nCtx?: number;
28
- }): Promise<Rerank>;
29
- score(query: string, documents: number[][], topK?: number): AsyncIterable<RerankProgress>;
151
+ static create(ctx: SessionContext, opts?: RerankOpts): Promise<Rerank>;
30
152
  /**
31
- * Score raw text strings against a query in one batch.
153
+ * Stream progressive ranking results for `documents` against `query`.
32
154
  *
33
- * Tokenizes texts synchronously, builds reranker prompt arrays, and
34
- * dispatches via `_scoreGroup` for parallel cross-encoder scoring.
35
- * Up to `nSeqMax` texts are scored per batch call.
155
+ * Pre-tokenized documents must come from {@link tokenize} or a reranker-
156
+ * compatible tokenizer; mismatched tokenizers silently produce wrong scores.
36
157
  *
37
- * @param query - Reference query to score against
38
- * @param texts - Raw text strings to score
39
- * @returns Scores (0–1) in input order
158
+ * Consumers may cancel by calling `iterator.return()` directly or by
159
+ * a `break` inside a `for await` loop. Cancellation bounds the post-cancel cost at the one
160
+ * leaf group already in flight; subsequent groups are skipped.
161
+ */
162
+ score(query: string, documents: number[][], topK?: number): AsyncIterable<RerankProgress>;
163
+ /**
164
+ * Batch-score raw text strings against a query. Returns logit-diff scores
165
+ * (unbounded; positive = "yes", negative = "no", magnitude = confidence) in
166
+ * input order.
40
167
  */
41
168
  scoreBatch(query: string, texts: string[]): Promise<number[]>;
169
+ /** Tokenize text using the reranker's underlying tokenizer. */
42
170
  tokenize(text: string): Promise<number[]>;
171
+ /** Release Rerank state, clear ctx ownership, dispose ctx. Idempotent. */
43
172
  dispose(): void;
44
- private _sortResults;
45
- private _enqueue;
46
- private _fillGroup;
47
- private _drain;
48
- private _rerankScore;
173
+ /**
174
+ * The shared scoring driver. Both `score()` (async-iterable) and
175
+ * `scoreBatch()` (Promise) call into this once the serializer is held.
176
+ */
177
+ private _scoreInternal;
178
+ /**
179
+ * Sort scores descending, raw (unrounded). Consumers that want display
180
+ * rounding apply `Math.round(score * 1000) / 1000` themselves.
181
+ */
182
+ private _sortRaw;
49
183
  }
50
184
  //# sourceMappingURL=Rerank.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"Rerank.d.ts","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAgB,cAAc,EAAE,MAAM,SAAS,CAAC;AAmE5E,qBAAa,MAAM;IACjB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,UAAU,CAAW;IAC7B,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,QAAQ,CAAwB;IACxC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAE1B,OAAO;IAoBP;;;;;;;;;OASG;WACU,MAAM,CAAC,GAAG,EAAE,cAAc,EAAE,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAwBrG,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC,cAAc,CAAC;IA0BzF;;;;;;;;;;OAUG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAqB7D,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAI/C,OAAO,IAAI,IAAI;IAUf,OAAO,CAAC,YAAY;IAOpB,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,UAAU;YAiBJ,MAAM;IA+CpB,OAAO,CAAC,YAAY;CAMrB"}
1
+ {"version":3,"file":"Rerank.d.ts","sourceRoot":"","sources":["../src/Rerank.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAgB,cAAc,EAAE,MAAM,SAAS,CAAC;AA2B5E;;;;;GAKG;AACH,qBAAa,sBAAuB,SAAQ,KAAK;IAC/C,QAAQ,CAAC,IAAI,4BAA4B;CAC1C;AAED;;;;GAIG;AACH,qBAAa,mBAAoB,SAAQ,KAAK;IAC5C,QAAQ,CAAC,IAAI,yBAAyB;CACvC;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,oFAAoF;IACpF,QAAQ,EAAE,MAAM,CAAC;IACjB,4CAA4C;IAC5C,OAAO,EAAE,MAAM,CAAC;IAChB,gDAAgD;IAChD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;OAIG;IACH,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;CAChD;AAyED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqEG;AACH,qBAAa,MAAM;IACjB,OAAO,CAAC,IAAI,CAAiB;IAC7B,OAAO,CAAC,MAAM,CAAc;IAC5B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,UAAU,CAAW;IAC7B,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,aAAa,CAAW;IAChC,OAAO,CAAC,WAAW,CAAC,CAAoC;IACxD,OAAO,CAAC,SAAS,CAAoC;IACrD,OAAO,CAAC,SAAS,CAAS;IAE1B,OAAO;IA0BP;;;;;;;;;;OAUG;WACU,MAAM,CAAC,GAAG,EAAE,cAAc,EAAE,IAAI,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IA2K5E;;;;;;;;;OASG;IACH,KAAK,CACH,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,EAAE,EAAE,EACrB,IAAI,CAAC,EAAE,MAAM,GACZ,aAAa,CAAC,cAAc,CAAC;IAgChC;;;;OAIG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA0CnE,+DAA+D;IACzD,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAI/C,0EAA0E;IAC1E,OAAO,IAAI,IAAI;IAcf;;;OAGG;YACW,cAAc;IAoJ5B;;;OAGG;IACH,OAAO,CAAC,QAAQ;CAMjB"}