@knolo/core 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/query.js ADDED
@@ -0,0 +1,463 @@
1
+ /*
2
+ * query.ts
3
+ *
4
+ * Deterministic, embedding-free retrieval with:
5
+ * - REQUIRED phrase enforcement (quoted and requirePhrases)
6
+ * - Proximity bonus based on min cover span
7
+ * - Optional heading overlap boost
8
+ * - KNS numeric-signature tie-breaker (tiny)
9
+ * - Near-duplicate suppression + MMR diversity
10
+ */
11
+ import { tokenize, parsePhrases, normalize } from "./tokenize.js";
12
+ import { rankBM25L } from "./rank.js";
13
+ import { minCoverSpan, proximityMultiplier } from "./quality/proximity.js";
14
+ import { diversifyAndDedupe } from "./quality/diversify.js";
15
+ import { knsSignature, knsDistance } from "./quality/signature.js";
16
+ import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js";
17
/**
 * Validate top-level query options, throwing a descriptive Error on the
 * first invalid field. A missing/undefined options object means "use all
 * defaults" and is accepted silently.
 */
export function validateQueryOptions(opts) {
    if (!opts)
        return;
    const positiveInt = (v) => Number.isInteger(v) && v >= 1;
    const nonNegFinite = (v) => Number.isFinite(v) && v >= 0;
    if (opts.topK !== undefined && !positiveInt(opts.topK)) {
        throw new Error("query(...): topK must be a positive integer.");
    }
    if (opts.minScore !== undefined && !nonNegFinite(opts.minScore)) {
        throw new Error("query(...): minScore must be a finite number >= 0.");
    }
    if (opts.requirePhrases !== undefined) {
        const allStrings = Array.isArray(opts.requirePhrases) &&
            opts.requirePhrases.every((p) => typeof p === "string");
        if (!allStrings) {
            throw new Error("query(...): requirePhrases must be an array of strings when provided.");
        }
    }
    validateStringOrStringArrayOption("namespace", opts.namespace);
    validateStringOrStringArrayOption("source", opts.source);
    const qe = opts.queryExpansion;
    if (qe) {
        if (qe.enabled !== undefined && typeof qe.enabled !== "boolean") {
            throw new Error("query(...): queryExpansion.enabled must be a boolean when provided.");
        }
        if (qe.docs !== undefined && !positiveInt(qe.docs)) {
            throw new Error("query(...): queryExpansion.docs must be a positive integer.");
        }
        if (qe.terms !== undefined && !positiveInt(qe.terms)) {
            throw new Error("query(...): queryExpansion.terms must be a positive integer.");
        }
        if (qe.weight !== undefined && !nonNegFinite(qe.weight)) {
            throw new Error("query(...): queryExpansion.weight must be a finite number >= 0.");
        }
        if (qe.minTermLength !== undefined && !positiveInt(qe.minTermLength)) {
            throw new Error("query(...): queryExpansion.minTermLength must be a positive integer.");
        }
    }
    validateSemanticQueryOptions(opts.semantic);
}
51
/**
 * Validate semantic-rerank options, throwing on the first invalid field.
 * An absent options object is a no-op.
 */
export function validateSemanticQueryOptions(options) {
    if (!options)
        return;
    const { enabled, mode, topN, minLexConfidence, queryEmbedding, blend, force } = options;
    if (enabled !== undefined && typeof enabled !== "boolean") {
        throw new Error("query(...): semantic.enabled must be a boolean when provided.");
    }
    // Only lexical-then-rerank is implemented today.
    if (mode !== undefined && mode !== "rerank") {
        throw new Error('query(...): semantic.mode currently only supports "rerank".');
    }
    if (topN !== undefined && (!Number.isInteger(topN) || topN < 1)) {
        throw new Error("query(...): semantic.topN must be a positive integer.");
    }
    if (minLexConfidence !== undefined) {
        const inRange = Number.isFinite(minLexConfidence) &&
            minLexConfidence >= 0 &&
            minLexConfidence <= 1;
        if (!inRange) {
            throw new Error("query(...): semantic.minLexConfidence must be a finite number between 0 and 1.");
        }
    }
    if (queryEmbedding !== undefined && !(queryEmbedding instanceof Float32Array)) {
        throw new Error("query(...): semantic.queryEmbedding must be a Float32Array.");
    }
    if (blend) {
        if (blend.enabled !== undefined && typeof blend.enabled !== "boolean") {
            throw new Error("query(...): semantic.blend.enabled must be a boolean when provided.");
        }
        for (const key of ["wLex", "wSem"]) {
            const w = blend[key];
            if (w !== undefined && (!Number.isFinite(w) || w < 0)) {
                throw new Error(`query(...): semantic.blend.${key} must be a finite number >= 0.`);
            }
        }
    }
    if (force !== undefined && typeof force !== "boolean") {
        throw new Error("query(...): semantic.force must be a boolean when provided.");
    }
}
84
/**
 * Deterministic, embedding-free retrieval entry point.
 *
 * Pipeline: parse/normalize the query -> harvest candidate blocks from the
 * postings stream -> namespace/source filtering -> required-phrase
 * enforcement -> heading-overlap boost -> BM25L ranking with a proximity
 * bonus -> optional pseudo-relevance query expansion -> optional semantic
 * rerank (only when the pack ships embeddings) -> small KNS signature
 * boost -> near-duplicate suppression + MMR diversity.
 *
 * @param pack Search pack (lexicon, postings, blocks, optional metadata).
 * @param q    Raw query string; quoted spans become required phrases.
 * @param opts See validateQueryOptions for the accepted fields.
 * @returns Up to `topK` hits shaped { blockId, score, text, source?, namespace? }.
 * @throws Error on invalid options, or when semantic rerank is enabled
 *         without a query embedding.
 */
export function query(pack, q, opts = {}) {
    validateQueryOptions(opts);
    const topK = opts.topK ?? 10;
    const minScore = Number.isFinite(opts.minScore) ? Math.max(0, opts.minScore) : 0;
    // Expansion defaults; lower bounds keep later math safe even if callers
    // pass extreme values.
    const expansionOpts = {
        enabled: opts.queryExpansion?.enabled ?? true,
        docs: Math.max(1, opts.queryExpansion?.docs ?? 3),
        terms: Math.max(1, opts.queryExpansion?.terms ?? 4),
        weight: Math.max(0, opts.queryExpansion?.weight ?? 0.35),
        minTermLength: Math.max(2, opts.queryExpansion?.minTermLength ?? 3),
    };
    // Semantic rerank defaults; disabled unless explicitly turned on.
    const semanticOpts = {
        enabled: opts.semantic?.enabled ?? false,
        mode: opts.semantic?.mode ?? "rerank",
        topN: Math.max(1, opts.semantic?.topN ?? 50),
        minLexConfidence: clamp01(opts.semantic?.minLexConfidence ?? 0.35),
        blend: {
            enabled: opts.semantic?.blend?.enabled ?? true,
            wLex: Math.max(0, opts.semantic?.blend?.wLex ?? 0.75),
            wSem: Math.max(0, opts.semantic?.blend?.wSem ?? 0.25),
        },
        queryEmbedding: opts.semantic?.queryEmbedding,
        force: opts.semantic?.force ?? false,
    };
    // --- Query parsing
    const normTokens = tokenize(q).map((t) => t.term);
    // Normalize quoted phrases from q; normalize() may introduce internal
    // whitespace, so re-split each normalized token.
    const quotedRaw = parsePhrases(q);
    const quoted = quotedRaw.map((seq) => seq.map((t) => normalize(t)).flatMap((s) => s.split(/\s+/)).filter(Boolean));
    // Normalize requirePhrases the same way; drop phrases that tokenize to nothing.
    const extraReq = (opts.requirePhrases ?? [])
        .map((s) => tokenize(s).map((t) => t.term))
        .filter((arr) => arr.length > 0);
    const requiredPhrases = [...quoted, ...extraReq];
    const namespaceFilter = normalizeNamespaceFilter(opts.namespace);
    const sourceFilter = normalizeSourceFilter(opts.source);
    // --- Term ids for the free (unquoted) tokens in q
    const termIds = normTokens
        .map((t) => pack.lexicon.get(t))
        .filter((id) => id !== undefined);
    // If there are no free tokens but there ARE required phrases, we'll fill candidates from phrases later.
    const termSet = new Set(termIds);
    // --- Candidate map: blockId -> { tf, pos, hasPhrase?, headingScore? }
    const candidates = new Map();
    // Query-time document frequency collection for BM25 IDF.
    const dfs = new Map();
    // Pack format v3+ stores blockIds offset by +1 so 0 can terminate lists.
    const usesOffsetBlockIds = (pack.meta?.version ?? 1) >= 3;
    // Helper to harvest postings for a given set of termIds into candidates.
    // Postings layout (inferred from this loop): a flat zero-delimited stream
    // of [termId, {blockId, positions..., 0}..., 0] records.
    function scanForTermIds(idWeights, cfg = { collectPositions: true, createCandidates: true }) {
        const p = pack.postings;
        let i = 0;
        while (i < p.length) {
            const tid = p[i++];
            if (tid === 0)
                continue;
            const weight = idWeights.get(tid) ?? 0;
            const relevant = weight > 0;
            let termDf = 0;
            let encodedBid = p[i++];
            while (encodedBid !== 0) {
                const bid = usesOffsetBlockIds ? encodedBid - 1 : encodedBid;
                let pos = p[i++];
                const positions = [];
                while (pos !== 0) {
                    positions.push(pos);
                    pos = p[i++];
                }
                termDf++;
                if (relevant && bid >= 0) {
                    let entry = candidates.get(bid);
                    if (!entry && cfg.createCandidates !== false) {
                        entry = { tf: new Map(), pos: new Map() };
                        candidates.set(bid, entry);
                    }
                    if (entry) {
                        // tf is weighted so expansion terms count less than
                        // original query terms.
                        const prevTf = entry.tf.get(tid) ?? 0;
                        entry.tf.set(tid, prevTf + positions.length * weight);
                        if (cfg.collectPositions !== false) {
                            entry.pos.set(tid, positions);
                        }
                    }
                }
                encodedBid = p[i++];
            }
            // Even non-matching scans walk the full stream; only record df
            // for terms we actually asked about.
            if (relevant)
                dfs.set(tid, termDf);
        }
    }
    // 1) Scan using tokens from q (if any)
    if (termSet.size > 0) {
        scanForTermIds(new Map(Array.from(termSet.values(), (tid) => [tid, 1])));
    }
    // 2) Phrase-first rescue:
    // If nothing matched the free tokens, but we do have required phrases,
    // build a fallback term set from ALL tokens that appear in those phrases and scan again.
    if (candidates.size === 0 && requiredPhrases.length > 0) {
        const phraseTokenIds = new Set();
        for (const seq of requiredPhrases) {
            for (const t of seq) {
                const id = pack.lexicon.get(t);
                if (id !== undefined)
                    phraseTokenIds.add(id);
            }
        }
        if (phraseTokenIds.size > 0) {
            scanForTermIds(new Map(Array.from(phraseTokenIds.values(), (tid) => [tid, 1])));
        }
    }
    // --- Namespace filtering (drop candidates outside the requested namespaces)
    if (namespaceFilter.size > 0) {
        for (const bid of [...candidates.keys()]) {
            const ns = pack.namespaces?.[bid];
            const normalizedNs = typeof ns === "string" ? normalize(ns) : "";
            if (!normalizedNs || !namespaceFilter.has(normalizedNs)) {
                candidates.delete(bid);
            }
        }
    }
    // --- Source/docId filtering
    if (sourceFilter.size > 0) {
        for (const bid of [...candidates.keys()]) {
            const source = pack.docIds?.[bid];
            const normalizedSource = typeof source === "string" ? normalize(source) : "";
            if (!normalizedSource || !sourceFilter.has(normalizedSource)) {
                candidates.delete(bid);
            }
        }
    }
    // --- Phrase enforcement (now that we have some candidates).
    // Required phrases are hard filters; quoted-only phrases (no
    // requirePhrases) just mark hasPhrase for a ranking boost.
    if (requiredPhrases.length > 0) {
        for (const [bid, data] of [...candidates]) {
            const text = pack.blocks[bid] || "";
            const ok = requiredPhrases.every((seq) => containsPhrase(text, seq));
            if (!ok)
                candidates.delete(bid);
            else
                data.hasPhrase = true;
        }
    }
    else if (quoted.length > 0) {
        for (const [bid, data] of candidates) {
            const text = pack.blocks[bid] || "";
            data.hasPhrase = quoted.some((seq) => containsPhrase(text, seq));
        }
    }
    // If still nothing, bail early
    if (candidates.size === 0)
        return [];
    // --- Heading overlap: fraction of unique query terms present in the
    // candidate's heading, used as a boost in rankBM25L.
    if (pack.headings?.length) {
        const qset = new Set(normTokens);
        const qUniqueCount = new Set(normTokens).size || 1;
        for (const [bid, data] of candidates) {
            const h = pack.headings[bid] ?? "";
            const hTerms = tokenize(h || "").map((t) => t.term);
            const overlap = new Set(hTerms.filter((t) => qset.has(t))).size;
            data.headingScore = overlap / qUniqueCount;
        }
    }
    // --- Rank with proximity bonus. avgLen falls back to re-tokenizing every
    // block when the pack has no precomputed stats (O(total tokens)).
    const avgLen = pack.meta?.stats?.avgBlockLen ??
        (pack.blocks.length
            ? pack.blocks.reduce((s, b) => s + tokenize(b).length, 0) / pack.blocks.length
            : 1);
    const docCount = pack.meta?.stats?.blocks ?? pack.blocks.length;
    let prelim = rankBM25L(candidates, avgLen, docCount, dfs, pack.blockTokenLens, {
        proximityBonus: (cand) => proximityMultiplier(minCoverSpan(cand.pos)),
    });
    // --- Pseudo-relevance feedback: mine extra terms from the top hits,
    // rescan postings with down-weighted tfs, and re-rank once.
    if (expansionOpts.enabled && prelim.length > 0) {
        const expansionWeights = deriveExpansionTerms(pack, prelim, termSet, requiredPhrases, expansionOpts);
        if (expansionWeights.size > 0) {
            scanForTermIds(expansionWeights, { collectPositions: false, createCandidates: true });
            prelim = rankBM25L(candidates, avgLen, docCount, dfs, pack.blockTokenLens, {
                proximityBonus: (cand) => proximityMultiplier(minCoverSpan(cand.pos)),
            });
        }
    }
    if (prelim.length === 0)
        return [];
    if (minScore > 0) {
        prelim = prelim.filter((item) => item.score >= minScore);
        if (prelim.length === 0)
            return [];
    }
    // Semantic rerank only fires when forced or when the lexical ranking
    // looks ambiguous (low confidence).
    const confidence = lexConfidence(prelim);
    if (shouldRerankWithSemantic(pack, semanticOpts, confidence)) {
        prelim = rerankLexicalHitsWithSemantic(pack, prelim, semanticOpts);
    }
    // --- KNS tie-breaker + de-dup/MMR. The boost is at most 2%, so it can
    // only break near-ties, not reorder clearly separated hits. The pool is
    // oversampled 5x so diversification has room to drop near-duplicates.
    const qSig = knsSignature(normalize(q));
    const pool = prelim.slice(0, topK * 5).map((r) => {
        const text = pack.blocks[r.blockId] || "";
        const boost = 1 + 0.02 * (1 - knsDistance(qSig, knsSignature(text)));
        return {
            blockId: r.blockId,
            score: r.score * boost,
            text,
            source: pack.docIds?.[r.blockId] ?? undefined,
            namespace: pack.namespaces?.[r.blockId] ?? undefined,
        };
    });
    const finalHits = diversifyAndDedupe(pool, { k: topK });
    return finalHits;
}
288
/**
 * Heuristic confidence in the lexical ranking, in [0, 1].
 * Blends the relative score gap between the top two hits (65%) with a
 * saturating measure of the top score's absolute strength (35%).
 * Empty input or a non-positive top score yields 0.
 */
export function lexConfidence(hits) {
    if (hits.length === 0)
        return 0;
    const best = Math.max(0, hits[0]?.score ?? 0);
    const runnerUp = Math.max(0, hits[1]?.score ?? 0);
    if (best <= 0)
        return 0;
    const gap = clamp01((best - runnerUp) / best);
    const strength = best / (best + 1);
    return clamp01(0.65 * gap + 0.35 * strength);
}
297
/**
 * Decide whether semantic reranking should run. Requires the feature to be
 * enabled in "rerank" mode AND the pack to carry a semantic section; in that
 * case a missing query embedding is a caller error and throws. Reranks when
 * forced or when lexical confidence is below the configured threshold.
 */
function shouldRerankWithSemantic(pack, opts, confidence) {
    const wanted = opts.enabled && opts.mode === "rerank";
    if (!wanted || !pack.semantic)
        return false;
    if (!opts.queryEmbedding) {
        throw new Error("query(...): semantic.queryEmbedding (Float32Array) is required when semantic.enabled=true.");
    }
    return opts.force || confidence < opts.minLexConfidence;
}
307
/**
 * Re-rank the top-N lexical hits using the pack's int8-quantized embeddings.
 * Lexical and semantic scores are min-max normalized over the slice and
 * blended with normalized weights (or the raw semantic score is used when
 * blending is disabled); hits beyond topN keep their lexical order at the
 * tail. Returns `prelim` untouched when the pack has no usable semantic
 * section or the query embedding's dimensions don't match.
 */
function rerankLexicalHitsWithSemantic(pack, prelim, opts) {
    const sem = pack.semantic;
    if (!sem || !opts.queryEmbedding)
        return prelim;
    // Dimension sanity: a mismatched or empty vector table can't be compared.
    if (sem.dims <= 0 || sem.vecs.length === 0 || sem.dims !== opts.queryEmbedding.length)
        return prelim;
    const topN = Math.min(opts.topN, prelim.length);
    const rerankSlice = prelim.slice(0, topN);
    const tail = prelim.slice(topN);
    const lexScores = new Float64Array(topN);
    for (let i = 0; i < topN; i++)
        lexScores[i] = rerankSlice[i].score;
    const normLex = minMaxNormalizeTyped(lexScores);
    // Quantize the query the same way block vectors were quantized at build
    // time so the int8 dot products are comparable.
    const quantizedQuery = quantizeEmbeddingInt8L2Norm(opts.queryEmbedding);
    const semScores = scoreSemanticInt8(quantizedQuery.q, quantizedQuery.scale, sem, rerankSlice);
    const normSem = minMaxNormalizeTyped(semScores);
    // Normalize blend weights so they sum to 1 (50/50 when both are zero).
    const denom = opts.blend.wLex + opts.blend.wSem;
    const wLex = denom > 0 ? opts.blend.wLex / denom : 0.5;
    const wSem = denom > 0 ? opts.blend.wSem / denom : 0.5;
    const reranked = new Array(topN);
    for (let i = 0; i < topN; i++) {
        const hit = rerankSlice[i];
        reranked[i] = {
            blockId: hit.blockId,
            score: opts.blend.enabled ? wLex * normLex[i] + wSem * normSem[i] : semScores[i],
        };
    }
    // Deterministic order: score descending, then blockId ascending.
    reranked.sort((a, b) => b.score - a.score || a.blockId - b.blockId);
    return [...reranked, ...tail];
}
337
/**
 * Compute semantic scores for `hits` as the int8 dot product of the
 * quantized query against each block vector, rescaled by the query scale
 * and the per-block scale (f16-decoded when present, else 1/127).
 * Hits with negative blockIds or out-of-range vectors keep a score of 0,
 * as does everything when dimensions mismatch or the query scale is 0.
 */
function scoreSemanticInt8(queryQ, queryScale, semantic, hits) {
    const out = new Float64Array(hits.length);
    const { dims, vecs, scales } = semantic;
    // Bail out on a degenerate (all-zero) query or a dimension mismatch.
    if (queryScale === 0 || queryQ.length !== dims)
        return out;
    hits.forEach((hit, idx) => {
        const blockId = hit.blockId;
        const offset = blockId * dims;
        if (blockId < 0 || offset + dims > vecs.length)
            return;
        let acc = 0;
        for (let d = 0; d < dims; d++)
            acc += queryQ[d] * vecs[offset + d];
        const perBlock = scales?.[blockId] !== undefined ? decodeScaleF16(scales[blockId]) : (1 / 127);
        out[idx] = acc * queryScale * perBlock;
    });
    return out;
}
360
/**
 * Min-max normalize a numeric array into [0, 1], returning a new
 * Float64Array. Degenerate inputs (empty stays as-is; all-equal or
 * non-finite extremes become all ones) so downstream blending treats the
 * entries as tied rather than dividing by zero.
 */
function minMaxNormalizeTyped(values) {
    if (values.length === 0)
        return values;
    let lo = Infinity;
    let hi = -Infinity;
    for (let i = 0; i < values.length; i++) {
        const v = values[i];
        if (v < lo)
            lo = v;
        if (v > hi)
            hi = v;
    }
    if (!Number.isFinite(lo) || !Number.isFinite(hi) || hi <= lo) {
        const flat = new Float64Array(values.length);
        flat.fill(1);
        return flat;
    }
    const range = hi - lo;
    const out = new Float64Array(values.length);
    for (let i = 0; i < values.length; i++) {
        out[i] = clamp01((values[i] - lo) / range);
    }
    return out;
}
384
/** Clamp a number into [0, 1]; non-finite inputs (NaN, ±Infinity) collapse to 0. */
function clamp01(v) {
    if (!Number.isFinite(v))
        return 0;
    return Math.min(1, Math.max(0, v));
}
393
/**
 * Pseudo-relevance feedback: harvest frequent, non-query terms from the top
 * ranked blocks and return them as a (termId -> weight) map for a second
 * postings scan. Each document's term counts are weighted by its relative
 * score (floored at 0.2); the final weight is opts.weight scaled by the
 * aggregate term score clamped into [0.5, 1.5].
 */
function deriveExpansionTerms(pack, prelim, baseTermSet, requiredPhrases, opts) {
    if (prelim.length === 0 || opts.weight <= 0)
        return new Map();
    // Never expand into terms the user already asked for (free or phrase terms).
    const excluded = new Set(baseTermSet);
    for (const seq of requiredPhrases) {
        for (const term of seq) {
            const tid = pack.lexicon.get(term);
            if (tid !== undefined)
                excluded.add(tid);
        }
    }
    const docLimit = Math.min(opts.docs, prelim.length);
    const topScore = Math.max(prelim[0]?.score ?? 0, 1e-6);
    const aggregate = new Map();
    for (let rank = 0; rank < docLimit; rank++) {
        const hit = prelim[rank];
        const body = pack.blocks[hit.blockId] ?? "";
        const docWeight = Math.max(hit.score / topScore, 0.2);
        // Count eligible term frequencies within this document only.
        const tfHere = new Map();
        for (const tok of tokenize(body)) {
            if (tok.term.length < opts.minTermLength)
                continue;
            const tid = pack.lexicon.get(tok.term);
            if (tid === undefined || excluded.has(tid))
                continue;
            tfHere.set(tid, (tfHere.get(tid) ?? 0) + 1);
        }
        for (const [tid, tf] of tfHere) {
            aggregate.set(tid, (aggregate.get(tid) ?? 0) + tf * docWeight);
        }
    }
    const picked = [...aggregate.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, opts.terms);
    return new Map(picked.map(([tid, score]) => [tid, opts.weight * Math.max(0.5, Math.min(1.5, score))]));
}
429
/**
 * Ordered phrase check using the SAME tokenizer/normalizer path as the
 * index: both the phrase and the text are re-tokenized, then the phrase
 * must appear as a contiguous run of terms. Empty phrases never match.
 */
function containsPhrase(text, seq) {
    if (seq.length === 0)
        return false;
    const needle = tokenize(seq.join(" ")).map((t) => t.term);
    const haystack = tokenize(text).map((t) => t.term);
    const lastStart = haystack.length - needle.length;
    for (let start = 0; start <= lastStart; start++) {
        if (needle.every((term, off) => haystack[start + off] === term))
            return true;
    }
    return false;
}
444
/**
 * Build the set of normalized namespace filters from a string or string[]
 * option. Undefined yields an empty set (no filtering); values that
 * normalize to an empty string are dropped.
 */
function normalizeNamespaceFilter(input) {
    if (input === undefined)
        return new Set();
    const list = Array.isArray(input) ? input : [input];
    const out = new Set();
    for (const value of list) {
        const norm = normalize(value);
        if (norm)
            out.add(norm);
    }
    return out;
}
450
/**
 * Build the set of normalized source/docId filters from a string or
 * string[] option. Undefined yields an empty set (no filtering); values
 * that normalize to an empty string are dropped.
 */
function normalizeSourceFilter(input) {
    if (input === undefined)
        return new Set();
    const list = Array.isArray(input) ? input : [input];
    const out = new Set();
    for (const value of list) {
        const norm = normalize(value);
        if (norm)
            out.add(norm);
    }
    return out;
}
456
/**
 * Shared validator for options that accept a string or string[].
 * Undefined is allowed; anything that is neither a string nor an array of
 * strings throws, naming the offending option.
 */
function validateStringOrStringArrayOption(name, value) {
    if (value === undefined)
        return;
    if (typeof value === "string")
        return;
    if (Array.isArray(value) && value.every((entry) => typeof entry === "string"))
        return;
    throw new Error(`query(...): ${name} must be a string or an array of strings when provided.`);
}
package/dist/rank.d.ts ADDED
@@ -0,0 +1,21 @@
1
/**
 * Tuning options for the BM25L ranker.
 * Defaults applied in rank.js: k1=1.5, b=0.75, headingBoost=0.3,
 * phraseBoost=0.6. `proximityBonus` is a hook that receives the candidate
 * (including its term positions) and returns a multiplicative score factor.
 */
export type RankOptions = {
    k1?: number;
    b?: number;
    headingBoost?: number;
    phraseBoost?: number;
    proximityBonus?: (cand: {
        tf: Map<number, number>;
        pos?: Map<number, number[]>;
        hasPhrase?: boolean;
        headingScore?: number;
    }) => number;
};
/**
 * Score candidate blocks with BM25L and return hits sorted by descending
 * score. `dfs` supplies query-time document frequencies for IDF; block
 * length comes from `blockTokenLens` when provided, otherwise the sum of
 * the candidate's term frequencies is used (minimum 1).
 */
export declare function rankBM25L(candidates: Map<number, {
    tf: Map<number, number>;
    pos?: Map<number, number[]>;
    hasPhrase?: boolean;
    headingScore?: number;
}>, avgLen: number, docCount: number, dfs: Map<number, number>, blockTokenLens?: number[], opts?: RankOptions): Array<{
    blockId: number;
    score: number;
}>;
package/dist/rank.js ADDED
@@ -0,0 +1,31 @@
1
+ /*
2
+ * rank.ts
3
+ * BM25L ranker with optional heading/phrase boosts and a proximity bonus hook.
4
+ */
5
/**
 * Score candidate blocks with BM25L, then apply multiplicative boosts:
 * an optional proximity hook, a phrase-match boost (1 + phraseBoost), and
 * a heading-overlap boost (1 + headingBoost * headingScore).
 * Returns hits sorted by descending score.
 */
export function rankBM25L(candidates, avgLen, docCount, dfs, blockTokenLens, opts = {}) {
    const k1 = opts.k1 ?? 1.5;
    const b = opts.b ?? 0.75;
    const headingBoost = opts.headingBoost ?? 0.3;
    const phraseBoost = opts.phraseBoost ?? 0.6;
    const hits = [];
    for (const [blockId, cand] of candidates) {
        // Block length: prefer the precomputed table, else sum the tfs (min 1).
        let len = blockTokenLens?.[blockId];
        if (len == null) {
            let tfSum = 0;
            for (const tf of cand.tf.values())
                tfSum += tf;
            len = tfSum || 1;
        }
        let score = 0;
        for (const [tid, tf] of cand.tf) {
            const df = dfs.get(tid) ?? 0;
            // Smoothed IDF: log(1 + (N - df + 0.5) / (df + 0.5)), always > 0.
            const idf = Math.log(1 + (docCount - df + 0.5) / (df + 0.5));
            const saturated = (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * (len / avgLen)));
            score += idf * saturated;
        }
        if (opts.proximityBonus)
            score *= opts.proximityBonus(cand) ?? 1;
        if (cand.hasPhrase)
            score *= 1 + phraseBoost;
        if (cand.headingScore)
            score *= 1 + headingBoost * cand.headingScore;
        hits.push({ blockId, score });
    }
    hits.sort((x, y) => y.score - x.score);
    return hits;
}
@@ -0,0 +1,28 @@
1
+ import type { AgentDefinitionV1 } from './agent.js';
2
/** One agent considered by the router, with its routing score. */
export interface RouteCandidateV1 {
    agentId: string;
    /** Candidate score; isRouteDecisionV1 requires a finite number in [0, 1]. */
    score: number;
    /** Optional human-readable justification for this candidate. */
    why?: string;
}
/**
 * A routing decision (discriminated by type: 'route_decision') mapping a
 * request to a selected agent plus the scored candidate list it was chosen
 * from. Typically produced upstream and validated with isRouteDecisionV1 /
 * validateRouteDecisionV1 before use.
 */
export interface RouteDecisionV1 {
    type: 'route_decision';
    /** Detected intent label, if any. */
    intent?: string;
    /** Extracted entities keyed by name; values are opaque here. */
    entities?: Record<string, unknown>;
    /** At least one candidate is required by isRouteDecisionV1. */
    candidates: RouteCandidateV1[];
    /** agentId of the chosen agent; must be registered to pass validation. */
    selected: string;
    /** Tool ids the routed request is expected to need. */
    needsTools?: string[];
    risk?: 'low' | 'med' | 'high';
}
/** Structural runtime type guard for RouteDecisionV1 (see router.js). */
export declare function isRouteDecisionV1(x: unknown): x is RouteDecisionV1;
/**
 * Registry-aware validation: the selected agent and every candidate must be
 * registered, and candidate agentIds must be unique. Returns a result
 * object rather than throwing.
 */
export declare function validateRouteDecisionV1(decision: RouteDecisionV1, agentRegistry: Record<string, AgentDefinitionV1>): {
    ok: true;
} | {
    ok: false;
    error: string;
};
/**
 * Resolve a concrete agentId with graceful degradation: the selected agent
 * if registered, else the best-scoring registered candidate, else the
 * fallback / first registered agent ('' when the registry is empty).
 */
export declare function selectAgentIdFromRouteDecisionV1(decision: RouteDecisionV1, agentRegistry: Record<string, AgentDefinitionV1>, opts?: {
    fallbackAgentId?: string;
}): {
    agentId: string;
    reason: 'selected' | 'top_candidate' | 'fallback';
};
package/dist/router.js ADDED
@@ -0,0 +1,74 @@
1
/**
 * Runtime type guard for RouteDecisionV1 payloads (e.g. parsed model output).
 * Checks the 'route_decision' discriminant, a non-empty `selected`, at least
 * one well-formed candidate (non-empty agentId, finite score in [0, 1],
 * optional string `why`), and the optional `intent`, `entities`, `risk`,
 * and `needsTools` fields against their declared types.
 */
export function isRouteDecisionV1(x) {
    if (!x || typeof x !== 'object')
        return false;
    const v = x;
    if (v.type !== 'route_decision')
        return false;
    if (typeof v.selected !== 'string' || !v.selected.trim())
        return false;
    if (!Array.isArray(v.candidates) || v.candidates.length < 1)
        return false;
    // Fix: `intent`, `entities`, and `risk` were previously unchecked, so the
    // guard could claim malformed objects matched RouteDecisionV1.
    if (v.intent !== undefined && typeof v.intent !== 'string')
        return false;
    if (v.entities !== undefined &&
        (v.entities === null || typeof v.entities !== 'object' || Array.isArray(v.entities))) {
        return false;
    }
    if (v.risk !== undefined && v.risk !== 'low' && v.risk !== 'med' && v.risk !== 'high')
        return false;
    if (v.needsTools !== undefined &&
        (!Array.isArray(v.needsTools) ||
            v.needsTools.some((toolId) => typeof toolId !== 'string'))) {
        return false;
    }
    for (const candidate of v.candidates) {
        if (!candidate || typeof candidate !== 'object')
            return false;
        const c = candidate;
        if (typeof c.agentId !== 'string' || !c.agentId.trim())
            return false;
        if (typeof c.score !== 'number' || !Number.isFinite(c.score))
            return false;
        if (c.score < 0 || c.score > 1)
            return false;
        if (c.why !== undefined && typeof c.why !== 'string')
            return false;
    }
    return true;
}
31
/**
 * Cross-check a route decision against the agent registry: the selected
 * agent and every candidate must be registered, and candidate agentIds must
 * be unique. Returns { ok: true } on success or { ok: false, error } with
 * the first problem found.
 */
export function validateRouteDecisionV1(decision, agentRegistry) {
    const fail = (error) => ({ ok: false, error });
    if (!agentRegistry[decision.selected]) {
        return fail(`selected agent is not registered: ${decision.selected}`);
    }
    const seenIds = new Set();
    for (const { agentId } of decision.candidates) {
        if (seenIds.has(agentId)) {
            return fail(`duplicate candidate agentId: ${agentId}`);
        }
        seenIds.add(agentId);
        if (!agentRegistry[agentId]) {
            return fail(`candidate agent is not registered: ${agentId}`);
        }
    }
    return { ok: true };
}
56
/**
 * Resolve a concrete agentId from a route decision, degrading gracefully:
 * 1. the selected agent, when registered ('selected');
 * 2. the best-scoring registered candidate, ties broken by agentId
 *    lexicographic order ('top_candidate');
 * 3. the explicit fallback, then the lexicographically first registered
 *    agent, then the empty string ('fallback').
 */
export function selectAgentIdFromRouteDecisionV1(decision, agentRegistry, opts = {}) {
    if (agentRegistry[decision.selected]) {
        return { agentId: decision.selected, reason: 'selected' };
    }
    const ranked = [...decision.candidates].sort((a, b) => b.score - a.score || a.agentId.localeCompare(b.agentId));
    const registered = ranked.find((candidate) => agentRegistry[candidate.agentId]);
    if (registered) {
        return { agentId: registered.agentId, reason: 'top_candidate' };
    }
    if (opts.fallbackAgentId && agentRegistry[opts.fallbackAgentId]) {
        return { agentId: opts.fallbackAgentId, reason: 'fallback' };
    }
    const [firstRegistered] = Object.keys(agentRegistry).sort();
    return firstRegistered
        ? { agentId: firstRegistered, reason: 'fallback' }
        : { agentId: '', reason: 'fallback' };
}
+ }
@@ -0,0 +1,19 @@
1
+ import type { AgentDefinitionV1 } from './agent.js';
2
+ import type { Pack } from './pack.js';
3
/**
 * Flattened, retrieval-friendly description of an agent used for routing.
 * Produced from an AgentDefinitionV1 by getAgentRoutingProfileV1.
 */
export interface AgentRoutingProfileV1 {
    agentId: string;
    /** Namespace associated with this agent, when one is defined. */
    namespace?: string;
    heading?: string;
    description?: string;
    tags: string[];
    /** Example requests/utterances for this agent. */
    examples: string[];
    capabilities: string[];
    /** Raw tool policy as declared on the agent; its shape is opaque here. */
    toolPolicy?: unknown;
    /** Summarized tool policy; 'unknown' when the raw policy can't be classified. */
    toolPolicySummary?: {
        mode: 'allow_all' | 'deny_all' | 'mixed' | 'unknown';
        allowed?: string[];
        denied?: string[];
    };
}
/** Build the routing profile for a single agent definition. */
export declare function getAgentRoutingProfileV1(agent: AgentDefinitionV1): AgentRoutingProfileV1;
/**
 * Build routing profiles from a pack — presumably one per agent the pack
 * carries; confirm against the implementation.
 */
export declare function getPackRoutingProfilesV1(pack: Pack): AgentRoutingProfileV1[];