@tekmidian/pai 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +16 -10
- package/README.md +46 -6
- package/dist/{auto-route-JjW3f7pV.mjs → auto-route-B5MSUJZK.mjs} +3 -3
- package/dist/{auto-route-JjW3f7pV.mjs.map → auto-route-B5MSUJZK.mjs.map} +1 -1
- package/dist/cli/index.mjs +34 -22
- package/dist/cli/index.mjs.map +1 -1
- package/dist/{config-DELNqq3Z.mjs → config-B4brrHHE.mjs} +1 -1
- package/dist/{config-DELNqq3Z.mjs.map → config-B4brrHHE.mjs.map} +1 -1
- package/dist/daemon/index.mjs +7 -7
- package/dist/daemon-mcp/index.mjs +11 -4
- package/dist/daemon-mcp/index.mjs.map +1 -1
- package/dist/{daemon-CeTX4NpF.mjs → daemon-s868Paua.mjs} +12 -12
- package/dist/{daemon-CeTX4NpF.mjs.map → daemon-s868Paua.mjs.map} +1 -1
- package/dist/{detect-D7gPV3fQ.mjs → detect-CdaA48EI.mjs} +1 -1
- package/dist/{detect-D7gPV3fQ.mjs.map → detect-CdaA48EI.mjs.map} +1 -1
- package/dist/{detector-cYYhK2Mi.mjs → detector-Bp-2SM3x.mjs} +2 -2
- package/dist/{detector-cYYhK2Mi.mjs.map → detector-Bp-2SM3x.mjs.map} +1 -1
- package/dist/{factory-DZLvRf4m.mjs → factory-CeXQzlwn.mjs} +3 -3
- package/dist/{factory-DZLvRf4m.mjs.map → factory-CeXQzlwn.mjs.map} +1 -1
- package/dist/index.d.mts +29 -1
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +4 -3
- package/dist/{indexer-backend-BHztlJJg.mjs → indexer-backend-DQO-FqAI.mjs} +1 -1
- package/dist/{indexer-backend-BHztlJJg.mjs.map → indexer-backend-DQO-FqAI.mjs.map} +1 -1
- package/dist/{ipc-client-CLt2fNlC.mjs → ipc-client-CgSpwHDC.mjs} +1 -1
- package/dist/{ipc-client-CLt2fNlC.mjs.map → ipc-client-CgSpwHDC.mjs.map} +1 -1
- package/dist/mcp/index.mjs +15 -5
- package/dist/mcp/index.mjs.map +1 -1
- package/dist/{postgres-CRBe30Ag.mjs → postgres-CIxeqf_n.mjs} +1 -1
- package/dist/{postgres-CRBe30Ag.mjs.map → postgres-CIxeqf_n.mjs.map} +1 -1
- package/dist/reranker-D7bRAHi6.mjs +71 -0
- package/dist/reranker-D7bRAHi6.mjs.map +1 -0
- package/dist/{schemas-BY3Pjvje.mjs → schemas-BFIgGntb.mjs} +1 -1
- package/dist/{schemas-BY3Pjvje.mjs.map → schemas-BFIgGntb.mjs.map} +1 -1
- package/dist/{search-GK0ibTJy.mjs → search-_oHfguA5.mjs} +47 -4
- package/dist/search-_oHfguA5.mjs.map +1 -0
- package/dist/{sqlite-RyR8Up1v.mjs → sqlite-CymLKiDE.mjs} +2 -2
- package/dist/{sqlite-RyR8Up1v.mjs.map → sqlite-CymLKiDE.mjs.map} +1 -1
- package/dist/{tools-CUg0Lyg-.mjs → tools-Dx7GjOHd.mjs} +23 -14
- package/dist/tools-Dx7GjOHd.mjs.map +1 -0
- package/dist/{vault-indexer-Bo2aPSzP.mjs → vault-indexer-DXWs9pDn.mjs} +1 -1
- package/dist/{vault-indexer-Bo2aPSzP.mjs.map → vault-indexer-DXWs9pDn.mjs.map} +1 -1
- package/dist/{zettelkasten-Co-w0XSZ.mjs → zettelkasten-e-a4rW_6.mjs} +2 -2
- package/dist/{zettelkasten-Co-w0XSZ.mjs.map → zettelkasten-e-a4rW_6.mjs.map} +1 -1
- package/package.json +1 -1
- package/dist/search-GK0ibTJy.mjs.map +0 -1
- package/dist/tools-CUg0Lyg-.mjs.map +0 -1
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { t as __exportAll } from "./rolldown-runtime-95iHPtFO.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/memory/reranker.ts
|
|
4
|
+
// Export record for src/memory/reranker.ts. Each entry is a zero-argument
// arrow that returns the live binding, so the function is looked up when the
// arrow is invoked rather than when this record is built.
// NOTE(review): presumably __exportAll installs these arrows as property
// getters on a namespace-like object — confirm against the rolldown runtime.
var reranker_exports = /* @__PURE__ */ __exportAll({
  configureRerankerModel: () => configureRerankerModel,
  rerankResults: () => rerankResults
});
|
|
8
|
+
/** Model id passed to `from_pretrained` when no reranker model has been configured. */
const DEFAULT_RERANKER_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2";

// Module-level state for the lazily loaded reranker. Everything starts out
// null; nothing is loaded until ensureLoaded() runs.

/** Id of the selected (or most recently loaded) model; null until one is chosen. */
let _currentModel = null;

/** Promise of the load started by ensureLoaded(), reused by later callers. */
let _loading = null;

/** Tokenizer instance produced by ensureLoaded(). */
let _tokenizer = null;

/** Sequence-classification model instance produced by ensureLoaded(). */
let _model = null;
|
|
13
|
+
/**
 * Select the cross-encoder model used by rerankResults().
 *
 * Call this before the first rerankResults() call to use a non-default model.
 * Selecting a model that differs from the one currently in effect clears the
 * cached tokenizer/model and forgets any pending load promise, so the next
 * rerank loads the newly selected model.
 *
 * @param {string} [model] Model id. Omitted, empty or whitespace-only values
 *   select DEFAULT_RERANKER_MODEL; surrounding whitespace is trimmed.
 * @returns {void}
 */
function configureRerankerModel(model) {
  const resolved = model?.trim() || DEFAULT_RERANKER_MODEL;
  // Until a model has been configured or loaded, _currentModel is null and the
  // default model is the one in effect (and possibly already being loaded), so
  // compare against the effective model rather than the raw state.
  const effective = _currentModel ?? DEFAULT_RERANKER_MODEL;
  if (effective !== resolved) {
    _tokenizer = null;
    _model = null;
    _loading = null;
  }
  _currentModel = resolved;
}
|
|
26
|
+
/**
 * Lazily load the tokenizer and cross-encoder for the model in effect
 * (`_currentModel`, or DEFAULT_RERANKER_MODEL when none is configured).
 *
 * Callers that arrive while a load is pending share that load. The tokenizer
 * and model are published together only after both have loaded, and only if
 * this load is still the current one for the model in effect; otherwise the
 * stale instances are discarded and the model now in effect is loaded instead.
 * A failed load is forgotten so that a later call can retry.
 *
 * @returns {Promise<void>} Resolves once `_tokenizer` and `_model` are set.
 *   Rejects with the underlying error when the dynamic import or either
 *   `from_pretrained` call fails.
 */
async function ensureLoaded() {
  if (_tokenizer && _model) return;
  if (_loading) return _loading;
  const model = _currentModel ?? DEFAULT_RERANKER_MODEL;
  // `pending` is only read after the first await below, i.e. after it has
  // been assigned.
  const pending = (async () => {
    let tokenizer;
    let classifier;
    try {
      const { AutoTokenizer, AutoModelForSequenceClassification } = await import("@huggingface/transformers");
      tokenizer = await AutoTokenizer.from_pretrained(model);
      classifier = await AutoModelForSequenceClassification.from_pretrained(model, { dtype: "q8" });
    } catch (err) {
      // Do not keep a rejected promise cached: that would fail every future call.
      if (_loading === pending) _loading = null;
      throw err;
    }
    const superseded = _loading !== pending || (_currentModel ?? DEFAULT_RERANKER_MODEL) !== model;
    if (superseded) {
      // The model selection changed while loading; drop these instances and
      // load (or join the load of) the model that is now in effect.
      if (_loading === pending) _loading = null;
      return ensureLoaded();
    }
    _tokenizer = tokenizer;
    _model = classifier;
    _currentModel = model;
  })();
  _loading = pending;
  return pending;
}
|
|
38
|
+
/**
 * Rerank search results using a cross-encoder model.
 *
 * Takes the first `maxCandidates` results from a first-stage retriever,
 * scores every (query, snippet) pair in a single batch, and returns the
 * candidates sorted by that score, highest first. Results beyond
 * `maxCandidates` are dropped, not appended unscored.
 *
 * Each returned item is a shallow copy of the input result whose `score` is
 * replaced by the cross-encoder score; the input array and its items are not
 * modified.
 *
 * @param {string} query Search query paired with every candidate snippet.
 * @param {Array<object>} results First-stage results; `snippet` is the text
 *   that gets scored (a missing snippet is scored as an empty string).
 * @param {{ topK?: number, maxCandidates?: number }} [opts]
 *   `maxCandidates` caps how many results are scored (default 50); `topK`
 *   caps how many are returned (default `results.length`). A non-positive
 *   limit yields an empty result.
 * @returns {Promise<Array<object>>} Reranked results, best first.
 */
async function rerankResults(query, results, opts) {
  if (results.length === 0) return [];
  const maxCandidates = opts?.maxCandidates ?? 50;
  const topK = opts?.topK ?? results.length;
  // A negative end index would make slice() count from the end of the array,
  // so treat every non-positive limit as "select nothing".
  const candidates = maxCandidates > 0 ? results.slice(0, maxCandidates) : [];
  // Bail out before paying for a model load or tokenizing an empty batch.
  if (candidates.length === 0 || !(topK > 0)) return [];
  await ensureLoaded();
  const queries = candidates.map(() => query);
  const documents = candidates.map((r) => r.snippet ?? "");
  const inputs = _tokenizer(queries, {
    text_pair: documents,
    padding: true,
    truncation: true
  });
  const output = await _model(inputs);
  // One row of logits per pair; column 0 is used as the relevance score
  // (higher sorts first).
  const scores = output.logits.tolist();
  const scored = candidates.map((result, i) => ({
    ...result,
    score: scores[i][0]
  }));
  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, topK);
}
|
|
68
|
+
|
|
69
|
+
//#endregion
|
|
70
|
+
export { rerankResults as n, reranker_exports as r, configureRerankerModel as t };
|
|
71
|
+
//# sourceMappingURL=reranker-D7bRAHi6.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reranker-D7bRAHi6.mjs","names":[],"sources":["../src/memory/reranker.ts"],"sourcesContent":["/**\n * Cross-encoder reranker for PAI memory search results.\n *\n * Uses Xenova/ms-marco-MiniLM-L-6-v2 — a 22.7M param cross-encoder trained on\n * MS MARCO passage ranking. The q8 quantized ONNX model is ~23 MB.\n *\n * Cross-encoders score (query, document) pairs jointly, producing more accurate\n * relevance scores than bi-encoder cosine similarity alone. The trade-off is\n * latency: cross-encoders must score each pair independently, so they are used\n * as a reranking step on top of a fast first-stage retriever (BM25 / cosine).\n *\n * The model is loaded as a lazy singleton — no startup cost until the first\n * rerank call. Subsequent calls reuse the loaded model.\n *\n * Inspired by QMD's Qwen3-reranker step (tobi/qmd).\n */\n\nimport type { SearchResult } from \"./search.js\";\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nconst DEFAULT_RERANKER_MODEL = \"Xenova/ms-marco-MiniLM-L-6-v2\";\n\n// ---------------------------------------------------------------------------\n// Lazy singleton\n// ---------------------------------------------------------------------------\n\nlet _tokenizer: any = null;\nlet _model: any = null;\nlet _currentModel: string | null = null;\nlet _loading: Promise<void> | null = null;\n\n/**\n * Configure the reranker model.\n * Must be called before the first rerank() call if you want a non-default model.\n */\nexport function configureRerankerModel(model?: string): void {\n const resolved = model?.trim() || DEFAULT_RERANKER_MODEL;\n if (_currentModel !== null && _currentModel !== resolved) {\n _tokenizer = null;\n _model = null;\n _loading = null;\n }\n _currentModel = resolved;\n}\n\nasync function ensureLoaded(): Promise<void> {\n if (_tokenizer && _model) return;\n if (_loading) return 
_loading;\n\n _loading = (async () => {\n const model = _currentModel ?? DEFAULT_RERANKER_MODEL;\n const {\n AutoTokenizer,\n AutoModelForSequenceClassification,\n } = await import(\"@huggingface/transformers\");\n\n _tokenizer = await AutoTokenizer.from_pretrained(model);\n _model = await AutoModelForSequenceClassification.from_pretrained(\n model,\n { dtype: \"q8\" },\n );\n _currentModel = model;\n })();\n\n return _loading;\n}\n\n// ---------------------------------------------------------------------------\n// Reranking\n// ---------------------------------------------------------------------------\n\nexport interface RerankOptions {\n /** Maximum number of results to return after reranking. */\n topK?: number;\n /**\n * Maximum number of candidates to rerank.\n * Cross-encoders are O(n) per candidate, so we cap to keep latency\n * reasonable. Default: 50.\n */\n maxCandidates?: number;\n}\n\n/**\n * Rerank search results using a cross-encoder model.\n *\n * Takes the top `maxCandidates` results from a first-stage retriever,\n * scores each (query, snippet) pair through the cross-encoder, and\n * returns them sorted by cross-encoder relevance score.\n *\n * The original retrieval score is replaced with the cross-encoder score.\n */\nexport async function rerankResults(\n query: string,\n results: SearchResult[],\n opts?: RerankOptions,\n): Promise<SearchResult[]> {\n if (results.length === 0) return [];\n\n const maxCandidates = opts?.maxCandidates ?? 50;\n const topK = opts?.topK ?? 
results.length;\n\n // Cap candidates to rerank\n const candidates = results.slice(0, maxCandidates);\n\n await ensureLoaded();\n\n // Tokenize all (query, document) pairs in a single batch\n const queries = new Array(candidates.length).fill(query);\n const documents = candidates.map((r) => r.snippet);\n\n const inputs = _tokenizer!(queries, {\n text_pair: documents,\n padding: true,\n truncation: true,\n });\n\n // Run the cross-encoder\n const output = await _model!(inputs);\n const logits = output.logits;\n\n // ms-marco-MiniLM returns raw logits (not sigmoid-normalized).\n // Higher = more relevant.\n const scores: number[][] = logits.tolist();\n\n // Build reranked results\n const scored = candidates.map((result, i) => ({\n ...result,\n score: scores[i][0],\n }));\n\n // Sort by cross-encoder score descending\n scored.sort((a, b) => b.score - a.score);\n\n return scored.slice(0, topK);\n}\n"],"mappings":";;;;;;;AAuBA,MAAM,yBAAyB;AAM/B,IAAI,aAAkB;AACtB,IAAI,SAAc;AAClB,IAAI,gBAA+B;AACnC,IAAI,WAAiC;;;;;AAMrC,SAAgB,uBAAuB,OAAsB;CAC3D,MAAM,WAAW,OAAO,MAAM,IAAI;AAClC,KAAI,kBAAkB,QAAQ,kBAAkB,UAAU;AACxD,eAAa;AACb,WAAS;AACT,aAAW;;AAEb,iBAAgB;;AAGlB,eAAe,eAA8B;AAC3C,KAAI,cAAc,OAAQ;AAC1B,KAAI,SAAU,QAAO;AAErB,aAAY,YAAY;EACtB,MAAM,QAAQ,iBAAiB;EAC/B,MAAM,EACJ,eACA,uCACE,MAAM,OAAO;AAEjB,eAAa,MAAM,cAAc,gBAAgB,MAAM;AACvD,WAAS,MAAM,mCAAmC,gBAChD,OACA,EAAE,OAAO,MAAM,CAChB;AACD,kBAAgB;KACd;AAEJ,QAAO;;;;;;;;;;;AA2BT,eAAsB,cACpB,OACA,SACA,MACyB;AACzB,KAAI,QAAQ,WAAW,EAAG,QAAO,EAAE;CAEnC,MAAM,gBAAgB,MAAM,iBAAiB;CAC7C,MAAM,OAAO,MAAM,QAAQ,QAAQ;CAGnC,MAAM,aAAa,QAAQ,MAAM,GAAG,cAAc;AAElD,OAAM,cAAc;CAGpB,MAAM,UAAU,IAAI,MAAM,WAAW,OAAO,CAAC,KAAK,MAAM;CACxD,MAAM,YAAY,WAAW,KAAK,MAAM,EAAE,QAAQ;CAElD,MAAM,SAAS,WAAY,SAAS;EAClC,WAAW;EACX,SAAS;EACT,YAAY;EACb,CAAC;CAQF,MAAM,UALS,MAAM,OAAQ,OAAO,EACd,OAIY,QAAQ;CAG1C,MAAM,SAAS,WAAW,KAAK,QAAQ,OAAO;EAC5C,GAAG;EACH,OAAO,OAAO,GAAG;EAClB,EAAE;AAGH,QAAO,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;AAExC,QAAO,OAAO,MAAM,GAAG,KAAK"}
|