@keel_flow/kb-pipeline 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/chunking/ast.d.ts +8 -0
- package/dist/chunking/ast.d.ts.map +1 -0
- package/dist/chunking/ast.js +86 -0
- package/dist/chunking/ast.js.map +1 -0
- package/dist/chunking/contextual-fallback.d.ts +15 -0
- package/dist/chunking/contextual-fallback.d.ts.map +1 -0
- package/dist/chunking/contextual-fallback.js +33 -0
- package/dist/chunking/contextual-fallback.js.map +1 -0
- package/dist/chunking/fixed.d.ts +6 -0
- package/dist/chunking/fixed.d.ts.map +1 -0
- package/dist/chunking/fixed.js +24 -0
- package/dist/chunking/fixed.js.map +1 -0
- package/dist/chunking/index.d.ts +24 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +86 -0
- package/dist/chunking/index.js.map +1 -0
- package/dist/chunking/late.d.ts +11 -0
- package/dist/chunking/late.d.ts.map +1 -0
- package/dist/chunking/late.js +27 -0
- package/dist/chunking/late.js.map +1 -0
- package/dist/chunking/recursive.d.ts +7 -0
- package/dist/chunking/recursive.d.ts.map +1 -0
- package/dist/chunking/recursive.js +87 -0
- package/dist/chunking/recursive.js.map +1 -0
- package/dist/embedding/index.d.ts +14 -0
- package/dist/embedding/index.d.ts.map +1 -0
- package/dist/embedding/index.js +33 -0
- package/dist/embedding/index.js.map +1 -0
- package/dist/embedding/local-minilm.d.ts +8 -0
- package/dist/embedding/local-minilm.d.ts.map +1 -0
- package/dist/embedding/local-minilm.js +51 -0
- package/dist/embedding/local-minilm.js.map +1 -0
- package/dist/embedding/openai-text-3-small.d.ts +9 -0
- package/dist/embedding/openai-text-3-small.d.ts.map +1 -0
- package/dist/embedding/openai-text-3-small.js +51 -0
- package/dist/embedding/openai-text-3-small.js.map +1 -0
- package/dist/embedding/types.d.ts +8 -0
- package/dist/embedding/types.d.ts.map +1 -0
- package/dist/embedding/types.js +2 -0
- package/dist/embedding/types.js.map +1 -0
- package/dist/embedding/voyage-context-3.d.ts +9 -0
- package/dist/embedding/voyage-context-3.d.ts.map +1 -0
- package/dist/embedding/voyage-context-3.js +55 -0
- package/dist/embedding/voyage-context-3.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/pipeline.d.ts +51 -0
- package/dist/pipeline.d.ts.map +1 -0
- package/dist/pipeline.js +90 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/retrieval/bm25.d.ts +14 -0
- package/dist/retrieval/bm25.d.ts.map +1 -0
- package/dist/retrieval/bm25.js +60 -0
- package/dist/retrieval/bm25.js.map +1 -0
- package/dist/retrieval/dense.d.ts +11 -0
- package/dist/retrieval/dense.d.ts.map +1 -0
- package/dist/retrieval/dense.js +27 -0
- package/dist/retrieval/dense.js.map +1 -0
- package/dist/retrieval/index.d.ts +71 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +90 -0
- package/dist/retrieval/index.js.map +1 -0
- package/dist/retrieval/rerank.d.ts +34 -0
- package/dist/retrieval/rerank.d.ts.map +1 -0
- package/dist/retrieval/rerank.js +101 -0
- package/dist/retrieval/rerank.js.map +1 -0
- package/dist/retrieval/rrf.d.ts +17 -0
- package/dist/retrieval/rrf.d.ts.map +1 -0
- package/dist/retrieval/rrf.js +23 -0
- package/dist/retrieval/rrf.js.map +1 -0
- package/package.json +47 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"local-minilm.js","sourceRoot":"","sources":["../../src/embedding/local-minilm.ts"],"names":[],"mappings":"AASA,MAAM,KAAK,GAAG,yBAAyB,CAAC;AACxC,MAAM,GAAG,GAAG,GAAG,CAAC;AAEhB,8EAA8E;AAC9E,sFAAsF;AACtF,6EAA6E;AAC7E,6DAA6D;AAC7D,KAAK,UAAU,eAAe;IAC5B,IAAI,CAAC;QACH,2KAA2K;QAC3K,OAAO,MAAM,MAAM,CAAC,2BAAkC,CAAC,CAAC;IAC1D,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CACb,gIAAgI;YAC9H,wDAAwD;YACxD,8EAA8E,EAChF,EAAE,KAAK,EAAE,CACV,CAAC;IACJ,CAAC;AACH,CAAC;AAQD,MAAM,UAAU,yBAAyB,CAAC,OAAwB,EAAE;IAClE,IAAI,eAAe,GAER,IAAI,CAAC;IAEhB,KAAK,UAAU,WAAW;QAGxB,IAAI,eAAe;YAAE,OAAO,eAAe,CAAC;QAC5C,eAAe,GAAG,CAAC,KAAK,IAAI,EAAE;YAC5B,IAAI,IAAI,CAAC,eAAe;gBAAE,OAAO,IAAI,CAAC,eAAe,EAAE,CAAC;YACxD,MAAM,GAAG,GAAG,CAAC,MAAM,eAAe,EAAE,CAMnC,CAAC;YACF,OAAO,GAAG,CAAC,QAAQ,CAAC,oBAAoB,EAAE,KAAK,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC;QACzE,CAAC,CAAC,EAAE,CAAC;QACL,eAAe,CAAC,KAAK,CAAC,GAAG,EAAE;YACzB,eAAe,GAAG,IAAI,CAAC;QACzB,CAAC,CAAC,CAAC;QACH,OAAO,eAAe,CAAC;IACzB,CAAC;IAED,OAAO;QACL,OAAO,EAAE,KAAK;QACd,GAAG,EAAE,GAAG;QACR,cAAc,EAAE,KAAK;QACrB,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,KAAiB;YAC5C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClC,MAAM,QAAQ,GAAG,MAAM,WAAW,EAAE,CAAC;YACrC,MAAM,GAAG,GAAe,EAAE,CAAC;YAC3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC1E,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAoB,CAAC,CAAC,CAAC;YACpD,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Embedder } from "./types.js";
|
|
2
|
+
/** Options accepted by {@link createOpenAIEmbedder}. Every field is optional. */
export interface OpenAIEmbedderOpts {
    /** Bearer token for the API; when omitted the `OPENAI_API_KEY` environment variable is used. */
    apiKey?: string;
    /** URL the embedding request is POSTed to; defaults to `https://api.openai.com/v1/embeddings`. */
    endpoint?: string;
    /** `fetch` implementation to use; defaults to the global `fetch`. */
    fetchFn?: typeof fetch;
    /**
     * Requested vector size. When set it is sent as `dimensions` in the request
     * body and reported as the embedder's `dim`; when omitted no `dimensions`
     * field is sent and `dim` is reported as 1536.
     */
    dimensions?: number;
}
/**
 * Creates an {@link Embedder} for the `text-embedding-3-small` model.
 *
 * @param opts Optional overrides for credentials, endpoint, transport and vector size.
 * @returns An embedder whose `isContextAware` flag is `false`.
 * @throws Error when neither `opts.apiKey` nor `OPENAI_API_KEY` provides a key.
 */
export declare function createOpenAIEmbedder(opts?: OpenAIEmbedderOpts): Embedder;
|
|
9
|
+
//# sourceMappingURL=openai-text-3-small.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-text-3-small.d.ts","sourceRoot":"","sources":["../../src/embedding/openai-text-3-small.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAa,MAAM,YAAY,CAAC;AAEtD,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,OAAO,KAAK,CAAC;IACvB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAMD,wBAAgB,oBAAoB,CAAC,IAAI,GAAE,kBAAuB,GAAG,QAAQ,CA8C5E"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
const MODEL = "text-embedding-3-small";
const DEFAULT_DIM = 1536;
const DEFAULT_ENDPOINT = "https://api.openai.com/v1/embeddings";

/**
 * Creates an embedder backed by the OpenAI `text-embedding-3-small` model.
 *
 * @param opts Optional settings:
 *   - `apiKey`: bearer token; falls back to the `OPENAI_API_KEY` environment variable.
 *   - `endpoint`: URL to POST to; defaults to the public OpenAI embeddings endpoint.
 *   - `fetchFn`: fetch implementation; defaults to the global `fetch`.
 *   - `dimensions`: when defined, sent as `dimensions` in the request body and
 *     reported as `dim`; otherwise `dim` is 1536 and the field is not sent.
 * @returns An object exposing `modelId`, `dim`, `isContextAware: false` and an
 *   async `embed(texts, kind)` method.
 * @throws Error when no API key can be resolved.
 */
export function createOpenAIEmbedder(opts = {}) {
    const apiKey = opts.apiKey ?? process.env["OPENAI_API_KEY"];
    if (!apiKey) {
        throw new Error("OPENAI_API_KEY is required for text-embedding-3-small");
    }
    const dim = opts.dimensions ?? DEFAULT_DIM;
    const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
    const fetchFn = opts.fetchFn ?? fetch;
    return {
        modelId: MODEL,
        dim,
        isContextAware: false,
        /**
         * Embeds `texts` with a single POST request.
         *
         * @param texts Strings to embed; an empty array short-circuits to `[]`
         *   without issuing a request.
         * @param _kind Ignored by this embedder.
         * @returns One vector per input text, in input order.
         * @throws Error on a non-2xx response, on a row without an `embedding`,
         *   or when the number of vectors differs from the number of texts.
         */
        async embed(texts, _kind) {
            if (texts.length === 0)
                return [];
            const body = {
                model: MODEL,
                input: texts,
                // Only forward `dimensions` when the caller asked for a specific size.
                ...(opts.dimensions !== undefined ? { dimensions: opts.dimensions } : {}),
            };
            const res = await fetchFn(endpoint, {
                method: "POST",
                headers: {
                    "Authorization": `Bearer ${apiKey}`,
                    "Content-Type": "application/json",
                },
                body: JSON.stringify(body),
            });
            if (!res.ok) {
                // Best effort: an unreadable error body must not mask the status code.
                const detail = await res.text().catch(() => "");
                throw new Error(`openai embeddings failed: ${res.status} ${detail}`);
            }
            const json = await res.json();
            const rows = Array.isArray(json?.data) ? json.data : [];
            // Each row carries the `index` of the input it belongs to. Honour it so a
            // reordered response cannot pair a vector with the wrong text. If any row
            // lacks an integer index, fall back to arrival order.
            const indexed = rows.every((row) => Number.isInteger(row?.index));
            const ordered = indexed ? [...rows].sort((a, b) => a.index - b.index) : rows;
            const out = [];
            for (const row of ordered) {
                if (!row?.embedding)
                    throw new Error("openai response missing embedding");
                out.push(row.embedding);
            }
            if (out.length !== texts.length) {
                throw new Error(`openai returned ${out.length} vectors for ${texts.length} texts`);
            }
            return out;
        },
    };
}
|
|
51
|
+
//# sourceMappingURL=openai-text-3-small.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-text-3-small.js","sourceRoot":"","sources":["../../src/embedding/openai-text-3-small.ts"],"names":[],"mappings":"AASA,MAAM,KAAK,GAAG,wBAAwB,CAAC;AACvC,MAAM,WAAW,GAAG,IAAI,CAAC;AACzB,MAAM,gBAAgB,GAAG,sCAAsC,CAAC;AAEhE,MAAM,UAAU,oBAAoB,CAAC,OAA2B,EAAE;IAChE,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC5D,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;IAC3E,CAAC;IACD,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,IAAI,WAAW,CAAC;IAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,gBAAgB,CAAC;IACnD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC;IAEtC,OAAO;QACL,OAAO,EAAE,KAAK;QACd,GAAG;QACH,cAAc,EAAE,KAAK;QACrB,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,KAAiB;YAC5C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClC,MAAM,IAAI,GAA4B;gBACpC,KAAK,EAAE,KAAK;gBACZ,KAAK,EAAE,KAAK;aACb,CAAC;YACF,IAAI,IAAI,CAAC,UAAU,KAAK,SAAS;gBAAE,IAAI,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC;YACxE,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE;gBAClC,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,MAAM,EAAE;oBACnC,cAAc,EAAE,kBAAkB;iBACnC;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;aAC3B,CAAC,CAAC;YACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;gBACZ,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;gBAChD,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,CAAC,MAAM,IAAI,MAAM,EAAE,CAAC,CAAC;YACvE,CAAC;YACD,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAE7B,CAAC;YACF,MAAM,GAAG,GAAe,EAAE,CAAC;YAC3B,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;gBAClC,IAAI,CAAC,GAAG,CAAC,SAAS;oBAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;gBACzE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC1B,CAAC;YACD,IAAI,GAAG,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;gBAChC,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,CAAC,MAAM,gBAAgB,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;YACrF,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/embedding/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,OAAO,CAAC;AAE7C,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc,EAAE,OAAO,CAAC;IACxB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;CAC/D"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/embedding/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Embedder } from "./types.js";
|
|
2
|
+
/** Options accepted by {@link createVoyageContext3Embedder}. Every field is optional. */
export interface VoyageContext3Opts {
    /** Bearer token for the API; when omitted the `VOYAGE_API_KEY` environment variable is used. */
    apiKey?: string;
    /**
     * Vector size requested from the service (sent as `output_dimension`) and
     * reported as the embedder's `dim`; defaults to 1024.
     */
    outputDimension?: 256 | 512 | 1024 | 2048;
    /** URL the request is POSTed to; defaults to `https://api.voyageai.com/v1/contextualizedembeddings`. */
    endpoint?: string;
    /** `fetch` implementation to use; defaults to the global `fetch`. */
    fetchFn?: typeof fetch;
}
/**
 * Creates an {@link Embedder} for the `voyage-context-3` model.
 *
 * Each `embed` call sends its whole `texts` array as a single document, so
 * callers who want per-document context should batch one document's chunks
 * per call.
 *
 * @param opts Optional overrides for credentials, endpoint, transport and vector size.
 * @returns An embedder whose `isContextAware` flag is `true`.
 * @throws Error when neither `opts.apiKey` nor `VOYAGE_API_KEY` provides a key.
 */
export declare function createVoyageContext3Embedder(opts?: VoyageContext3Opts): Embedder;
|
|
9
|
+
//# sourceMappingURL=voyage-context-3.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voyage-context-3.d.ts","sourceRoot":"","sources":["../../src/embedding/voyage-context-3.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAa,MAAM,YAAY,CAAC;AAEtD,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,GAAG,GAAG,GAAG,GAAG,IAAI,GAAG,IAAI,CAAC;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,OAAO,KAAK,CAAC;CACxB;AASD,wBAAgB,4BAA4B,CAAC,IAAI,GAAE,kBAAuB,GAAG,QAAQ,CAmDpF"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
const DEFAULT_DIM = 1024;
const DEFAULT_ENDPOINT = "https://api.voyageai.com/v1/contextualizedembeddings";

// Voyage's contextualised embeddings endpoint accepts an array of "chunks" and
// returns one vector per chunk that has been informed by the whole document.
// We treat each batch of input texts as one document. Callers who want
// per-document context should batch their chunks together.

/**
 * Creates an embedder backed by the `voyage-context-3` model.
 *
 * @param opts Optional settings:
 *   - `apiKey`: bearer token; falls back to the `VOYAGE_API_KEY` environment variable.
 *   - `outputDimension`: sent as `output_dimension` and reported as `dim`; defaults to 1024.
 *   - `endpoint`: URL to POST to; defaults to the Voyage contextualized embeddings endpoint.
 *   - `fetchFn`: fetch implementation; defaults to the global `fetch`.
 * @returns An object exposing `modelId`, `dim`, `isContextAware: true` and an
 *   async `embed(texts, kind)` method.
 * @throws Error when no API key can be resolved.
 */
export function createVoyageContext3Embedder(opts = {}) {
    const apiKey = opts.apiKey ?? process.env["VOYAGE_API_KEY"];
    if (!apiKey) {
        throw new Error("VOYAGE_API_KEY is required for voyage-context-3");
    }
    const dim = opts.outputDimension ?? DEFAULT_DIM;
    const endpoint = opts.endpoint ?? DEFAULT_ENDPOINT;
    const fetchFn = opts.fetchFn ?? fetch;
    return {
        modelId: "voyage-context-3",
        dim,
        isContextAware: true,
        /**
         * Embeds `texts` as the chunks of one document with a single POST request.
         *
         * @param texts Chunk strings; an empty array short-circuits to `[]`
         *   without issuing a request.
         * @param kind Forwarded as `input_type`; defaults to `"document"`.
         * @returns One vector per input text, in input order.
         * @throws Error on a non-2xx response, on a row without an `embedding`,
         *   or when the number of vectors differs from the number of texts.
         */
        async embed(texts, kind = "document") {
            if (texts.length === 0)
                return [];
            const res = await fetchFn(endpoint, {
                method: "POST",
                headers: {
                    "Authorization": `Bearer ${apiKey}`,
                    "Content-Type": "application/json",
                },
                body: JSON.stringify({
                    // A single inner list: the whole batch is one document.
                    inputs: [texts],
                    model: "voyage-context-3",
                    input_type: kind,
                    output_dimension: dim,
                }),
            });
            if (!res.ok) {
                // Best effort: an unreadable error body must not mask the status code.
                const body = await res.text().catch(() => "");
                throw new Error(`voyage-context-3 request failed: ${res.status} ${body}`);
            }
            const json = await res.json();
            // Only one document was sent, so only the first result list is relevant.
            const first = json?.data?.[0]?.data;
            const rows = Array.isArray(first) ? first : [];
            // Realign by the per-row `index` when every row has one, so a reordered
            // response cannot pair a vector with the wrong chunk.
            const indexed = rows.every((row) => Number.isInteger(row?.index));
            const ordered = indexed ? [...rows].sort((a, b) => a.index - b.index) : rows;
            const out = [];
            for (const row of ordered) {
                if (!row?.embedding) {
                    throw new Error("voyage-context-3 response missing embedding");
                }
                out.push(row.embedding);
            }
            if (out.length !== texts.length) {
                throw new Error(`voyage-context-3 returned ${out.length} vectors for ${texts.length} texts`);
            }
            return out;
        },
    };
}
|
|
55
|
+
//# sourceMappingURL=voyage-context-3.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voyage-context-3.js","sourceRoot":"","sources":["../../src/embedding/voyage-context-3.ts"],"names":[],"mappings":"AASA,MAAM,WAAW,GAAG,IAAI,CAAC;AACzB,MAAM,gBAAgB,GAAG,sDAAsD,CAAC;AAEhF,+EAA+E;AAC/E,6EAA6E;AAC7E,uEAAuE;AACvE,2DAA2D;AAC3D,MAAM,UAAU,4BAA4B,CAAC,OAA2B,EAAE;IACxE,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC5D,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;IACrE,CAAC;IACD,MAAM,GAAG,GAAG,IAAI,CAAC,eAAe,IAAI,WAAW,CAAC;IAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,gBAAgB,CAAC;IACnD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC;IAEtC,OAAO;QACL,OAAO,EAAE,kBAAkB;QAC3B,GAAG;QACH,cAAc,EAAE,IAAI;QACpB,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,OAAkB,UAAU;YACvD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClC,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE;gBAClC,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,MAAM,EAAE;oBACnC,cAAc,EAAE,kBAAkB;iBACnC;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,MAAM,EAAE,CAAC,KAAK,CAAC;oBACf,KAAK,EAAE,kBAAkB;oBACzB,UAAU,EAAE,IAAI;oBAChB,gBAAgB,EAAE,GAAG;iBACtB,CAAC;aACH,CAAC,CAAC;YACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;gBACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;gBAC9C,MAAM,IAAI,KAAK,CAAC,oCAAoC,GAAG,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC,CAAC;YAC5E,CAAC;YACD,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAE7B,CAAC;YACF,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,IAAI,EAAE,CAAC;YACzC,MAAM,GAAG,GAAe,EAAE,CAAC;YAC3B,KAAK,MAAM,GAAG,IAAI,KAAK,EAAE,CAAC;gBACxB,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC;oBACnB,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;gBACjE,CAAC;gBACD,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC1B,CAAC;YACD,IAAI,GAAG,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;gBAChC,MAAM,IAAI,KAAK,CACb,6BAA6B,GAAG,CAAC,MAAM,gBAAgB,KAAK,CAAC,MAAM,QAAQ,CAC5E,CAAC;YACJ,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export { chunkDocument, fixedChunk, recursiveChunk, astChunk, meanPoolByRange, addChunkContext, prependContext, } from "./chunking/index.js";
|
|
2
|
+
export type { Chunk, ChunkMode, ChunkDocumentOpts, LateChunkRange, LateChunkTokenPosition, ContextProvider, AddChunkContextArgs, } from "./chunking/index.js";
|
|
3
|
+
export { createEmbedder, createVoyageContext3Embedder, createOpenAIEmbedder, createLocalMiniLMEmbedder, } from "./embedding/index.js";
|
|
4
|
+
export type { Embedder, EmbedKind, EmbedderModel, CreateEmbedderOpts } from "./embedding/index.js";
|
|
5
|
+
export { retrieve, embedQuery, denseRank, cosineSim, bm25Rank, reciprocalRankFusion, createVoyageReranker, createCrossEncoderReranker, createDefaultReranker, shouldRerank, } from "./retrieval/index.js";
|
|
6
|
+
export type { RetrievalCandidate, RetrievalScoredChunk, RetrievalTrace, RetrieveOpts, RetrieveResult, Reranker, RerankCandidate, RerankScore, } from "./retrieval/index.js";
|
|
7
|
+
export { ingest, query } from "./pipeline.js";
|
|
8
|
+
export type { IngestOpts, IngestResult, IngestChunkRow, QueryOpts, } from "./pipeline.js";
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,aAAa,EACb,UAAU,EACV,cAAc,EACd,QAAQ,EACR,eAAe,EACf,eAAe,EACf,cAAc,GACf,MAAM,qBAAqB,CAAC;AAC7B,YAAY,EACV,KAAK,EACL,SAAS,EACT,iBAAiB,EACjB,cAAc,EACd,sBAAsB,EACtB,eAAe,EACf,mBAAmB,GACpB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,oBAAoB,EACpB,yBAAyB,GAC1B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,QAAQ,EAAE,SAAS,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAEnG,OAAO,EACL,QAAQ,EACR,UAAU,EACV,SAAS,EACT,SAAS,EACT,QAAQ,EACR,oBAAoB,EACpB,oBAAoB,EACpB,0BAA0B,EAC1B,qBAAqB,EACrB,YAAY,GACb,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,kBAAkB,EAClB,oBAAoB,EACpB,cAAc,EACd,YAAY,EACZ,cAAc,EACd,QAAQ,EACR,eAAe,EACf,WAAW,GACZ,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AAC9C,YAAY,EACV,UAAU,EACV,YAAY,EACZ,cAAc,EACd,SAAS,GACV,MAAM,eAAe,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { chunkDocument, fixedChunk, recursiveChunk, astChunk, meanPoolByRange, addChunkContext, prependContext, } from "./chunking/index.js";
|
|
2
|
+
export { createEmbedder, createVoyageContext3Embedder, createOpenAIEmbedder, createLocalMiniLMEmbedder, } from "./embedding/index.js";
|
|
3
|
+
export { retrieve, embedQuery, denseRank, cosineSim, bm25Rank, reciprocalRankFusion, createVoyageReranker, createCrossEncoderReranker, createDefaultReranker, shouldRerank, } from "./retrieval/index.js";
|
|
4
|
+
export { ingest, query } from "./pipeline.js";
|
|
5
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,aAAa,EACb,UAAU,EACV,cAAc,EACd,QAAQ,EACR,eAAe,EACf,eAAe,EACf,cAAc,GACf,MAAM,qBAAqB,CAAC;AAW7B,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,oBAAoB,EACpB,yBAAyB,GAC1B,MAAM,sBAAsB,CAAC;AAG9B,OAAO,EACL,QAAQ,EACR,UAAU,EACV,SAAS,EACT,SAAS,EACT,QAAQ,EACR,oBAAoB,EACpB,oBAAoB,EACpB,0BAA0B,EAC1B,qBAAqB,EACrB,YAAY,GACb,MAAM,sBAAsB,CAAC;AAY9B,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { Embedder } from "./embedding/types.js";
|
|
2
|
+
import type { Reranker } from "./retrieval/rerank.js";
|
|
3
|
+
import { type Chunk, type ChunkMode } from "./chunking/index.js";
|
|
4
|
+
import { type ContextProvider } from "./chunking/contextual-fallback.js";
|
|
5
|
+
import { type RetrievalCandidate, type RetrieveResult } from "./retrieval/index.js";
|
|
6
|
+
/** One embedded chunk produced by `ingest`. */
export interface IngestChunkRow {
    /** The chunk's `ordinal`, copied from the chunker's output. */
    ordinal: number;
    /** The chunk text as produced by the chunker. */
    content: string;
    /**
     * The exact text sent to the embedder: the context paragraph, a blank line
     * and the chunk text when context was added, otherwise the chunk text alone.
     */
    contentForEmbedding: string;
    /** Context returned by the context provider for this chunk, or `null` when none was requested. */
    contextParagraph: string | null;
    /** Vector returned by the embedder for this chunk. */
    embedding: number[];
    /** Length of `embedding`. */
    embeddingDim: number;
    /** The embedder's `modelId`. */
    embeddingModel: string;
    /** Estimated token count of `contentForEmbedding` (characters / 4, rounded up). */
    tokenCount: number;
    /** The chunk's `charStart`, copied from the chunker's output. */
    charStart: number;
    /** The chunk's `charEnd`, copied from the chunker's output. */
    charEnd: number;
    /** The chunk's `mode`, copied from the chunker's output. */
    mode: ChunkMode;
}
|
|
19
|
+
/** Result of an `ingest` call. */
export interface IngestResult {
    /** One row per chunk, in chunker order. */
    rows: IngestChunkRow[];
    /** The embedder's `modelId`. */
    modelId: string;
    /** The embedder's declared `dim` (not measured from the returned vectors). */
    dim: number;
    /** The embedder's `isContextAware` flag. */
    isContextAware: boolean;
    /** Number of chunks whose context paragraph is not `null`. */
    contextAdded: number;
}
|
|
26
|
+
/** Input to {@link ingest}. */
export interface IngestOpts {
    /** Full document text to chunk and embed. */
    content: string;
    /** Forwarded to the chunker when non-empty. */
    mimeType?: string;
    /** Embedder used to vectorise every chunk. */
    embedder: Embedder;
    /** Chunking mode forwarded to the chunker; `"late"` is rejected with an error. */
    mode?: ChunkMode;
    /** Forwarded to the chunker when defined. */
    maxSize?: number;
    /** Forwarded to the chunker when defined. */
    overlap?: number;
    /**
     * Supplies a context paragraph per chunk. Only consulted when the embedder
     * is not context-aware.
     */
    contextProvider?: ContextProvider;
}
/**
 * Chunks `opts.content`, optionally adds per-chunk context, embeds all chunks
 * in one `embed` call and returns one row per chunk.
 *
 * @throws Error when `opts.mode` is `"late"`.
 */
export declare function ingest(opts: IngestOpts): Promise<IngestResult>;
|
|
36
|
+
/** Input to {@link query}. */
export interface QueryOpts {
    /** The query text. */
    query: string;
    /** Candidate chunks to rank. */
    candidates: RetrievalCandidate[];
    /** Embedder used to vectorise the query; not called when `precomputedDenseScores` is supplied. */
    embedder: Embedder;
    /**
     * Dense scores already computed elsewhere (for example database-side).
     * When present, the query is not embedded here.
     */
    precomputedDenseScores?: Array<{
        id: string;
        score: number;
    }>;
    /** Forwarded to the retrieval step when defined. */
    k?: number;
    /** Forwarded to the retrieval step when provided. */
    reranker?: Reranker;
    /** Forwarded to the retrieval step when defined. */
    rerankTopN?: number;
    /** Forwarded to the retrieval step when defined. */
    minScore?: number;
}
/**
 * Runs retrieval for `opts.query` over `opts.candidates`, embedding the query
 * first unless precomputed dense scores are supplied.
 *
 * @throws Error when the embedder yields no vector for the query.
 */
export declare function query(opts: QueryOpts): Promise<RetrieveResult>;
|
|
50
|
+
export type { Chunk, ChunkMode };
|
|
51
|
+
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAiB,KAAK,KAAK,EAAE,KAAK,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EAEL,KAAK,eAAe,EACrB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAEL,KAAK,kBAAkB,EACvB,KAAK,cAAc,EACpB,MAAM,sBAAsB,CAAC;AAE9B,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,SAAS,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,cAAc,EAAE,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc,EAAE,OAAO,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,QAAQ,CAAC;IACnB,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IAIjB,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED,wBAAsB,MAAM,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC,CA4DpE;AAED,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,kBAAkB,EAAE,CAAC;IACjC,QAAQ,EAAE,QAAQ,CAAC;IAGnB,sBAAsB,CAAC,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC9D,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,wBAAsB,KAAK,CAAC,IAAI,EAAE,SAAS,GAAG,OAAO,CAAC,cAAc,CAAC,CAyBpE;AAMD,YAAY,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC"}
|
package/dist/pipeline.js
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { chunkDocument } from "./chunking/index.js";
|
|
2
|
+
import { addChunkContext, } from "./chunking/contextual-fallback.js";
|
|
3
|
+
import { retrieve as runRetrieve, } from "./retrieval/index.js";
|
|
4
|
+
/**
 * Chunks a document, optionally enriches each chunk with a context paragraph,
 * embeds every chunk in a single `embed` call and returns one row per chunk.
 *
 * @param opts Ingest options: `content`, `embedder`, and optional `mode`,
 *   `mimeType`, `maxSize`, `overlap`, `contextProvider`.
 * @returns `{ rows, modelId, dim, isContextAware, contextAdded }`.
 * @throws Error when `opts.mode` is `"late"`, or when the embedder does not
 *   return exactly one vector per chunk.
 */
export async function ingest(opts) {
    if (opts.mode === "late") {
        throw new Error('ingest mode "late" is not yet implemented: meanPoolByRange token-level embedding is not wired. Use "recursive", "ast", or "fixed" instead.');
    }
    // Only forward the optional knobs the caller actually set.
    const chunks = chunkDocument({
        content: opts.content,
        ...(opts.mode ? { mode: opts.mode } : {}),
        ...(opts.mimeType ? { mimeType: opts.mimeType } : {}),
        ...(opts.maxSize !== undefined ? { maxSize: opts.maxSize } : {}),
        ...(opts.overlap !== undefined ? { overlap: opts.overlap } : {}),
    });
    const isContextAware = opts.embedder.isContextAware;
    // The context provider is a fallback for embedders that are not context-aware.
    const provider = isContextAware ? undefined : opts.contextProvider;
    const contextParagraphs = [];
    // Sequential on purpose: one provider call at a time, in chunk order.
    for (const chunk of chunks) {
        if (provider) {
            const ctx = await addChunkContext({
                chunk: chunk.content,
                document: opts.content,
                provider,
            });
            contextParagraphs.push(ctx);
        }
        else {
            contextParagraphs.push(null);
        }
    }
    const embedTexts = chunks.map((c, i) => {
        const ctx = contextParagraphs[i];
        return ctx ? `${ctx}\n\n${c.content}` : c.content;
    });
    const vectors = await opts.embedder.embed(embedTexts, "document");
    // A short result used to yield rows with an empty embedding (dim 0);
    // fail loudly instead of emitting rows that cannot be searched.
    if (vectors.length !== chunks.length) {
        throw new Error(`embedder returned ${vectors.length} vectors for ${chunks.length} chunks`);
    }
    const rows = chunks.map((c, i) => {
        const vec = vectors[i];
        if (vec === undefined || vec === null) {
            throw new Error(`embedder returned no vector for chunk ${i}`);
        }
        const embedded = embedTexts[i] ?? c.content;
        return {
            ordinal: c.ordinal,
            content: c.content,
            contentForEmbedding: embedded,
            contextParagraph: contextParagraphs[i] ?? null,
            embedding: vec,
            embeddingDim: vec.length,
            embeddingModel: opts.embedder.modelId,
            tokenCount: estimateTokens(embedded),
            charStart: c.charStart,
            charEnd: c.charEnd,
            mode: c.mode,
        };
    });
    return {
        rows,
        modelId: opts.embedder.modelId,
        dim: opts.embedder.dim,
        isContextAware,
        contextAdded: contextParagraphs.filter((p) => p !== null).length,
    };
}
|
|
60
|
+
/**
 * Runs retrieval for a query over the supplied candidates.
 *
 * When `opts.precomputedDenseScores` is present the query is not embedded
 * here; otherwise it is embedded with kind `"query"` and the vector is passed
 * on as `queryEmbedding`.
 *
 * @param opts Query options: `query`, `candidates`, `embedder`, and optional
 *   `precomputedDenseScores`, `k`, `rerankTopN`, `minScore`, `reranker`.
 * @returns Whatever the retrieval step returns.
 * @throws Error when the embedder yields no vector for the query.
 */
export async function query(opts) {
    // Optional knobs shared by both paths; only those the caller set are forwarded.
    const tuning = () => ({
        ...(opts.k !== undefined ? { k: opts.k } : {}),
        ...(opts.rerankTopN !== undefined ? { rerankTopN: opts.rerankTopN } : {}),
        ...(opts.minScore !== undefined ? { minScore: opts.minScore } : {}),
        ...(opts.reranker ? { reranker: opts.reranker } : {}),
    });
    const dense = opts.precomputedDenseScores;
    if (!dense) {
        const [qVec] = await opts.embedder.embed([opts.query], "query");
        if (!qVec) {
            throw new Error("embedder returned no vector for query");
        }
        return runRetrieve({
            query: opts.query,
            queryEmbedding: qVec,
            candidates: opts.candidates,
            ...tuning(),
        });
    }
    // With database-side dense scores (pgvector) there is nothing to embed here:
    // the caller already embedded the query to run the SQL ranking.
    return runRetrieve({
        query: opts.query,
        precomputedDenseScores: dense,
        candidates: opts.candidates,
        ...tuning(),
    });
}
|
|
87
|
+
/**
 * Rough token estimate: the string's length divided by four, rounded up.
 *
 * @param text Text to measure.
 * @returns The estimated token count (0 for an empty string).
 */
function estimateTokens(text) {
    const charsPerToken = 4;
    const estimate = text.length / charsPerToken;
    return Math.ceil(estimate);
}
|
|
90
|
+
//# sourceMappingURL=pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,aAAa,EAA8B,MAAM,qBAAqB,CAAC;AAChF,OAAO,EACL,eAAe,GAEhB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,QAAQ,IAAI,WAAW,GAGxB,MAAM,sBAAsB,CAAC;AAqC9B,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,IAAgB;IAC3C,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CACb,4IAA4I,CAC7I,CAAC;IACJ,CAAC;IACD,MAAM,MAAM,GAAG,aAAa,CAAC;QAC3B,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACrD,GAAG,CAAC,IAAI,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,GAAG,CAAC,IAAI,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACjE,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC;IACpD,MAAM,WAAW,GAAG,CAAC,cAAc,IAAI,IAAI,CAAC,eAAe,KAAK,SAAS,CAAC;IAC1E,MAAM,iBAAiB,GAAyB,EAAE,CAAC;IACnD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,WAAW,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,MAAM,eAAe,CAAC;gBAChC,KAAK,EAAE,KAAK,CAAC,OAAO;gBACpB,QAAQ,EAAE,IAAI,CAAC,OAAO;gBACtB,QAAQ,EAAE,IAAI,CAAC,eAAe;aAC/B,CAAC,CAAC;YACH,iBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACrC,MAAM,GAAG,GAAG,iBAAiB,CAAC,CAAC,CAAC,CAAC;QACjC,OAAO,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IACpD,CAAC,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;IAElE,MAAM,IAAI,GAAqB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACjD,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,OAAO;YACL,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,mBAAmB,EAAE,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO;
YAC/C,gBAAgB,EAAE,iBAAiB,CAAC,CAAC,CAAC,IAAI,IAAI;YAC9C,SAAS,EAAE,GAAG;YACd,YAAY,EAAE,GAAG,CAAC,MAAM;YACxB,cAAc,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO;YACrC,UAAU,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC;YACtD,SAAS,EAAE,CAAC,CAAC,SAAS;YACtB,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,IAAI,EAAE,CAAC,CAAC,IAAI;SACb,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,OAAO;QACL,IAAI;QACJ,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO;QAC9B,GAAG,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG;QACtB,cAAc;QACd,YAAY,EAAE,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM;KACjE,CAAC;AACJ,CAAC;AAeD,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAe;IACzC,6EAA6E;IAC7E,gEAAgE;IAChE,IAAI,IAAI,CAAC,sBAAsB,EAAE,CAAC;QAChC,OAAO,WAAW,CAAC;YACjB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;YACnD,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9C,GAAG,CAAC,IAAI,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACzE,GAAG,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACnE,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACtD,CAAC,CAAC;IACL,CAAC;IACD,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;IAChE,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IACpE,OAAO,WAAW,CAAC;QACjB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,cAAc,EAAE,IAAI;QACpB,UAAU,EAAE,IAAI,CAAC,UAAU;QAC3B,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9C,GAAG,CAAC,IAAI,CAAC,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACzE,GAAG,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACnE,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACtD,
CAAC,CAAC;AACL,CAAC;AAED,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** A document to be scored by {@link bm25Rank}. */
export interface Bm25Candidate {
    /** Identifier echoed back in the matching {@link Bm25Score}. */
    id: string;
    /** Text that is tokenised and scored against the query. */
    content: string;
}
/** Score assigned to one candidate. */
export interface Bm25Score {
    /** The candidate's `id`. */
    id: string;
    /** BM25 score; only candidates scoring above zero are returned. */
    score: number;
}
/** Tuning parameters for {@link bm25Rank}. */
export interface Bm25Opts {
    /** `k1` parameter of the scoring formula; defaults to 1.5. */
    k1?: number;
    /** `b` (length-normalisation) parameter of the scoring formula; defaults to 0.75. */
    b?: number;
}
/**
 * Scores `candidates` against `query` with an in-process BM25 and returns at
 * most `k` results with a positive score, highest score first.
 *
 * Returns an empty array when the query has no terms or there are no candidates.
 */
export declare function bm25Rank(query: string, candidates: Bm25Candidate[], k: number, opts?: Bm25Opts): Bm25Score[];
|
|
14
|
+
//# sourceMappingURL=bm25.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/retrieval/bm25.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AASD,wBAAgB,QAAQ,CACtB,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,aAAa,EAAE,EAC3B,CAAC,EAAE,MAAM,EACT,IAAI,GAAE,QAAa,GAClB,SAAS,EAAE,CA6Cb"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
const K1_DEFAULT = 1.5;
|
|
2
|
+
const B_DEFAULT = 0.75;
|
|
3
|
+
// Small in-process BM25 — used in tests and as a fallback when Postgres is
|
|
4
|
+
// not available. The production path uses Postgres tsvector + ts_rank in the
|
|
5
|
+
// API layer; this module gives the pipeline a portable implementation it can
|
|
6
|
+
// run anywhere without a DB.
|
|
7
|
+
/**
 * Ranks candidates against a query with an in-process BM25 scorer.
 *
 * @param {string} query Free-text query; split into terms with `tokenize`.
 * @param {{ id: string, content: string }[]} candidates Documents to score.
 * @param {number} k Limit applied with `slice(0, k)` to the sorted results.
 * @param {{ k1?: number, b?: number }} [opts] Optional overrides; missing
 *   values fall back to `K1_DEFAULT` and `B_DEFAULT`.
 * @returns {{ id: string, score: number }[]} Candidates whose score is above
 *   zero, highest score first. Empty when the query has no terms or there are
 *   no candidates.
 */
export function bm25Rank(query, candidates, k, opts = {}) {
    const saturation = opts.k1 ?? K1_DEFAULT;
    const lengthWeight = opts.b ?? B_DEFAULT;
    const terms = tokenize(query);
    if (terms.length === 0 || candidates.length === 0) {
        return [];
    }

    // Tokenize every candidate once; all later statistics derive from this.
    const tokenized = candidates.map((candidate) => tokenize(candidate.content));
    let totalTokens = 0;
    for (const tokens of tokenized) {
        totalTokens += tokens.length;
    }
    const meanLength = totalTokens / Math.max(tokenized.length, 1);

    // Document frequency: each distinct token counts once per document.
    const docFreq = new Map();
    for (const tokens of tokenized) {
        for (const token of new Set(tokens)) {
            docFreq.set(token, (docFreq.get(token) ?? 0) + 1);
        }
    }

    // Inverse document frequency, only needed for the query terms.
    const docCount = tokenized.length;
    const inverseDocFreq = new Map(terms.map((term) => {
        const containing = docFreq.get(term) ?? 0;
        return [term, Math.log(1 + (docCount - containing + 0.5) / (containing + 0.5))];
    }));

    const ranked = [];
    candidates.forEach((candidate, position) => {
        const tokens = tokenized[position] ?? [];
        const counts = new Map();
        for (const token of tokens) {
            counts.set(token, (counts.get(token) ?? 0) + 1);
        }
        // `|| 1` avoids dividing by zero when every document is empty.
        const relativeLength = tokens.length / (meanLength || 1);
        let total = 0;
        for (const term of terms) {
            const frequency = counts.get(term) ?? 0;
            if (frequency === 0) {
                continue;
            }
            const numerator = frequency * (saturation + 1);
            const denominator = frequency + saturation * (1 - lengthWeight + lengthWeight * relativeLength);
            total += (inverseDocFreq.get(term) ?? 0) * (numerator / (denominator || 1));
        }
        // Candidates that match no query term are left out of the result.
        if (total > 0) {
            ranked.push({ id: candidate.id, score: total });
        }
    });

    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, k);
}
|
|
54
|
+
/**
 * Splits text into lowercase word tokens for BM25 scoring.
 *
 * A token is a maximal run of Unicode letters, combining marks, digits or
 * underscores; every other character acts as a separator. For pure-ASCII
 * input this yields exactly the same tokens as an `[a-z0-9_]` word class.
 *
 * @param {string} text Raw query or document text.
 * @returns {string[]} Lowercased tokens in order of appearance; empty when
 *   the text contains no token characters.
 */
function tokenize(text) {
    return text
        .toLowerCase()
        // Unicode-aware separator class: an ASCII-only class would cut
        // accented words apart ("café" -> "caf") and produce no tokens at all
        // for non-Latin scripts. \p{M} keeps decomposed accents attached.
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((token) => token.length > 0);
}
|
|
60
|
+
//# sourceMappingURL=bm25.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/retrieval/bm25.ts"],"names":[],"mappings":"AAeA,MAAM,UAAU,GAAG,GAAG,CAAC;AACvB,MAAM,SAAS,GAAG,IAAI,CAAC;AAEvB,2EAA2E;AAC3E,6EAA6E;AAC7E,6EAA6E;AAC7E,6BAA6B;AAC7B,MAAM,UAAU,QAAQ,CACtB,KAAa,EACb,UAA2B,EAC3B,CAAS,EACT,OAAiB,EAAE;IAEnB,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE,IAAI,UAAU,CAAC;IACjC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,SAAS,CAAC;IAC9B,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElE,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IACxD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAChF,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YACpB,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,SAAS;YAC1B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACZ,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IACD,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;IACtB,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACzB,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,MAAM,MAAM,GAAgB,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QACpD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACjC,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;QACrC,KAAK,MAAM,CAAC,IAAI,GAAG;YAAE,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACrD,IAAI,KAAK,GA
AG,CAAC,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5B,IAAI,CAAC,KAAK,CAAC;gBAAE,SAAS;YACtB,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;YACzB,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9D,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC;QACD,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM;SACV,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC;SAC1B,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;SACnC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACjB,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,aAAa,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** A candidate that can be ranked by `denseRank`. */
export interface DenseCandidate {
|
|
2
|
+
/** Identifier copied unchanged into the corresponding `DenseScore.id`. */
id: string;
|
|
3
|
+
/** Vector compared with the query vector; `denseRank` drops candidates whose vector length differs from the query's. */
embedding: number[];
|
|
4
|
+
}
|
|
5
|
+
/** One ranked entry returned by `denseRank`. */
export interface DenseScore {
|
|
6
|
+
/** The `id` of the candidate this score belongs to. */
id: string;
|
|
7
|
+
/** Cosine similarity between the candidate embedding and the query vector. */
score: number;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Computes the cosine similarity of two vectors.
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a`.
 * @returns The dot product divided by the product of the two magnitudes, or
 *   0 when that product is zero.
 * @throws Error when the two vectors differ in length.
 */
export declare function cosineSim(a: number[], b: number[]): number;
|
|
10
|
+
/**
 * Ranks candidates by cosine similarity to a query vector.
 *
 * @param queryVector Query embedding.
 * @param candidates Candidates to score; those whose embedding length differs
 *   from `queryVector.length` are skipped rather than raising an error.
 * @param k Limit applied with `slice(0, k)` to the sorted results.
 * @returns Scored candidates, highest similarity first.
 */
export declare function denseRank(queryVector: number[], candidates: DenseCandidate[], k: number): DenseScore[];
|
|
11
|
+
//# sourceMappingURL=dense.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dense.d.ts","sourceRoot":"","sources":["../../src/retrieval/dense.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,cAAc;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wBAAgB,SAAS,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiB1D;AAED,wBAAgB,SAAS,CACvB,WAAW,EAAE,MAAM,EAAE,EACrB,UAAU,EAAE,cAAc,EAAE,EAC5B,CAAC,EAAE,MAAM,GACR,UAAU,EAAE,CAMd"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Computes the cosine similarity of two equally sized vectors.
 *
 * @param {number[]} a First vector.
 * @param {number[]} b Second vector.
 * @returns {number} Dot product divided by the product of the magnitudes, or
 *   0 when that product is zero (for example an all-zero or empty vector).
 * @throws {Error} When the vectors differ in length.
 */
export function cosineSim(a, b) {
    const sameDimension = a.length === b.length;
    if (!sameDimension) {
        throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
    }

    let dotProduct = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    for (let idx = 0; idx < a.length; idx += 1) {
        // Missing entries (null/undefined) are treated as 0.
        const x = a[idx] ?? 0;
        const y = b[idx] ?? 0;
        dotProduct += x * y;
        sumSquaresA += x * x;
        sumSquaresB += y * y;
    }

    const magnitudeProduct = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
    // A zero magnitude makes the ratio undefined; report "no similarity".
    return magnitudeProduct === 0 ? 0 : dotProduct / magnitudeProduct;
}
|
|
20
|
+
/**
 * Ranks candidates by cosine similarity to the query vector.
 *
 * @param {number[]} queryVector Query embedding.
 * @param {{ id: string, embedding: number[] }[]} candidates Candidates to
 *   score; those whose embedding length differs from the query's are skipped.
 * @param {number} k Limit applied with `slice(0, k)` to the sorted results.
 * @returns {{ id: string, score: number }[]} Scored candidates, highest
 *   similarity first.
 */
export function denseRank(queryVector, candidates, k) {
    const ranked = candidates.flatMap((candidate) => {
        // Skip vectors of a different dimension instead of letting cosineSim throw.
        if (candidate.embedding.length !== queryVector.length) {
            return [];
        }
        return [{ id: candidate.id, score: cosineSim(candidate.embedding, queryVector) }];
    });
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, k);
}
|
|
27
|
+
//# sourceMappingURL=dense.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dense.js","sourceRoot":"","sources":["../../src/retrieval/dense.ts"],"names":[],"mappings":"AAUA,MAAM,UAAU,SAAS,CAAC,CAAW,EAAE,CAAW;IAChD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC,MAAM,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACrB,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACrB,GAAG,IAAI,EAAE,GAAG,EAAE,CAAC;QACf,KAAK,IAAI,EAAE,GAAG,EAAE,CAAC;QACjB,KAAK,IAAI,EAAE,GAAG,EAAE,CAAC;IACnB,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,OAAO,GAAG,GAAG,KAAK,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,SAAS,CACvB,WAAqB,EACrB,UAA4B,EAC5B,CAAS;IAET,MAAM,MAAM,GAAG,UAAU;SACtB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,KAAK,WAAW,CAAC,MAAM,CAAC;SACxD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC;IAC1E,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { Embedder } from "../embedding/types.js";
|
|
2
|
+
import type { Reranker } from "./rerank.js";
|
|
3
|
+
/**
 * A chunk offered to `retrieve` via `RetrieveOpts.candidates`.
 * NOTE(review): the retrieve implementation is not visible from this
 * declaration file; the field notes marked "presumably" are inferred from
 * names and types only — confirm against the implementation.
 */
export interface RetrievalCandidate {
|
|
4
|
+
id: string;
|
|
5
|
+
content: string;
|
|
6
|
+
/** Optional vector; presumably used for dense scoring when present — confirm. */
embedding?: number[];
|
|
7
|
+
/** Optional text; presumably scored by BM25 in place of `content` when set — confirm. */
contentForBm25?: string;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Diagnostic record returned as `RetrieveResult.trace`.
 * NOTE(review): the code that fills this record is not visible from this
 * declaration file; the field notes marked "presumably" are inferred from
 * names and types only — confirm against the retrieve implementation.
 */
export interface RetrievalTrace {
|
|
10
|
+
/** Presumably the dense (embedding) scores per candidate id — confirm. */
denseScores: Array<{
|
|
11
|
+
id: string;
|
|
12
|
+
score: number;
|
|
13
|
+
}>;
|
|
14
|
+
/** Presumably the BM25 scores per candidate id — confirm. */
bm25Scores: Array<{
|
|
15
|
+
id: string;
|
|
16
|
+
score: number;
|
|
17
|
+
}>;
|
|
18
|
+
/** Presumably the reciprocal-rank-fusion scores per candidate id — confirm. */
rrfScores: Array<{
|
|
19
|
+
id: string;
|
|
20
|
+
score: number;
|
|
21
|
+
}>;
|
|
22
|
+
/** Nullable by type; presumably `null` when no reranking took place — confirm. */
rerankScores: Array<{
|
|
23
|
+
id: string;
|
|
24
|
+
score: number;
|
|
25
|
+
}> | null;
|
|
26
|
+
/** Presumably the ids of the returned chunks, in result order — confirm. */
finalIds: string[];
|
|
27
|
+
/** Presumably the elapsed time of the retrieve call in milliseconds — confirm. */
latencyMs: number;
|
|
28
|
+
}
|
|
29
|
+
/**
 * One element of `RetrieveResult.chunks`: a chunk together with its scores.
 * Only `rrfScore` is always a number; the other scores may be `undefined`.
 */
export interface RetrievalScoredChunk {
|
|
30
|
+
id: string;
|
|
31
|
+
content: string;
|
|
32
|
+
/** Per the `RetrieveOpts.minScore` documentation, the score that `minScore` filters on when no reranker is configured. */
rrfScore: number;
|
|
33
|
+
denseScore: number | undefined;
|
|
34
|
+
bm25Score: number | undefined;
|
|
35
|
+
/** Per the `RetrieveOpts.minScore` documentation, the score that `minScore` filters on when a reranker is configured. */
rerankScore: number | undefined;
|
|
36
|
+
}
|
|
37
|
+
/**
 * Options accepted by `retrieve`. Only `query` and `candidates` are required.
 * NOTE(review): the retrieve implementation is not visible from this
 * declaration file; the field notes marked "presumably" are inferred from
 * names and types only — confirm against the implementation.
 */
export interface RetrieveOpts {
|
|
38
|
+
query: string;
|
|
39
|
+
/** Presumably the embedding of `query`, used for dense scoring — confirm. */
queryEmbedding?: number[];
|
|
40
|
+
/** Presumably dense scores computed elsewhere that replace in-process dense scoring — confirm. */
precomputedDenseScores?: Array<{
|
|
41
|
+
id: string;
|
|
42
|
+
score: number;
|
|
43
|
+
}>;
|
|
44
|
+
candidates: RetrievalCandidate[];
|
|
45
|
+
/** Presumably the number of chunks to return; the default is not visible here — confirm. */
k?: number;
|
|
46
|
+
/** Presumably how many top candidates are passed to the reranker — confirm. */
rerankTopN?: number;
|
|
47
|
+
/** Optional reranker; its presence changes which score `minScore` filters on (see `minScore`). */
reranker?: Reranker;
|
|
48
|
+
/**
|
|
49
|
+
* Minimum score threshold for returned chunks.
|
|
50
|
+
*
|
|
51
|
+
* Scale depends on whether a reranker is configured:
|
|
52
|
+
* - With reranker: filters on `rerankScore` (model-dependent; typically 0–1).
|
|
53
|
+
* - Without reranker: filters on `rrfScore` (RRF scale; max ≈ 0.033 for two
|
|
54
|
+
* source lists with k=60). Cosine-style thresholds (e.g. 0.2) will silently
|
|
55
|
+
* eliminate every result when no reranker is active. Use a value < 0.033 or
|
|
56
|
+
* rely on reranking when applying a meaningful score floor.
|
|
57
|
+
*/
|
|
58
|
+
minScore?: number;
|
|
59
|
+
}
|
|
60
|
+
/** Value that the promise returned by `retrieve` resolves to. */
export interface RetrieveResult {
|
|
61
|
+
/** The scored chunks selected for the query. */
chunks: RetrievalScoredChunk[];
|
|
62
|
+
/** Diagnostic record for the same call (see `RetrievalTrace`). */
trace: RetrievalTrace;
|
|
63
|
+
}
|
|
64
|
+
/**
 * Runs retrieval for the given options.
 *
 * NOTE(review): the implementation is not visible from this declaration
 * file; judging by the `RetrievalTrace` fields it presumably combines dense
 * and BM25 scores via reciprocal rank fusion and optionally reranks — confirm.
 *
 * @param opts Query, candidates and optional tuning values.
 * @returns A promise resolving to the scored chunks and a trace of the call.
 */
export declare function retrieve(opts: RetrieveOpts): Promise<RetrieveResult>;
|
|
65
|
+
/**
 * Resolves to a numeric vector for `query`, obtained through `embedder`.
 *
 * NOTE(review): the implementation is not visible from this declaration
 * file; presumably it embeds the single query string and returns its vector,
 * suitable for `RetrieveOpts.queryEmbedding` — confirm.
 *
 * @param embedder Embedder used to produce the vector.
 * @param query Query text to embed.
 */
export declare function embedQuery(embedder: Embedder, query: string): Promise<number[]>;
|
|
66
|
+
export { denseRank, cosineSim } from "./dense.js";
|
|
67
|
+
export { bm25Rank } from "./bm25.js";
|
|
68
|
+
export { reciprocalRankFusion } from "./rrf.js";
|
|
69
|
+
export { createVoyageReranker, createCrossEncoderReranker, createDefaultReranker, shouldRerank, } from "./rerank.js";
|
|
70
|
+
export type { Reranker, RerankCandidate, RerankScore } from "./rerank.js";
|
|
71
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/retrieval/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAItD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAGhB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,WAAW,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAClD,UAAU,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACjD,SAAS,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAChD,YAAY,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,IAAI,CAAC;IAC1D,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,SAAS,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,WAAW,EAAE,MAAM,GAAG,SAAS,CAAC;CACjC;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IAId,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,sBAAsB,CAAC,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC9D,UAAU,EAAE,kBAAkB,EAAE,CAAC;IACjC,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,oBAAoB,EAAE,CAAC;IAC/B,KAAK,EAAE,cAAc,CAAC;CACvB;AAQD,wBAAsB,QAAQ,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC,CAoF1E;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAIrF;AAED,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AAChD,OAAO,EACL,oBAAoB,EACpB,0BAA0B,EAC1B,qBAAqB,EACrB,YAAY,GACb,MAAM,aAAa,CAAC;AACrB,YAAY,EAAE,QAAQ,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC"}
|