@fastrag/pageindex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/README.zh-CN.md +251 -0
- package/dist/errors/index.d.ts +10 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +19 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/internal-types/config.d.ts +35 -0
- package/dist/internal-types/config.d.ts.map +1 -0
- package/dist/internal-types/config.js +16 -0
- package/dist/internal-types/config.js.map +1 -0
- package/dist/internal-types/document-parser.d.ts +5 -0
- package/dist/internal-types/document-parser.d.ts.map +1 -0
- package/dist/internal-types/document-parser.js +2 -0
- package/dist/internal-types/document-parser.js.map +1 -0
- package/dist/internal-types/index.d.ts +9 -0
- package/dist/internal-types/index.d.ts.map +1 -0
- package/dist/internal-types/index.js +2 -0
- package/dist/internal-types/index.js.map +1 -0
- package/dist/internal-types/llm-provider.d.ts +19 -0
- package/dist/internal-types/llm-provider.d.ts.map +1 -0
- package/dist/internal-types/llm-provider.js +2 -0
- package/dist/internal-types/llm-provider.js.map +1 -0
- package/dist/internal-types/logger.d.ts +7 -0
- package/dist/internal-types/logger.d.ts.map +1 -0
- package/dist/internal-types/logger.js +2 -0
- package/dist/internal-types/logger.js.map +1 -0
- package/dist/internal-types/page.d.ts +5 -0
- package/dist/internal-types/page.d.ts.map +1 -0
- package/dist/internal-types/page.js +2 -0
- package/dist/internal-types/page.js.map +1 -0
- package/dist/internal-types/processing.d.ts +21 -0
- package/dist/internal-types/processing.d.ts.map +1 -0
- package/dist/internal-types/processing.js +2 -0
- package/dist/internal-types/processing.js.map +1 -0
- package/dist/internal-types/tree-node.d.ts +30 -0
- package/dist/internal-types/tree-node.d.ts.map +1 -0
- package/dist/internal-types/tree-node.js +2 -0
- package/dist/internal-types/tree-node.js.map +1 -0
- package/dist/llm/index.d.ts +3 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +3 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/llm-client.d.ts +26 -0
- package/dist/llm/llm-client.d.ts.map +1 -0
- package/dist/llm/llm-client.js +88 -0
- package/dist/llm/llm-client.js.map +1 -0
- package/dist/llm/prompts.d.ts +33 -0
- package/dist/llm/prompts.d.ts.map +1 -0
- package/dist/llm/prompts.js +312 -0
- package/dist/llm/prompts.js.map +1 -0
- package/dist/markdown/index.d.ts +6 -0
- package/dist/markdown/index.d.ts.map +1 -0
- package/dist/markdown/index.js +5 -0
- package/dist/markdown/index.js.map +1 -0
- package/dist/markdown/md-extractor.d.ts +14 -0
- package/dist/markdown/md-extractor.d.ts.map +1 -0
- package/dist/markdown/md-extractor.js +30 -0
- package/dist/markdown/md-extractor.js.map +1 -0
- package/dist/markdown/md-to-tree.d.ts +8 -0
- package/dist/markdown/md-to-tree.d.ts.map +1 -0
- package/dist/markdown/md-to-tree.js +20 -0
- package/dist/markdown/md-to-tree.js.map +1 -0
- package/dist/markdown/md-tree-builder.d.ts +7 -0
- package/dist/markdown/md-tree-builder.d.ts.map +1 -0
- package/dist/markdown/md-tree-builder.js +36 -0
- package/dist/markdown/md-tree-builder.js.map +1 -0
- package/dist/markdown/tree-thinning.d.ts +8 -0
- package/dist/markdown/tree-thinning.d.ts.map +1 -0
- package/dist/markdown/tree-thinning.js +42 -0
- package/dist/markdown/tree-thinning.js.map +1 -0
- package/dist/page-index.d.ts +10 -0
- package/dist/page-index.d.ts.map +1 -0
- package/dist/page-index.js +54 -0
- package/dist/page-index.js.map +1 -0
- package/dist/post-processing/doc-description.d.ts +12 -0
- package/dist/post-processing/doc-description.d.ts.map +1 -0
- package/dist/post-processing/doc-description.js +31 -0
- package/dist/post-processing/doc-description.js.map +1 -0
- package/dist/post-processing/index.d.ts +5 -0
- package/dist/post-processing/index.d.ts.map +1 -0
- package/dist/post-processing/index.js +5 -0
- package/dist/post-processing/index.js.map +1 -0
- package/dist/post-processing/node-id.d.ts +7 -0
- package/dist/post-processing/node-id.d.ts.map +1 -0
- package/dist/post-processing/node-id.js +20 -0
- package/dist/post-processing/node-id.js.map +1 -0
- package/dist/post-processing/node-text.d.ts +11 -0
- package/dist/post-processing/node-text.d.ts.map +1 -0
- package/dist/post-processing/node-text.js +37 -0
- package/dist/post-processing/node-text.js.map +1 -0
- package/dist/post-processing/summary.d.ts +7 -0
- package/dist/post-processing/summary.d.ts.map +1 -0
- package/dist/post-processing/summary.js +31 -0
- package/dist/post-processing/summary.js.map +1 -0
- package/dist/processing/index.d.ts +6 -0
- package/dist/processing/index.d.ts.map +1 -0
- package/dist/processing/index.js +6 -0
- package/dist/processing/index.js.map +1 -0
- package/dist/processing/large-node.d.ts +9 -0
- package/dist/processing/large-node.d.ts.map +1 -0
- package/dist/processing/large-node.js +40 -0
- package/dist/processing/large-node.js.map +1 -0
- package/dist/processing/meta-processor.d.ts +19 -0
- package/dist/processing/meta-processor.d.ts.map +1 -0
- package/dist/processing/meta-processor.js +91 -0
- package/dist/processing/meta-processor.js.map +1 -0
- package/dist/processing/no-toc.d.ts +10 -0
- package/dist/processing/no-toc.d.ts.map +1 -0
- package/dist/processing/no-toc.js +44 -0
- package/dist/processing/no-toc.js.map +1 -0
- package/dist/processing/toc-no-pages.d.ts +11 -0
- package/dist/processing/toc-no-pages.d.ts.map +1 -0
- package/dist/processing/toc-no-pages.js +46 -0
- package/dist/processing/toc-no-pages.js.map +1 -0
- package/dist/processing/toc-with-pages.d.ts +15 -0
- package/dist/processing/toc-with-pages.d.ts.map +1 -0
- package/dist/processing/toc-with-pages.js +151 -0
- package/dist/processing/toc-with-pages.js.map +1 -0
- package/dist/toc/index.d.ts +4 -0
- package/dist/toc/index.d.ts.map +1 -0
- package/dist/toc/index.js +4 -0
- package/dist/toc/index.js.map +1 -0
- package/dist/toc/toc-detector.d.ts +23 -0
- package/dist/toc/toc-detector.d.ts.map +1 -0
- package/dist/toc/toc-detector.js +65 -0
- package/dist/toc/toc-detector.js.map +1 -0
- package/dist/toc/toc-extractor.d.ts +13 -0
- package/dist/toc/toc-extractor.d.ts.map +1 -0
- package/dist/toc/toc-extractor.js +32 -0
- package/dist/toc/toc-extractor.js.map +1 -0
- package/dist/toc/toc-transformer.d.ts +11 -0
- package/dist/toc/toc-transformer.d.ts.map +1 -0
- package/dist/toc/toc-transformer.js +69 -0
- package/dist/toc/toc-transformer.js.map +1 -0
- package/dist/tree/index.d.ts +4 -0
- package/dist/tree/index.d.ts.map +1 -0
- package/dist/tree/index.js +4 -0
- package/dist/tree/index.js.map +1 -0
- package/dist/tree/list-to-tree.d.ts +7 -0
- package/dist/tree/list-to-tree.d.ts.map +1 -0
- package/dist/tree/list-to-tree.js +33 -0
- package/dist/tree/list-to-tree.js.map +1 -0
- package/dist/tree/post-processing.d.ts +12 -0
- package/dist/tree/post-processing.d.ts.map +1 -0
- package/dist/tree/post-processing.js +87 -0
- package/dist/tree/post-processing.js.map +1 -0
- package/dist/tree/tree-utils.d.ts +18 -0
- package/dist/tree/tree-utils.d.ts.map +1 -0
- package/dist/tree/tree-utils.js +43 -0
- package/dist/tree/tree-utils.js.map +1 -0
- package/dist/tree-parser.d.ts +30 -0
- package/dist/tree-parser.d.ts.map +1 -0
- package/dist/tree-parser.js +73 -0
- package/dist/tree-parser.js.map +1 -0
- package/dist/types.d.ts +3 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/config-loader.d.ts +15 -0
- package/dist/utils/config-loader.d.ts.map +1 -0
- package/dist/utils/config-loader.js +19 -0
- package/dist/utils/config-loader.js.map +1 -0
- package/dist/utils/index.d.ts +7 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +6 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/json-parser.d.ts +2 -0
- package/dist/utils/json-parser.d.ts.map +1 -0
- package/dist/utils/json-parser.js +76 -0
- package/dist/utils/json-parser.js.map +1 -0
- package/dist/utils/logger.d.ts +3 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +10 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/page-utils.d.ts +16 -0
- package/dist/utils/page-utils.d.ts.map +1 -0
- package/dist/utils/page-utils.js +56 -0
- package/dist/utils/page-utils.js.map +1 -0
- package/dist/utils/token-counter.d.ts +2 -0
- package/dist/utils/token-counter.d.ts.map +1 -0
- package/dist/utils/token-counter.js +5 -0
- package/dist/utils/token-counter.js.map +1 -0
- package/dist/vector-lib/adapters/in-memory-adapter.d.ts +14 -0
- package/dist/vector-lib/adapters/in-memory-adapter.d.ts.map +1 -0
- package/dist/vector-lib/adapters/in-memory-adapter.js +55 -0
- package/dist/vector-lib/adapters/in-memory-adapter.js.map +1 -0
- package/dist/vector-lib/adapters/vector-store.d.ts +10 -0
- package/dist/vector-lib/adapters/vector-store.d.ts.map +1 -0
- package/dist/vector-lib/adapters/vector-store.js +2 -0
- package/dist/vector-lib/adapters/vector-store.js.map +1 -0
- package/dist/vector-lib/chunker/tree-chunker.d.ts +8 -0
- package/dist/vector-lib/chunker/tree-chunker.d.ts.map +1 -0
- package/dist/vector-lib/chunker/tree-chunker.js +59 -0
- package/dist/vector-lib/chunker/tree-chunker.js.map +1 -0
- package/dist/vector-lib/embedder/embedder.d.ts +8 -0
- package/dist/vector-lib/embedder/embedder.d.ts.map +1 -0
- package/dist/vector-lib/embedder/embedder.js +2 -0
- package/dist/vector-lib/embedder/embedder.js.map +1 -0
- package/dist/vector-lib/index.d.ts +10 -0
- package/dist/vector-lib/index.d.ts.map +1 -0
- package/dist/vector-lib/index.js +6 -0
- package/dist/vector-lib/index.js.map +1 -0
- package/dist/vector-lib/search/hybrid-search.d.ts +19 -0
- package/dist/vector-lib/search/hybrid-search.d.ts.map +1 -0
- package/dist/vector-lib/search/hybrid-search.js +25 -0
- package/dist/vector-lib/search/hybrid-search.js.map +1 -0
- package/dist/vector-lib/search/reranker.d.ts +14 -0
- package/dist/vector-lib/search/reranker.d.ts.map +1 -0
- package/dist/vector-lib/search/reranker.js +2 -0
- package/dist/vector-lib/search/reranker.js.map +1 -0
- package/dist/vector-lib/types.d.ts +29 -0
- package/dist/vector-lib/types.d.ts.map +1 -0
- package/dist/vector-lib/types.js +2 -0
- package/dist/vector-lib/types.js.map +1 -0
- package/dist/vector-lib/vector-enhancer.d.ts +28 -0
- package/dist/vector-lib/vector-enhancer.d.ts.map +1 -0
- package/dist/vector-lib/vector-enhancer.js +54 -0
- package/dist/vector-lib/vector-enhancer.js.map +1 -0
- package/dist/vector.d.ts +5 -0
- package/dist/vector.d.ts.map +1 -0
- package/dist/vector.js +3 -0
- package/dist/vector.js.map +1 -0
- package/dist/verification/fix-toc.d.ts +13 -0
- package/dist/verification/fix-toc.d.ts.map +1 -0
- package/dist/verification/fix-toc.js +73 -0
- package/dist/verification/fix-toc.js.map +1 -0
- package/dist/verification/index.d.ts +3 -0
- package/dist/verification/index.d.ts.map +1 -0
- package/dist/verification/index.js +3 -0
- package/dist/verification/index.js.map +1 -0
- package/dist/verification/verify-toc.d.ts +17 -0
- package/dist/verification/verify-toc.d.ts.map +1 -0
- package/dist/verification/verify-toc.js +64 -0
- package/dist/verification/verify-toc.js.map +1 -0
- package/package.json +58 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
 * Flattens a PageIndexResult tree into chunks suitable for vector indexing.
 *
 * The tree is walked depth-first, parent before children. Every node whose
 * `text` is non-empty (not only leaves) contributes one chunk per piece
 * returned by `splitText`; nodes without text are only traversed.
 *
 * @param result - Object exposing `docName` and an iterable `structure` of root nodes.
 * @param config - Optional settings; only `chunkMaxTokens` (default 1000) is read here.
 * @returns Chunks in traversal order. Each id has the form
 *          `docName:nodeId:chunkIndex`, with `unknown` standing in for a missing nodeId.
 */
export const treeChunker = ((result, config = {}) => {
    const tokenBudget = config.chunkMaxTokens ?? 1000;
    const collected = [];
    const { docName } = result;
    const visit = (node) => {
        if (node.text) {
            // splitText breaks text that exceeds the token budget into several pieces.
            splitText(node.text, tokenBudget).forEach((piece, chunkIndex) => {
                collected.push({
                    id: `${docName}:${node.nodeId ?? 'unknown'}:${chunkIndex}`,
                    text: piece,
                    metadata: {
                        docName,
                        nodeId: node.nodeId ?? '',
                        title: node.title,
                        startIndex: node.startIndex,
                        endIndex: node.endIndex,
                        chunkIndex,
                    },
                });
            });
        }
        // Children are always visited, whether or not the parent produced chunks.
        for (const child of node.nodes) {
            visit(child);
        }
    };
    for (const root of result.structure) {
        visit(root);
    }
    return collected;
});
|
|
38
|
+
/**
 * Splits text into pieces that each fit an approximate token budget.
 *
 * Text that already fits is returned untouched as a single-element array.
 * Otherwise paragraphs (separated by blank lines) are packed greedily into
 * chunks; the `\n\n` joiner is counted against the budget. A paragraph that
 * is larger than the budget on its own is cut into budget-sized slices,
 * preferring whitespace boundaries, so no chunk exceeds the budget.
 * Whitespace-only chunks are never emitted.
 *
 * @param {string} text - Text to split.
 * @param {number} maxTokens - Token budget per chunk (1 token is estimated as 4 characters).
 * @returns {string[]} Non-empty list of chunks; `[text]` when nothing else could be produced.
 */
function splitText(text, maxTokens) {
    // Rough estimate: 1 token ≈ 4 chars
    const maxChars = maxTokens * 4;
    if (text.length <= maxChars) {
        return [text];
    }
    // Whole-character window used when a single paragraph has to be cut up.
    // With a non-positive or non-finite budget, slicing is impossible and
    // paragraphs are kept whole.
    const windowSize = Math.floor(maxChars);
    const canSlice = Number.isFinite(windowSize) && windowSize >= 1;
    const chunks = [];
    let current = '';
    const flush = () => {
        const trimmed = current.trim();
        if (trimmed) {
            chunks.push(trimmed);
        }
        current = '';
    };
    for (const para of text.split(/\n\n+/)) {
        if (canSlice && para.length > windowSize) {
            // Oversized paragraph: close the pending chunk, emit full slices,
            // and keep the last slice open so following paragraphs can join it.
            flush();
            const pieces = sliceOversizedParagraph(para, windowSize);
            const tail = pieces.pop() ?? '';
            for (const piece of pieces) {
                chunks.push(piece);
            }
            current = tail;
            continue;
        }
        const separatorLength = current ? 2 : 0;
        if (current.length > 0 && current.length + separatorLength + para.length > maxChars) {
            flush();
        }
        current += (current ? '\n\n' : '') + para;
    }
    flush();
    return chunks.length > 0 ? chunks : [text];
}

/**
 * Cuts one paragraph into trimmed, non-empty slices of at most `windowSize`
 * UTF-16 code units. Cuts land on the last whitespace inside the window when
 * there is one; otherwise the cut is a hard one that avoids separating a
 * surrogate pair.
 *
 * @param {string} para - Paragraph longer than `windowSize`.
 * @param {number} windowSize - Positive integer slice size.
 * @returns {string[]} Slices in original order.
 */
function sliceOversizedParagraph(para, windowSize) {
    const whitespace = /\s/;
    const pieces = [];
    const pushPiece = (piece) => {
        const trimmed = piece.trim();
        if (trimmed) {
            pieces.push(trimmed);
        }
    };
    let start = 0;
    while (para.length - start > windowSize) {
        let end = start + windowSize;
        // Walk back from the window edge to the nearest whitespace.
        let breakAt = end;
        while (breakAt > start && !whitespace.test(para[breakAt])) {
            breakAt -= 1;
        }
        if (breakAt > start) {
            end = breakAt;
        } else if (windowSize > 1) {
            // No whitespace available: hard cut, but keep surrogate pairs intact.
            const lastUnit = para.charCodeAt(end - 1);
            if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                end -= 1;
            }
        }
        pushPiece(para.slice(start, end));
        start = end;
        // Skip the whitespace we cut at so the next slice starts on content.
        while (start < para.length && whitespace.test(para[start])) {
            start += 1;
        }
    }
    pushPiece(para.slice(start));
    return pieces;
}
|
|
59
|
+
//# sourceMappingURL=tree-chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tree-chunker.js","sourceRoot":"","sources":["../../../src/vector-lib/chunker/tree-chunker.ts"],"names":[],"mappings":"AAGA;;;GAGG;AACH,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,CAC1B,MAAuB,EACvB,SAAuB,EAAE,EAChB,EAAE;IACX,MAAM,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,IAAI,CAAC;IACrD,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;IAE/B,SAAS,WAAW,CAAC,IAAc;QACjC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,2DAA2D;YAC3D,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;YACvB,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;YAEnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,GAAG,OAAO,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,IAAI,CAAC,EAAE;oBACjD,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC;oBACnB,QAAQ,EAAE;wBACR,OAAO;wBACP,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,EAAE;wBACzB,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,UAAU,EAAE,IAAI,CAAC,UAAU;wBAC3B,QAAQ,EAAE,IAAI,CAAC,QAAQ;wBACvB,UAAU,EAAE,CAAC;qBACd;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC/B,WAAW,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACpC,WAAW,CAAC,IAAI,CAAC,CAAC;IACpB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC,CAAmB,CAAC;AAErB,SAAS,SAAS,CAAC,IAAY,EAAE,SAAiB;IAChD,oCAAoC;IACpC,MAAM,QAAQ,GAAG,SAAS,GAAG,CAAC,CAAC;IAE/B,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC5B,OAAO,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,QAAQ,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5B,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;IAC5C,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAC9B,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CA
AC,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/embedder/embedder.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../../src/vector-lib/embedder/embedder.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export type { VectorConfig, SearchResult, Chunk, VectorRecord, Chunker, } from './types.js';
|
|
2
|
+
export type { Embedder } from './embedder/embedder.js';
|
|
3
|
+
export type { VectorStore } from './adapters/vector-store.js';
|
|
4
|
+
export type { Reranker } from './search/reranker.js';
|
|
5
|
+
export { InMemoryAdapter } from './adapters/in-memory-adapter.js';
|
|
6
|
+
export { treeChunker } from './chunker/tree-chunker.js';
|
|
7
|
+
export { VectorEnhancer } from './vector-enhancer.js';
|
|
8
|
+
export { HybridSearch } from './search/hybrid-search.js';
|
|
9
|
+
export type { HybridSearchConfig } from './search/hybrid-search.js';
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/vector-lib/index.ts"],"names":[],"mappings":"AACA,YAAY,EACV,YAAY,EACZ,YAAY,EACZ,KAAK,EACL,YAAY,EACZ,OAAO,GACR,MAAM,YAAY,CAAC;AAGpB,YAAY,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AACvD,YAAY,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC9D,YAAY,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAGrD,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// Implementations
|
|
2
|
+
export { InMemoryAdapter } from './adapters/in-memory-adapter.js';
|
|
3
|
+
export { treeChunker } from './chunker/tree-chunker.js';
|
|
4
|
+
export { VectorEnhancer } from './vector-enhancer.js';
|
|
5
|
+
export { HybridSearch } from './search/hybrid-search.js';
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/vector-lib/index.ts"],"names":[],"mappings":"AAcA,kBAAkB;AAClB,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { SearchResult } from '../types.js';
|
|
2
|
+
import { VectorEnhancer } from '../vector-enhancer.js';
|
|
3
|
+
/**
 * Optional tuning values accepted by the HybridSearch constructor.
 */
export interface HybridSearchConfig {
    /** Number of candidates requested from the vector search stage. */
    vectorTopK?: number;
    /** Upper bound on the number of results kept after the final cut. */
    rerankTopK?: number;
}
|
|
7
|
+
/**
 * Search helper that queries a VectorEnhancer for candidates and returns
 * the leading results.
 */
export declare class HybridSearch {
    private readonly enhancer;
    private readonly config;
    /**
     * @param enhancer - VectorEnhancer used to run the underlying vector query.
     * @param config - Optional candidate/result limits.
     */
    constructor(enhancer: VectorEnhancer, config?: HybridSearchConfig);
    /**
     * Runs the vector query, then trims the candidate list to the top results.
     *
     * @param query - Free-text query.
     * @param filter - Optional filter forwarded to the vector query.
     * @returns The retained search results.
     */
    search(query: string, filter?: Record<string, unknown>): Promise<SearchResult[]>;
}
|
|
19
|
+
//# sourceMappingURL=hybrid-search.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid-search.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/search/hybrid-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAEvD,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAiB;IAC1C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA+B;gBAE1C,QAAQ,EAAE,cAAc,EAAE,MAAM,GAAE,kBAAuB;IAQrE;;OAEG;IACG,MAAM,CACV,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC;CAW3B"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { VectorEnhancer } from '../vector-enhancer.js';
|
|
2
|
+
/**
 * Two-stage search helper: asks a VectorEnhancer for a wide candidate set
 * and hands back only the first few entries of it.
 */
export class HybridSearch {
    enhancer;
    config;
    /**
     * @param enhancer - Object whose `search(query, topK, filter)` supplies candidates.
     * @param config - Optional limits; `vectorTopK` defaults to 20 and `rerankTopK` to 5.
     */
    constructor(enhancer, config = {}) {
        const { vectorTopK, rerankTopK } = config;
        this.enhancer = enhancer;
        this.config = {
            vectorTopK: vectorTopK ?? 20,
            rerankTopK: rerankTopK ?? 5,
        };
    }
    /**
     * Fetches `vectorTopK` candidates and returns the first `rerankTopK` of them.
     * No reranker model is involved: candidates are kept in the order the
     * enhancer returned them.
     *
     * @param query - Free-text query.
     * @param filter - Optional filter forwarded unchanged to the enhancer.
     * @returns The leading slice of the candidate list.
     */
    async search(query, filter) {
        const { vectorTopK } = this.config;
        const candidates = await this.enhancer.search(query, vectorTopK, filter);
        const keep = this.config.rerankTopK;
        return candidates.slice(0, keep);
    }
}
|
|
25
|
+
//# sourceMappingURL=hybrid-search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid-search.js","sourceRoot":"","sources":["../../../src/vector-lib/search/hybrid-search.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAOvD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,QAAQ,CAAiB;IACzB,MAAM,CAA+B;IAEtD,YAAY,QAAwB,EAAE,SAA6B,EAAE;QACnE,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG;YACZ,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,EAAE;YACnC,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,CAAC;SACnC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,KAAa,EACb,MAAgC;QAEhC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CACxC,KAAK,EACL,IAAI,CAAC,MAAM,CAAC,UAAU,EACtB,MAAM,CACP,CAAC;QAEF,mDAAmD;QACnD,mDAAmD;QACnD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAClD,CAAC;CACF"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Contract for a custom reranking step; implementations are supplied by the user.
 */
export interface Reranker {
    /**
     * Re-scores candidate documents against a query.
     *
     * @param query - Query text the documents are ranked for.
     * @param documents - Candidates, each with its id, text and current score.
     * @param topK - Number of results requested.
     * @returns Ids paired with their new scores.
     */
    rerank(query: string, documents: {
        id: string;
        text: string;
        score: number;
    }[], topK: number): Promise<{
        id: string;
        score: number;
    }[]>;
}
|
|
14
|
+
//# sourceMappingURL=reranker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reranker.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/search/reranker.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,MAAM,CACJ,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,EAC7D,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC,CAAC;CAClD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reranker.js","sourceRoot":"","sources":["../../../src/vector-lib/search/reranker.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { PageIndexResult } from '../types.js';
|
|
2
|
+
/**
 * One entry written to a vector store: an id, its embedding and attached data.
 */
export interface VectorRecord {
    /** Identifier of the record. */
    id: string;
    /** Embedding values. */
    vector: number[];
    /** Arbitrary data stored alongside the vector. */
    payload: Record<string, unknown>;
}
|
|
7
|
+
/**
 * One hit produced by a vector search.
 */
export interface SearchResult {
    /** Identifier of the matched record. */
    id: string;
    /** Score assigned to the match by the store. */
    score: number;
    /** Data that was stored with the matched record. */
    payload: Record<string, unknown>;
}
|
|
12
|
+
/**
 * Chunking options passed through to a Chunker.
 */
export interface VectorConfig {
    /** Approximate token budget per chunk. */
    chunkMaxTokens?: number;
    /** Overlap between chunks. NOTE(review): units are not established here; confirm against the chunker in use. */
    chunkOverlap?: number;
}
|
|
16
|
+
/**
 * A piece of document text prepared for embedding, with its origin metadata.
 */
export interface Chunk {
    /** Identifier of the chunk. */
    id: string;
    /** Text content of the chunk. */
    text: string;
    /** Where the chunk came from. */
    metadata: {
        /** Name of the source document. */
        docName: string;
        /** Id of the tree node the text belongs to. */
        nodeId: string;
        /** Title of that node. */
        title: string;
        /** Start index of the node, when known. */
        startIndex?: number;
        /** End index of the node, when known. */
        endIndex?: number;
        /** Position of this chunk among the chunks of the same node. */
        chunkIndex: number;
    };
}
|
|
28
|
+
/**
 * Function that turns a PageIndexResult into a list of chunks,
 * optionally guided by a VectorConfig.
 */
export type Chunker = (
    result: PageIndexResult,
    config?: VectorConfig,
) => Chunk[];
|
|
29
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/vector-lib/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,YAAY;IAC3B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE;QACR,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;CACH;AAED,MAAM,MAAM,OAAO,GAAG,CACpB,MAAM,EAAE,eAAe,EACvB,MAAM,CAAC,EAAE,YAAY,KAClB,KAAK,EAAE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/vector-lib/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { PageIndexResult } from '../types.js';
|
|
2
|
+
import type { Embedder } from './embedder/embedder.js';
|
|
3
|
+
import type { VectorStore } from './adapters/vector-store.js';
|
|
4
|
+
import type { SearchResult, VectorConfig, Chunker } from './types.js';
|
|
5
|
+
/**
 * Bridges a PageIndexResult to a vector store: chunks it, embeds the chunks,
 * stores them, and answers text queries against the store.
 */
export declare class VectorEnhancer {
    private readonly store;
    private readonly embedder;
    private readonly chunker;
    private readonly config;
    /**
     * @param store - Vector store that receives and serves records.
     * @param embedder - Produces embeddings for chunk and query text.
     * @param chunker - Splits a PageIndexResult into chunks.
     * @param config - Optional chunking options handed to the chunker.
     */
    constructor(store: VectorStore, embedder: Embedder, chunker: Chunker, config?: VectorConfig);
    /**
     * Chunks, embeds and stores a PageIndexResult.
     *
     * @returns The number of chunks produced.
     */
    index(result: PageIndexResult): Promise<number>;
    /**
     * Embeds the query text and searches the store with it.
     *
     * @param query - Free-text query.
     * @param topK - Maximum number of results requested from the store.
     * @param filter - Optional filter forwarded to the store.
     */
    search(query: string, topK?: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
    /**
     * Deletes the records the store returns for a `{ docName }` filter.
     */
    deleteDocument(docName: string): Promise<void>;
}
|
|
28
|
+
//# sourceMappingURL=vector-enhancer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector-enhancer.d.ts","sourceRoot":"","sources":["../../src/vector-lib/vector-enhancer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAEtE;;;GAGG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAc;IACpC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAW;IACpC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAe;gBAGpC,KAAK,EAAE,WAAW,EAClB,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,OAAO,EAChB,MAAM,GAAE,YAAiB;IAQ3B;;OAEG;IACG,KAAK,CAAC,MAAM,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC;IAkBrD;;OAEG;IACG,MAAM,CACV,KAAK,EAAE,MAAM,EACb,IAAI,SAAI,EACR,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC;IAK1B;;OAEG;IACG,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAcrD"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * VectorEnhancer: indexes a PageIndexResult into a vector store
 * and provides search and per-document deletion on top of it.
 */
export class VectorEnhancer {
    store;
    embedder;
    chunker;
    config;
    /**
     * @param store - Vector store; `upsert`, `search` and `delete` are called on it.
     * @param embedder - Provides `embed(texts)` and a numeric `dimension`.
     * @param chunker - Function `(result, config)` returning the chunks to index.
     * @param config - Options passed unchanged to the chunker.
     */
    constructor(store, embedder, chunker, config = {}) {
        this.store = store;
        this.embedder = embedder;
        this.chunker = chunker;
        this.config = config;
    }
    /**
     * Index a PageIndexResult into the vector store.
     *
     * @param result - Document tree to chunk and embed.
     * @returns Number of chunks written; 0 when the chunker produced none.
     * @throws {Error} When the embedder does not return exactly one vector per chunk.
     */
    async index(result) {
        const chunks = this.chunker(result, this.config);
        if (chunks.length === 0) {
            return 0;
        }
        const texts = chunks.map((c) => c.text);
        const embeddings = await this.embedder.embed(texts);
        // Pairing is positional, so a short or missing result would store
        // records without a vector. Fail loudly instead.
        if (embeddings?.length !== chunks.length) {
            throw new Error(`Embedder returned ${embeddings?.length ?? 0} embeddings for ${chunks.length} chunks`);
        }
        const records = chunks.map((chunk, i) => ({
            id: chunk.id,
            vector: embeddings[i],
            payload: chunk.metadata,
        }));
        await this.store.upsert(records);
        return chunks.length;
    }
    /**
     * Search the vector store with a text query.
     *
     * @param query - Text to embed and search for.
     * @param topK - Maximum number of results requested (default 5).
     * @param filter - Optional filter forwarded to the store.
     * @returns Whatever the store's `search` returns.
     * @throws {Error} When the embedder returns no vector for the query.
     */
    async search(query, topK = 5, filter) {
        const [queryVector] = await this.embedder.embed([query]);
        if (queryVector === undefined) {
            throw new Error('Embedder returned no embedding for the query');
        }
        return this.store.search(queryVector, topK, filter);
    }
    /**
     * Delete all chunks for a document.
     *
     * The store interface offers no delete-by-filter, so matching ids are
     * discovered with filtered searches using a zero vector and removed in
     * batches until a search comes back with fewer hits than the batch size.
     *
     * @param docName - Value matched against the `docName` filter key.
     */
    async deleteDocument(docName) {
        const batchSize = 10000;
        const probe = new Array(this.embedder.dimension).fill(0);
        const requested = new Set();
        for (;;) {
            const results = await this.store.search(probe, batchSize, { docName });
            const ids = results.map((r) => r.id);
            if (ids.length === 0) {
                return;
            }
            // If the store keeps returning ids we already asked it to delete,
            // stop rather than loop forever.
            const hasNewId = ids.some((id) => !requested.has(id));
            if (!hasNewId) {
                return;
            }
            await this.store.delete(ids);
            if (ids.length < batchSize) {
                return;
            }
            for (const id of ids) {
                requested.add(id);
            }
        }
    }
}
|
|
54
|
+
//# sourceMappingURL=vector-enhancer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector-enhancer.js","sourceRoot":"","sources":["../../src/vector-lib/vector-enhancer.ts"],"names":[],"mappings":"AAKA;;;GAGG;AACH,MAAM,OAAO,cAAc;IACR,KAAK,CAAc;IACnB,QAAQ,CAAW;IACnB,OAAO,CAAU;IACjB,MAAM,CAAe;IAEtC,YACE,KAAkB,EAClB,QAAkB,EAClB,OAAgB,EAChB,SAAuB,EAAE;QAEzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,MAAuB;QACjC,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAEjD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAElC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEpD,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACxC,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC;YACrB,OAAO,EAAE,KAAK,CAAC,QAA8C;SAC9D,CAAC,CAAC,CAAC;QAEJ,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACjC,OAAO,MAAM,CAAC,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,KAAa,EACb,IAAI,GAAG,CAAC,EACR,MAAgC;QAEhC,MAAM,CAAC,WAAW,CAAC,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;QACzD,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;IACtD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc,CAAC,OAAe;QAClC,0DAA0D;QAC1D,8DAA8D;QAC9D,sCAAsC;QACtC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CACrC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAC1C,KAAK,EACL,EAAE,OAAO,EAAE,CACZ,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACrC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnB,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;CACF"}
|
package/dist/vector.d.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export type { VectorConfig, SearchResult, Chunk, VectorRecord, Chunker, } from './vector-lib/index.js';
|
|
2
|
+
export type { Embedder, VectorStore, Reranker } from './vector-lib/index.js';
|
|
3
|
+
export { InMemoryAdapter, treeChunker, VectorEnhancer, HybridSearch, } from './vector-lib/index.js';
|
|
4
|
+
export type { HybridSearchConfig } from './vector-lib/index.js';
|
|
5
|
+
//# sourceMappingURL=vector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../src/vector.ts"],"names":[],"mappings":"AACA,YAAY,EACV,YAAY,EACZ,YAAY,EACZ,KAAK,EACL,YAAY,EACZ,OAAO,GACR,MAAM,uBAAuB,CAAC;AAG/B,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAG7E,OAAO,EACL,eAAe,EACf,WAAW,EACX,cAAc,EACd,YAAY,GACb,MAAM,uBAAuB,CAAC;AAC/B,YAAY,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC"}
|
package/dist/vector.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector.js","sourceRoot":"","sources":["../src/vector.ts"],"names":[],"mappings":"AAYA,kBAAkB;AAClB,OAAO,EACL,eAAe,EACf,WAAW,EACX,cAAc,EACd,YAAY,GACb,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { TocItem } from '../types.js';
|
|
2
|
+
import { LlmClient } from '../llm/llm-client.js';
|
|
3
|
+
/**
|
|
4
|
+
* Attempts to fix incorrect TOC entries with retries.
*
* Repeats the repair pass on the entries that are still incorrect until none
* remain or `maxAttempts` passes have run.
*
* @param toc - TOC items to repair
* @param pageList - All document pages
* @param incorrectResults - Entries to repair; `index` is the entry's position in `toc`
* @param llmClient - LLM client used for the repair and re-verification calls
* @param maxAttempts - Maximum number of repair passes (the implementation defaults to 3)
* @returns The TOC after the last repair pass
|
|
5
|
+
*/
|
|
6
|
+
export declare function fixIncorrectTocWithRetries(toc: TocItem[], pageList: Array<{
|
|
7
|
+
text: string;
|
|
8
|
+
}>, incorrectResults: Array<{
|
|
9
|
+
index: number;
|
|
10
|
+
title: string;
|
|
11
|
+
physicalIndex: number;
|
|
12
|
+
}>, llmClient: LlmClient, maxAttempts?: number): Promise<TocItem[]>;
|
|
13
|
+
//# sourceMappingURL=fix-toc.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fix-toc.d.ts","sourceRoot":"","sources":["../../src/verification/fix-toc.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAKjD;;GAEG;AACH,wBAAsB,0BAA0B,CAC9C,GAAG,EAAE,OAAO,EAAE,EACd,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,gBAAgB,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAAC,EAChF,SAAS,EAAE,SAAS,EACpB,WAAW,SAAI,GACd,OAAO,CAAC,OAAO,EAAE,CAAC,CAcpB"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { LlmClient } from '../llm/llm-client.js';
|
|
2
|
+
import { singleTocItemIndexFixerPrompt } from '../llm/prompts.js';
|
|
3
|
+
import { convertPhysicalIndexToInt, addPhysicalIndexTags } from '../utils/page-utils.js';
|
|
4
|
+
import { checkTitleAppearance } from './verify-toc.js';
|
|
5
|
+
/**
 * Repairs TOC entries flagged as incorrect, retrying the ones that still fail.
 *
 * A repair pass is run on the outstanding entries for as long as any remain
 * and fewer than `maxAttempts` passes have been made.
 *
 * @param toc - TOC items to repair
 * @param pageList - All document pages
 * @param incorrectResults - Entries to repair (`index` is the position in `toc`)
 * @param llmClient - LLM client handed through to each repair pass
 * @param maxAttempts - Upper bound on the number of repair passes (default 3)
 * @returns The TOC produced by the last pass, or `toc` itself if no pass ran
 */
export async function fixIncorrectTocWithRetries(toc, pageList, incorrectResults, llmClient, maxAttempts = 3) {
    let latestToc = toc;
    let pending = incorrectResults;
    for (let pass = 0; pending.length > 0 && pass < maxAttempts; pass += 1) {
        const outcome = await fixIncorrectToc(latestToc, pageList, pending, llmClient);
        latestToc = outcome.updatedToc;
        pending = outcome.stillIncorrect;
    }
    return latestToc;
}
|
|
19
|
+
/**
 * Runs one repair pass over the TOC entries that failed verification.
 *
 * For each failing entry the pages between its nearest trusted neighbours are
 * tagged and sent to the LLM, which is asked for the page the title is on.
 * The proposed page is then re-verified with `checkTitleAppearance`.
 *
 * `toc` is updated in place and is also returned as `updatedToc`.
 *
 * @param toc - TOC items; `physicalIndex` is used as a 1-based page number
 * @param pageList - Document pages; physical page `n` is `pageList[n - 1]`
 * @param incorrectResults - Entries to repair (`index` is the position in `toc`)
 * @param llmClient - Client exposing `chatJson(messages)`
 * @returns `{ updatedToc, stillIncorrect }`; `stillIncorrect` holds the entries
 *   that should be retried
 */
async function fixIncorrectToc(toc, pageList, incorrectResults, llmClient) {
    const stillIncorrect = [];
    // Entries already known to be wrong must not bound the search range: a bad
    // neighbour could cut the real page out of the window shown to the LLM.
    const untrusted = new Set(incorrectResults.map(({ index }) => index));
    const isValidPage = (page) => Number.isInteger(page) && page >= 1 && page <= pageList.length;
    // Walks from `start` in direction `step` to the nearest trusted entry that
    // has a page number; falls back to the document boundary when none exists.
    const findAnchor = (start, step, fallback) => {
        for (let i = start; i >= 0 && i < toc.length; i += step) {
            const page = toc[i].physicalIndex;
            if (!untrusted.has(i) && page != null) {
                return page;
            }
        }
        return fallback;
    };
    for (const incorrect of incorrectResults) {
        const { index, title } = incorrect;
        const prevCorrectIdx = findAnchor(index - 1, -1, 1);
        const nextCorrectIdx = findAnchor(index + 1, 1, pageList.length);
        // Extract the candidate pages and tag them with their physical index
        const rangePages = pageList.slice(prevCorrectIdx - 1, nextCorrectIdx);
        const taggedContent = addPhysicalIndexTags(rangePages, prevCorrectIdx);
        // Ask the LLM which page the title is on
        const result = await llmClient.chatJson([
            {
                role: 'user',
                content: singleTocItemIndexFixerPrompt(title, taggedContent),
            },
        ]);
        if (!result?.physical_index) {
            stillIncorrect.push(incorrect);
            continue;
        }
        let newIdx;
        let verified = false;
        try {
            newIdx = convertPhysicalIndexToInt(result.physical_index);
            if (isValidPage(newIdx)) {
                verified = await checkTitleAppearance(title, pageList[newIdx - 1].text, llmClient);
            }
        }
        catch {
            // Best effort: a failed conversion or verification leaves the TOC
            // untouched and keeps the entry queued for the next pass.
            stillIncorrect.push(incorrect);
            continue;
        }
        if (!isValidPage(newIdx)) {
            // Never store a page number that does not exist in the document.
            stillIncorrect.push(incorrect);
            continue;
        }
        toc[index].physicalIndex = newIdx;
        if (verified) {
            // A verified entry may anchor the ranges of the entries after it.
            untrusted.delete(index);
        }
        else {
            stillIncorrect.push({ index, title, physicalIndex: newIdx });
        }
    }
    return { updatedToc: toc, stillIncorrect };
}
|
|
73
|
+
//# sourceMappingURL=fix-toc.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fix-toc.js","sourceRoot":"","sources":["../../src/verification/fix-toc.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,mBAAmB,CAAC;AAClE,OAAO,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AACzF,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAEvD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,GAAc,EACd,QAAiC,EACjC,gBAAgF,EAChF,SAAoB,EACpB,WAAW,GAAG,CAAC;IAEf,IAAI,gBAAgB,GAAG,gBAAgB,CAAC;IACxC,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,OAAO,gBAAgB,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,GAAG,WAAW,EAAE,CAAC;QAC/D,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,GAAG,MAAM,eAAe,CAC1D,GAAG,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,CAC3C,CAAC;QACF,GAAG,GAAG,UAAU,CAAC;QACjB,gBAAgB,GAAG,cAAc,CAAC;QAClC,UAAU,EAAE,CAAC;IACf,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,KAAK,UAAU,eAAe,CAC5B,GAAc,EACd,QAAiC,EACjC,gBAAgF,EAChF,SAAoB;IAKpB,MAAM,cAAc,GAAmE,EAAE,CAAC;IAE1F,KAAK,MAAM,SAAS,IAAI,gBAAgB,EAAE,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,SAAS,CAAC;QAEnC,2CAA2C;QAC3C,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,KAAK,IAAI,CAAC,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAChC,IAAI,EAAE,IAAI,IAAI,EAAE,CAAC;gBACf,cAAc,GAAG,EAAE,CAAC;gBACpB,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC;QACrC,KAAK,IAAI,CAAC,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAChC,IAAI,EAAE,IAAI,IAAI,EAAE,CAAC;gBACf,cAAc,GAAG,EAAE,CAAC;gBACpB,MAAM;YACR,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,EAAE,cAAc,CAAC,CAAC;QACtE,MAAM,aAAa,GAAG,oBAAoB,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;QAEvE,mCAAmC;QACnC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAA6B;YAClE;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,6BAA6B,CAAC,KAAK,EAAE,aAAa,CAAC;aAC7D;SACF,CAAC,CAAC;QAEH,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;YAC1B,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,yBAAyB,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;gBAChE,GAAG,CAAC,KAAK,CAAC,CAAC,aAAa,GAAG,MAAM,CAAC;gBAEl
C,iBAAiB;gBACjB,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,CAAC;gBAC3B,IAAI,OAAO,IAAI,CAAC,IAAI,OAAO,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;oBAC9C,MAAM,SAAS,GAAG,MAAM,oBAAoB,CAC1C,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,SAAS,CACzC,CAAC;oBACF,IAAI,CAAC,SAAS,EAAE,CAAC;wBACf,cAAc,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,EAAE,CAAC,CAAC;oBAC/D,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,GAAG,EAAE,cAAc,EAAE,CAAC;AAC7C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,0BAA0B,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/verification/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,0BAA0B,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { TocItem, VerificationResult } from '../types.js';
|
|
2
|
+
import { LlmClient } from '../llm/llm-client.js';
|
|
3
|
+
/**
|
|
4
|
+
* Verifies TOC accuracy by checking if titles appear on their assigned pages.
|
|
5
|
+
*
|
|
6
|
+
* @param toc - The TOC items to verify
|
|
7
|
+
* @param pageList - All document pages
* @param llmClient - LLM client used to check each title against its page
|
|
8
|
+
* @param sampleSize - Number of items to sample (null = check all)
* @returns The share of checked items found on their page (`accuracy`) and the
* items that were not (`incorrectResults`). When the last item that has a page
* number points into the first half of the document, the implementation
* returns accuracy 0 with no incorrect results and checks nothing.
|
|
9
|
+
*/
|
|
10
|
+
export declare function verifyToc(toc: TocItem[], pageList: Array<{
|
|
11
|
+
text: string;
|
|
12
|
+
}>, llmClient: LlmClient, sampleSize?: number | null): Promise<VerificationResult>;
|
|
13
|
+
/**
|
|
14
|
+
* Checks if a title appears on a given page using LLM.
*
* @param title - Section title to look for
* @param pageText - Text of the page to check
* @param llmClient - LLM client that is asked the question
* @returns Resolves to true when the LLM answers "yes", false otherwise
|
|
15
|
+
*/
|
|
16
|
+
export declare function checkTitleAppearance(title: string, pageText: string, llmClient: LlmClient): Promise<boolean>;
|
|
17
|
+
//# sourceMappingURL=verify-toc.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"verify-toc.d.ts","sourceRoot":"","sources":["../../src/verification/verify-toc.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAGjD;;;;;;GAMG;AACH,wBAAsB,SAAS,CAC7B,GAAG,EAAE,OAAO,EAAE,EACd,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,EACpB,UAAU,GAAE,MAAM,GAAG,IAAW,GAC/B,OAAO,CAAC,kBAAkB,CAAC,CAsD7B;AAED;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,OAAO,CAAC,CAKlB"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { LlmClient } from '../llm/llm-client.js';
|
|
2
|
+
import { checkTitleAppearancePrompt } from '../llm/prompts.js';
|
|
3
|
+
/**
 * Estimates how accurate a TOC is by asking, for each checked entry, whether
 * its title appears on the page the entry points to.
 *
 * @param toc - The TOC items to verify; items without a physicalIndex are ignored
 * @param pageList - All document pages (physicalIndex n maps to pageList[n - 1])
 * @param llmClient - LLM client passed on to checkTitleAppearance
 * @param sampleSize - Number of items to sample at random (null = check all)
 * @returns `accuracy` (share of checked items found correct, 1 when nothing
 *   was checked) and `incorrectResults` (the items found incorrect)
 */
export async function verifyToc(toc, pageList, llmClient, sampleSize = null) {
    // Scan backwards for the last item that carries a physicalIndex
    let cursor = toc.length - 1;
    while (cursor >= 0 && toc[cursor].physicalIndex == null) {
        cursor -= 1;
    }
    const lastPhysicalIndex = cursor >= 0 ? toc[cursor].physicalIndex : -1;
    // A TOC that ends in the first half of the document is treated as wrong outright
    const endsTooEarly = lastPhysicalIndex >= 0 && lastPhysicalIndex < pageList.length / 2;
    if (endsTooEarly) {
        return { accuracy: 0, incorrectResults: [] };
    }
    // Only items with a page number can be checked; remember their TOC position
    const candidates = [];
    toc.forEach((item, index) => {
        if (item.physicalIndex != null) {
            candidates.push({ item, index });
        }
    });
    let selected = candidates;
    if (sampleSize != null && sampleSize < candidates.length) {
        // Fisher-Yates shuffle of a copy, then keep the first sampleSize items
        const pool = candidates.slice();
        let upper = pool.length - 1;
        while (upper > 0) {
            const pick = Math.floor(Math.random() * (upper + 1));
            const held = pool[upper];
            pool[upper] = pool[pick];
            pool[pick] = held;
            upper -= 1;
        }
        selected = pool.slice(0, sampleSize);
    }
    // Checks one item; an out-of-range page counts as incorrect without an LLM call
    const checkOne = async ({ item, index }) => {
        const physicalIndex = item.physicalIndex;
        const pageIdx = physicalIndex - 1; // 1-based page number to 0-based offset
        const outOfRange = pageIdx < 0 || pageIdx >= pageList.length;
        if (outOfRange) {
            return { index, title: item.title, physicalIndex, correct: false };
        }
        const correct = await checkTitleAppearance(item.title, pageList[pageIdx].text, llmClient);
        return { index, title: item.title, physicalIndex, correct };
    };
    // All checks are started together
    const outcomes = await Promise.all(selected.map(checkOne));
    let correctCount = 0;
    const incorrectResults = [];
    for (const { index, title, physicalIndex, correct } of outcomes) {
        if (correct) {
            correctCount += 1;
        }
        else {
            incorrectResults.push({ index, title, physicalIndex });
        }
    }
    const accuracy = selected.length > 0 ? correctCount / selected.length : 1;
    return { accuracy, incorrectResults };
}
|
|
55
|
+
/**
 * Checks if a title appears on a given page using LLM.
 *
 * @param title - Section title to look for
 * @param pageText - Text of the page to check
 * @param llmClient - Client exposing `chatJson(messages)`
 * @returns true when the reply's `answer` is "yes" (ignoring case and
 *   surrounding whitespace); false for any other or missing answer
 */
export async function checkTitleAppearance(title, pageText, llmClient) {
    const result = await llmClient.chatJson([
        { role: 'user', content: checkTitleAppearancePrompt(title, pageText) },
    ]);
    const answer = result?.answer;
    // Models do not reliably honour the exact casing or spacing asked for;
    // an exact match would turn "Yes" into a false negative.
    return typeof answer === 'string' && answer.trim().toLowerCase() === 'yes';
}
|
|
64
|
+
//# sourceMappingURL=verify-toc.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"verify-toc.js","sourceRoot":"","sources":["../../src/verification/verify-toc.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,0BAA0B,EAAE,MAAM,mBAAmB,CAAC;AAE/D;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,GAAc,EACd,QAAiC,EACjC,SAAoB,EACpB,aAA4B,IAAI;IAEhC,mCAAmC;IACnC,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAC;IAC3B,KAAK,IAAI,CAAC,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;QAChC,IAAI,EAAE,IAAI,IAAI,EAAE,CAAC;YACf,iBAAiB,GAAG,EAAE,CAAC;YACvB,MAAM;QACR,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,IAAI,iBAAiB,IAAI,CAAC,IAAI,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtE,OAAO,EAAE,QAAQ,EAAE,CAAC,EAAE,gBAAgB,EAAE,EAAE,EAAE,CAAC;IAC/C,CAAC;IAED,yBAAyB;IACzB,MAAM,cAAc,GAAG,GAAG;SACvB,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;SACvC,MAAM,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC;IAEpD,IAAI,QAAQ,GAAG,cAAc,CAAC;IAC9B,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC;QAC7D,mDAAmD;QACnD,MAAM,QAAQ,GAAG,CAAC,GAAG,cAAc,CAAC,CAAC;QACrC,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC9C,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1D,CAAC;QACD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IAC3C,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE;QACrC,MAAM,aAAa,GAAG,IAAI,CAAC,aAAuB,CAAC,CAAC,6BAA6B;QACjF,MAAM,OAAO,GAAG,aAAa,GAAG,CAAC,CAAC,CAAC,qBAAqB;QACxD,IAAI,OAAO,GAAG,CAAC,IAAI,OAAO,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC9C,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;QACrE,CAAC;QACD,MAAM,OAAO,GAAG,MAAM,oBAAoB,CACxC,IAAI,CAAC,KAAK
,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,SAAS,CAC9C,CAAC;QACF,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC;IAC9D,CAAC,CAAC,CACH,CAAC;IAEF,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1E,MAAM,gBAAgB,GAAG,OAAO;SAC7B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;SACzB,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;IAE/E,OAAO,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,KAAa,EACb,QAAgB,EAChB,SAAoB;IAEpB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAqB;QAC1D,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,0BAA0B,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE;KACvE,CAAC,CAAC;IACH,OAAO,MAAM,CAAC,MAAM,KAAK,KAAK,CAAC;AACjC,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@fastrag/pageindex",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "TypeScript SDK for PageIndex hierarchical document indexing and optional vector enhancement.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"document-indexing",
|
|
8
|
+
"rag",
|
|
9
|
+
"vector-search",
|
|
10
|
+
"chunking",
|
|
11
|
+
"pageindex"
|
|
12
|
+
],
|
|
13
|
+
"type": "module",
|
|
14
|
+
"exports": {
|
|
15
|
+
".": {
|
|
16
|
+
"types": "./dist/index.d.ts",
|
|
17
|
+
"import": "./dist/index.js"
|
|
18
|
+
},
|
|
19
|
+
"./types": {
|
|
20
|
+
"types": "./dist/types.d.ts",
|
|
21
|
+
"import": "./dist/types.js"
|
|
22
|
+
},
|
|
23
|
+
"./vector": {
|
|
24
|
+
"types": "./dist/vector.d.ts",
|
|
25
|
+
"import": "./dist/vector.js"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"main": "./dist/index.js",
|
|
29
|
+
"types": "./dist/index.d.ts",
|
|
30
|
+
"files": [
|
|
31
|
+
"dist"
|
|
32
|
+
],
|
|
33
|
+
"engines": {
|
|
34
|
+
"node": ">=20"
|
|
35
|
+
},
|
|
36
|
+
"packageManager": "pnpm@10.4.1",
|
|
37
|
+
"scripts": {
|
|
38
|
+
"build": "rm -rf dist && tsc -p tsconfig.build.json",
|
|
39
|
+
"test": "vitest --run",
|
|
40
|
+
"lint": "eslint src/**/*.ts",
|
|
41
|
+
"typecheck": "tsc --noEmit",
|
|
42
|
+
"prepack": "pnpm build"
|
|
43
|
+
},
|
|
44
|
+
"publishConfig": {
|
|
45
|
+
"access": "public"
|
|
46
|
+
},
|
|
47
|
+
"dependencies": {
|
|
48
|
+
"gpt-tokenizer": "^2.8.0"
|
|
49
|
+
},
|
|
50
|
+
"devDependencies": {
|
|
51
|
+
"@types/node": "^22.0.0",
|
|
52
|
+
"@vitest/coverage-v8": "^3.2.4",
|
|
53
|
+
"eslint": "^9.0.0",
|
|
54
|
+
"typescript": "^5.7.0",
|
|
55
|
+
"typescript-eslint": "^8.0.0",
|
|
56
|
+
"vitest": "^3.0.0"
|
|
57
|
+
}
|
|
58
|
+
}
|