@fastrag/pageindex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/README.zh-CN.md +251 -0
- package/dist/errors/index.d.ts +10 -0
- package/dist/errors/index.d.ts.map +1 -0
- package/dist/errors/index.js +19 -0
- package/dist/errors/index.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/internal-types/config.d.ts +35 -0
- package/dist/internal-types/config.d.ts.map +1 -0
- package/dist/internal-types/config.js +16 -0
- package/dist/internal-types/config.js.map +1 -0
- package/dist/internal-types/document-parser.d.ts +5 -0
- package/dist/internal-types/document-parser.d.ts.map +1 -0
- package/dist/internal-types/document-parser.js +2 -0
- package/dist/internal-types/document-parser.js.map +1 -0
- package/dist/internal-types/index.d.ts +9 -0
- package/dist/internal-types/index.d.ts.map +1 -0
- package/dist/internal-types/index.js +2 -0
- package/dist/internal-types/index.js.map +1 -0
- package/dist/internal-types/llm-provider.d.ts +19 -0
- package/dist/internal-types/llm-provider.d.ts.map +1 -0
- package/dist/internal-types/llm-provider.js +2 -0
- package/dist/internal-types/llm-provider.js.map +1 -0
- package/dist/internal-types/logger.d.ts +7 -0
- package/dist/internal-types/logger.d.ts.map +1 -0
- package/dist/internal-types/logger.js +2 -0
- package/dist/internal-types/logger.js.map +1 -0
- package/dist/internal-types/page.d.ts +5 -0
- package/dist/internal-types/page.d.ts.map +1 -0
- package/dist/internal-types/page.js +2 -0
- package/dist/internal-types/page.js.map +1 -0
- package/dist/internal-types/processing.d.ts +21 -0
- package/dist/internal-types/processing.d.ts.map +1 -0
- package/dist/internal-types/processing.js +2 -0
- package/dist/internal-types/processing.js.map +1 -0
- package/dist/internal-types/tree-node.d.ts +30 -0
- package/dist/internal-types/tree-node.d.ts.map +1 -0
- package/dist/internal-types/tree-node.js +2 -0
- package/dist/internal-types/tree-node.js.map +1 -0
- package/dist/llm/index.d.ts +3 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +3 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/llm-client.d.ts +26 -0
- package/dist/llm/llm-client.d.ts.map +1 -0
- package/dist/llm/llm-client.js +88 -0
- package/dist/llm/llm-client.js.map +1 -0
- package/dist/llm/prompts.d.ts +33 -0
- package/dist/llm/prompts.d.ts.map +1 -0
- package/dist/llm/prompts.js +312 -0
- package/dist/llm/prompts.js.map +1 -0
- package/dist/markdown/index.d.ts +6 -0
- package/dist/markdown/index.d.ts.map +1 -0
- package/dist/markdown/index.js +5 -0
- package/dist/markdown/index.js.map +1 -0
- package/dist/markdown/md-extractor.d.ts +14 -0
- package/dist/markdown/md-extractor.d.ts.map +1 -0
- package/dist/markdown/md-extractor.js +30 -0
- package/dist/markdown/md-extractor.js.map +1 -0
- package/dist/markdown/md-to-tree.d.ts +8 -0
- package/dist/markdown/md-to-tree.d.ts.map +1 -0
- package/dist/markdown/md-to-tree.js +20 -0
- package/dist/markdown/md-to-tree.js.map +1 -0
- package/dist/markdown/md-tree-builder.d.ts +7 -0
- package/dist/markdown/md-tree-builder.d.ts.map +1 -0
- package/dist/markdown/md-tree-builder.js +36 -0
- package/dist/markdown/md-tree-builder.js.map +1 -0
- package/dist/markdown/tree-thinning.d.ts +8 -0
- package/dist/markdown/tree-thinning.d.ts.map +1 -0
- package/dist/markdown/tree-thinning.js +42 -0
- package/dist/markdown/tree-thinning.js.map +1 -0
- package/dist/page-index.d.ts +10 -0
- package/dist/page-index.d.ts.map +1 -0
- package/dist/page-index.js +54 -0
- package/dist/page-index.js.map +1 -0
- package/dist/post-processing/doc-description.d.ts +12 -0
- package/dist/post-processing/doc-description.d.ts.map +1 -0
- package/dist/post-processing/doc-description.js +31 -0
- package/dist/post-processing/doc-description.js.map +1 -0
- package/dist/post-processing/index.d.ts +5 -0
- package/dist/post-processing/index.d.ts.map +1 -0
- package/dist/post-processing/index.js +5 -0
- package/dist/post-processing/index.js.map +1 -0
- package/dist/post-processing/node-id.d.ts +7 -0
- package/dist/post-processing/node-id.d.ts.map +1 -0
- package/dist/post-processing/node-id.js +20 -0
- package/dist/post-processing/node-id.js.map +1 -0
- package/dist/post-processing/node-text.d.ts +11 -0
- package/dist/post-processing/node-text.d.ts.map +1 -0
- package/dist/post-processing/node-text.js +37 -0
- package/dist/post-processing/node-text.js.map +1 -0
- package/dist/post-processing/summary.d.ts +7 -0
- package/dist/post-processing/summary.d.ts.map +1 -0
- package/dist/post-processing/summary.js +31 -0
- package/dist/post-processing/summary.js.map +1 -0
- package/dist/processing/index.d.ts +6 -0
- package/dist/processing/index.d.ts.map +1 -0
- package/dist/processing/index.js +6 -0
- package/dist/processing/index.js.map +1 -0
- package/dist/processing/large-node.d.ts +9 -0
- package/dist/processing/large-node.d.ts.map +1 -0
- package/dist/processing/large-node.js +40 -0
- package/dist/processing/large-node.js.map +1 -0
- package/dist/processing/meta-processor.d.ts +19 -0
- package/dist/processing/meta-processor.d.ts.map +1 -0
- package/dist/processing/meta-processor.js +91 -0
- package/dist/processing/meta-processor.js.map +1 -0
- package/dist/processing/no-toc.d.ts +10 -0
- package/dist/processing/no-toc.d.ts.map +1 -0
- package/dist/processing/no-toc.js +44 -0
- package/dist/processing/no-toc.js.map +1 -0
- package/dist/processing/toc-no-pages.d.ts +11 -0
- package/dist/processing/toc-no-pages.d.ts.map +1 -0
- package/dist/processing/toc-no-pages.js +46 -0
- package/dist/processing/toc-no-pages.js.map +1 -0
- package/dist/processing/toc-with-pages.d.ts +15 -0
- package/dist/processing/toc-with-pages.d.ts.map +1 -0
- package/dist/processing/toc-with-pages.js +151 -0
- package/dist/processing/toc-with-pages.js.map +1 -0
- package/dist/toc/index.d.ts +4 -0
- package/dist/toc/index.d.ts.map +1 -0
- package/dist/toc/index.js +4 -0
- package/dist/toc/index.js.map +1 -0
- package/dist/toc/toc-detector.d.ts +23 -0
- package/dist/toc/toc-detector.d.ts.map +1 -0
- package/dist/toc/toc-detector.js +65 -0
- package/dist/toc/toc-detector.js.map +1 -0
- package/dist/toc/toc-extractor.d.ts +13 -0
- package/dist/toc/toc-extractor.d.ts.map +1 -0
- package/dist/toc/toc-extractor.js +32 -0
- package/dist/toc/toc-extractor.js.map +1 -0
- package/dist/toc/toc-transformer.d.ts +11 -0
- package/dist/toc/toc-transformer.d.ts.map +1 -0
- package/dist/toc/toc-transformer.js +69 -0
- package/dist/toc/toc-transformer.js.map +1 -0
- package/dist/tree/index.d.ts +4 -0
- package/dist/tree/index.d.ts.map +1 -0
- package/dist/tree/index.js +4 -0
- package/dist/tree/index.js.map +1 -0
- package/dist/tree/list-to-tree.d.ts +7 -0
- package/dist/tree/list-to-tree.d.ts.map +1 -0
- package/dist/tree/list-to-tree.js +33 -0
- package/dist/tree/list-to-tree.js.map +1 -0
- package/dist/tree/post-processing.d.ts +12 -0
- package/dist/tree/post-processing.d.ts.map +1 -0
- package/dist/tree/post-processing.js +87 -0
- package/dist/tree/post-processing.js.map +1 -0
- package/dist/tree/tree-utils.d.ts +18 -0
- package/dist/tree/tree-utils.d.ts.map +1 -0
- package/dist/tree/tree-utils.js +43 -0
- package/dist/tree/tree-utils.js.map +1 -0
- package/dist/tree-parser.d.ts +30 -0
- package/dist/tree-parser.d.ts.map +1 -0
- package/dist/tree-parser.js +73 -0
- package/dist/tree-parser.js.map +1 -0
- package/dist/types.d.ts +3 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/config-loader.d.ts +15 -0
- package/dist/utils/config-loader.d.ts.map +1 -0
- package/dist/utils/config-loader.js +19 -0
- package/dist/utils/config-loader.js.map +1 -0
- package/dist/utils/index.d.ts +7 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +6 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/json-parser.d.ts +2 -0
- package/dist/utils/json-parser.d.ts.map +1 -0
- package/dist/utils/json-parser.js +76 -0
- package/dist/utils/json-parser.js.map +1 -0
- package/dist/utils/logger.d.ts +3 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +10 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/page-utils.d.ts +16 -0
- package/dist/utils/page-utils.d.ts.map +1 -0
- package/dist/utils/page-utils.js +56 -0
- package/dist/utils/page-utils.js.map +1 -0
- package/dist/utils/token-counter.d.ts +2 -0
- package/dist/utils/token-counter.d.ts.map +1 -0
- package/dist/utils/token-counter.js +5 -0
- package/dist/utils/token-counter.js.map +1 -0
- package/dist/vector-lib/adapters/in-memory-adapter.d.ts +14 -0
- package/dist/vector-lib/adapters/in-memory-adapter.d.ts.map +1 -0
- package/dist/vector-lib/adapters/in-memory-adapter.js +55 -0
- package/dist/vector-lib/adapters/in-memory-adapter.js.map +1 -0
- package/dist/vector-lib/adapters/vector-store.d.ts +10 -0
- package/dist/vector-lib/adapters/vector-store.d.ts.map +1 -0
- package/dist/vector-lib/adapters/vector-store.js +2 -0
- package/dist/vector-lib/adapters/vector-store.js.map +1 -0
- package/dist/vector-lib/chunker/tree-chunker.d.ts +8 -0
- package/dist/vector-lib/chunker/tree-chunker.d.ts.map +1 -0
- package/dist/vector-lib/chunker/tree-chunker.js +59 -0
- package/dist/vector-lib/chunker/tree-chunker.js.map +1 -0
- package/dist/vector-lib/embedder/embedder.d.ts +8 -0
- package/dist/vector-lib/embedder/embedder.d.ts.map +1 -0
- package/dist/vector-lib/embedder/embedder.js +2 -0
- package/dist/vector-lib/embedder/embedder.js.map +1 -0
- package/dist/vector-lib/index.d.ts +10 -0
- package/dist/vector-lib/index.d.ts.map +1 -0
- package/dist/vector-lib/index.js +6 -0
- package/dist/vector-lib/index.js.map +1 -0
- package/dist/vector-lib/search/hybrid-search.d.ts +19 -0
- package/dist/vector-lib/search/hybrid-search.d.ts.map +1 -0
- package/dist/vector-lib/search/hybrid-search.js +25 -0
- package/dist/vector-lib/search/hybrid-search.js.map +1 -0
- package/dist/vector-lib/search/reranker.d.ts +14 -0
- package/dist/vector-lib/search/reranker.d.ts.map +1 -0
- package/dist/vector-lib/search/reranker.js +2 -0
- package/dist/vector-lib/search/reranker.js.map +1 -0
- package/dist/vector-lib/types.d.ts +29 -0
- package/dist/vector-lib/types.d.ts.map +1 -0
- package/dist/vector-lib/types.js +2 -0
- package/dist/vector-lib/types.js.map +1 -0
- package/dist/vector-lib/vector-enhancer.d.ts +28 -0
- package/dist/vector-lib/vector-enhancer.d.ts.map +1 -0
- package/dist/vector-lib/vector-enhancer.js +54 -0
- package/dist/vector-lib/vector-enhancer.js.map +1 -0
- package/dist/vector.d.ts +5 -0
- package/dist/vector.d.ts.map +1 -0
- package/dist/vector.js +3 -0
- package/dist/vector.js.map +1 -0
- package/dist/verification/fix-toc.d.ts +13 -0
- package/dist/verification/fix-toc.d.ts.map +1 -0
- package/dist/verification/fix-toc.js +73 -0
- package/dist/verification/fix-toc.js.map +1 -0
- package/dist/verification/index.d.ts +3 -0
- package/dist/verification/index.d.ts.map +1 -0
- package/dist/verification/index.js +3 -0
- package/dist/verification/index.js.map +1 -0
- package/dist/verification/verify-toc.d.ts +17 -0
- package/dist/verification/verify-toc.d.ts.map +1 -0
- package/dist/verification/verify-toc.js +64 -0
- package/dist/verification/verify-toc.js.map +1 -0
- package/package.json +58 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recursively collects all nodes in the tree into a flat array.
|
|
3
|
+
*/
|
|
4
|
+
export function getNodes(tree) {
|
|
5
|
+
const result = [];
|
|
6
|
+
for (const node of tree) {
|
|
7
|
+
result.push(node);
|
|
8
|
+
if (node.nodes.length > 0) {
|
|
9
|
+
result.push(...getNodes(node.nodes));
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
return result;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Recursively collects all leaf nodes (nodes with no children).
|
|
16
|
+
*/
|
|
17
|
+
export function getLeafNodes(tree) {
|
|
18
|
+
const result = [];
|
|
19
|
+
for (const node of tree) {
|
|
20
|
+
if (node.nodes.length === 0) {
|
|
21
|
+
result.push(node);
|
|
22
|
+
}
|
|
23
|
+
else {
|
|
24
|
+
result.push(...getLeafNodes(node.nodes));
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
return result;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Converts a tree back to a flat list with structure indices.
|
|
31
|
+
*/
|
|
32
|
+
export function structureToList(tree, prefix = '') {
|
|
33
|
+
const result = [];
|
|
34
|
+
for (let i = 0; i < tree.length; i++) {
|
|
35
|
+
const structure = prefix ? `${prefix}.${i + 1}` : `${i + 1}`;
|
|
36
|
+
result.push({ structure, title: tree[i].title, node: tree[i] });
|
|
37
|
+
if (tree[i].nodes.length > 0) {
|
|
38
|
+
result.push(...structureToList(tree[i].nodes, structure));
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
return result;
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=tree-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tree-utils.js","sourceRoot":"","sources":["../../src/tree/tree-utils.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAgB;IACvC,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAgB;IAC3C,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,IAAgB,EAChB,MAAM,GAAG,EAAE;IAEX,MAAM,MAAM,GAAgE,EAAE,CAAC;IAC/E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAChE,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { TreeNode, TocItem, ProcessingMode, DegradationEvent, Logger } from './types.js';
|
|
2
|
+
import { LlmClient } from './llm/llm-client.js';
|
|
3
|
+
/**
 * Entry point of the tree-parsing pipeline. The stages run in this order:
 * 1. Detect a table of contents (TOC)
 * 2. Process the pages according to TOC presence and page numbers
 * 3. Add a preface entry if needed
 * 4. Check whether each title appears at the start of its page
 * 5. Build the tree via postProcessing
 * 6. Recursively split large nodes
 *
 * @param pageList - Pages of the document, each exposing its `text`.
 * @param llmClient - Client used for the LLM calls.
 * @param logger - Logger handed to the processing stages.
 * @param config - Limits for the run plus an optional degradation callback.
 * @returns The resulting tree together with `finalMode` and `degradations`.
 */
export declare function treeParser(
    pageList: {
        text: string;
    }[],
    llmClient: LlmClient,
    logger: Logger,
    config: {
        tocCheckPageNum: number;
        maxPageNumEachNode: number;
        maxTokenNumEachNode: number;
        onDegradation?: (event: DegradationEvent) => void;
    },
): Promise<{
    structure: TreeNode[];
    finalMode: ProcessingMode;
    degradations: DegradationEvent[];
}>;
|
|
24
|
+
/**
 * Checks, for all items concurrently, whether each title appears at the start
 * of its page.
 *
 * @param items - TOC items to check.
 * @param pageList - Pages of the document, each exposing its `text`.
 * @param llmClient - Client used for the LLM calls.
 * @returns A promise that resolves with no value once every check has finished.
 */
export declare function checkTitleAppearanceInStartConcurrent(
    items: TocItem[],
    pageList: {
        text: string;
    }[],
    llmClient: LlmClient,
): Promise<void>;
|
|
30
|
+
//# sourceMappingURL=tree-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tree-parser.d.ts","sourceRoot":"","sources":["../src/tree-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAOhD;;;;;;;;GAQG;AACH,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,EACpB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE;IACN,eAAe,EAAE,MAAM,CAAC;IACxB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;CACnD,GACA,OAAO,CAAC;IAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IAAC,SAAS,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,gBAAgB,EAAE,CAAA;CAAE,CAAC,CA6CjG;AAYD;;GAEG;AACH,wBAAsB,qCAAqC,CACzD,KAAK,EAAE,OAAO,EAAE,EAChB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,IAAI,CAAC,CAkBf"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { LlmClient } from './llm/llm-client.js';
|
|
2
|
+
import { checkToc } from './toc/toc-detector.js';
|
|
3
|
+
import { metaProcessor } from './processing/meta-processor.js';
|
|
4
|
+
import { processLargeNodeRecursively } from './processing/large-node.js';
|
|
5
|
+
import { postProcessing } from './tree/post-processing.js';
|
|
6
|
+
import { checkTitleAppearanceInStartPrompt } from './llm/prompts.js';
|
|
7
|
+
/**
 * Runs the full tree-parsing pipeline over a list of pages.
 *
 * Stages, in order:
 * 1. TOC detection (`checkToc`)
 * 2. Mode selection and item extraction (`metaProcessor`)
 * 3. Optional preface insertion
 * 4. Concurrent "title at start of page" check
 * 5. Tree construction (`postProcessing`) from items that have a physical index
 * 6. Recursive splitting of large nodes
 *
 * @param pageList - Pages of the document; `pageList.length` is passed to `postProcessing`.
 * @param llmClient - Client forwarded to every LLM-backed stage.
 * @param logger - Logger forwarded to the processing stages.
 * @param config - Provides `tocCheckPageNum`, `maxPageNumEachNode`,
 *   `maxTokenNumEachNode` and the optional `onDegradation` callback.
 * @returns `{ structure, finalMode, degradations }`, where `finalMode` and
 *   `degradations` are passed through unchanged from `metaProcessor`.
 */
export async function treeParser(pageList, llmClient, logger, config) {
    // Stage 1: look for a table of contents.
    const toc = await checkToc(pageList, config.tocCheckPageNum, llmClient, logger);
    // Stage 2: start from the no-TOC mode and upgrade it when TOC content was found.
    let initialMode = 'process_no_toc';
    if (toc.tocContent) {
        initialMode = toc.pageIndexGivenInToc
            ? 'process_toc_with_page_numbers'
            : 'process_toc_no_page_numbers';
    }
    const metaOptions = {
        tocContent: toc.tocContent,
        tocPageList: toc.tocPageList,
        onDegradation: config.onDegradation,
    };
    const { items: tocItems, finalMode, degradations } = await metaProcessor(pageList, initialMode, llmClient, logger, metaOptions);
    // Stage 3: may prepend a "Preface" entry (mutates tocItems in place).
    addPrefaceIfNeeded(tocItems);
    // Stage 4: annotate items with whether their title opens the page.
    await checkTitleAppearanceInStartConcurrent(tocItems, pageList, llmClient);
    // Stage 5: only items with a physical index take part in tree building.
    const indexedItems = tocItems.filter((entry) => entry.physicalIndex != null);
    const structure = postProcessing(indexedItems, pageList.length);
    // Stage 6: split oversized nodes, all top-level nodes in parallel.
    const splitJobs = structure.map((rootNode) => processLargeNodeRecursively(rootNode, pageList, llmClient, logger, config.maxPageNumEachNode, config.maxTokenNumEachNode));
    await Promise.all(splitJobs);
    return { structure, finalMode, degradations };
}
|
|
45
|
+
function addPrefaceIfNeeded(items) {
|
|
46
|
+
if (items.length > 0 && items[0].physicalIndex != null && items[0].physicalIndex > 1) {
|
|
47
|
+
items.unshift({
|
|
48
|
+
structure: '0',
|
|
49
|
+
title: 'Preface',
|
|
50
|
+
physicalIndex: 1,
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Concurrently checks if titles appear at the start of their pages.
|
|
56
|
+
*/
|
|
57
|
+
export async function checkTitleAppearanceInStartConcurrent(items, pageList, llmClient) {
|
|
58
|
+
await Promise.all(items.map(async (item) => {
|
|
59
|
+
if (item.physicalIndex == null)
|
|
60
|
+
return;
|
|
61
|
+
const pageIdx = item.physicalIndex - 1;
|
|
62
|
+
if (pageIdx < 0 || pageIdx >= pageList.length)
|
|
63
|
+
return;
|
|
64
|
+
const result = await llmClient.chatJson([
|
|
65
|
+
{
|
|
66
|
+
role: 'user',
|
|
67
|
+
content: checkTitleAppearanceInStartPrompt(item.title, pageList[pageIdx].text),
|
|
68
|
+
},
|
|
69
|
+
]);
|
|
70
|
+
item.appearStart = result.start_begin === 'yes' ? 'yes' : 'no';
|
|
71
|
+
}));
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=tree-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tree-parser.js","sourceRoot":"","sources":["../src/tree-parser.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChD,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAE,2BAA2B,EAAE,MAAM,4BAA4B,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,iCAAiC,EAAE,MAAM,kBAAkB,CAAC;AAErE;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,QAAiC,EACjC,SAAoB,EACpB,MAAc,EACd,MAKC;IAED,wBAAwB;IACxB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAC9B,QAAQ,EAAE,MAAM,CAAC,eAAe,EAAE,SAAS,EAAE,MAAM,CACpD,CAAC;IAEF,qCAAqC;IACrC,IAAI,IAAoB,CAAC;IACzB,IAAI,SAAS,CAAC,UAAU,EAAE,CAAC;QACzB,IAAI,GAAG,SAAS,CAAC,mBAAmB;YAClC,CAAC,CAAC,+BAA+B;YACjC,CAAC,CAAC,6BAA6B,CAAC;IACpC,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,gBAAgB,CAAC;IAC1B,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,MAAM,aAAa,CAC5D,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE;QACjC,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,WAAW,EAAE,SAAS,CAAC,WAAW;QAClC,aAAa,EAAE,MAAM,CAAC,aAAa;KACpC,CACF,CAAC;IAEF,gCAAgC;IAChC,kBAAkB,CAAC,KAAK,CAAC,CAAC;IAE1B,uDAAuD;IACvD,MAAM,qCAAqC,CAAC,KAAK,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;IAExE,gCAAgC;IAChC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC;IACtE,MAAM,SAAS,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAE9D,8BAA8B;IAC9B,MAAM,OAAO,CAAC,GAAG,CACf,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CACrB,2BAA2B,CACzB,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EACjC,MAAM,CAAC,kBAAkB,EAAE,MAAM,CAAC,mBAAmB,CACtD,CACF,CACF,CAAC;IAEF,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC;AAChD,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAgB;IAC1C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,EAAE,CAAC;QACrF,KAAK,CAAC,OAAO,CAAC;YACZ,SAAS,EAAE,GAAG;YACd,KAAK,EAAE,SAAS;YAChB,aAAa,EAAE,CAAC;SACjB,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qCAAqC,CACzD,KAAgB,EAChB,QAAiC,EACjC,SAAoB;IAEpB,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAA
E;QACvB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI;YAAE,OAAO;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC;QACvC,IAAI,OAAO,GAAG,CAAC,IAAI,OAAO,IAAI,QAAQ,CAAC,MAAM;YAAE,OAAO;QAEtD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAA0B;YAC/D;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,iCAAiC,CACxC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,CACnC;aACF;SACF,CAAC,CAAC;QACH,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { DEFAULT_CONFIG, DEFAULT_RETRY_CONFIG, } from './internal-types/index.js';
|
|
2
|
+
export type { TreeNode, TocItem, PageIndexResult, PageIndexConfig, RetryConfig, MdConfig, PageContent, DocumentParser, LlmProvider, LlmMessage, LlmOptions, LlmResponse, JsonSchema, Logger, ProcessingMode, TocCheckResult, VerificationResult, DegradationEvent, } from './internal-types/index.js';
|
|
3
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,oBAAoB,GACrB,MAAM,2BAA2B,CAAC;AAEnC,YAAY,EACV,QAAQ,EACR,OAAO,EACP,eAAe,EACf,eAAe,EACf,WAAW,EACX,QAAQ,EACR,WAAW,EACX,cAAc,EACd,WAAW,EACX,UAAU,EACV,UAAU,EACV,WAAW,EACX,UAAU,EACV,MAAM,EACN,cAAc,EACd,cAAc,EACd,kBAAkB,EAClB,gBAAgB,GACjB,MAAM,2BAA2B,CAAC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,oBAAoB,GACrB,MAAM,2BAA2B,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { PageIndexConfig, RetryConfig, Logger, DegradationEvent } from '../types.js';
|
|
2
|
+
/**
 * Configuration shape returned by `mergeConfig`.
 *
 * Every member except `onDegradation` is required, so code that receives a
 * `ResolvedConfig` does not need to apply defaults of its own.
 *
 * NOTE(review): `tocCheckPageNum`, `maxPageNumEachNode`, `maxTokenNumEachNode`
 * and `onDegradation` match the names and types of the `config` parameter of
 * `treeParser`; presumably they are forwarded to it — confirm against the caller.
 */
export interface ResolvedConfig {
|
|
3
|
+
tocCheckPageNum: number;
|
|
4
|
+
maxPageNumEachNode: number;
|
|
5
|
+
maxTokenNumEachNode: number;
|
|
6
|
+
addNodeId: boolean;
|
|
7
|
+
addNodeSummary: boolean;
|
|
8
|
+
addDocDescription: boolean;
|
|
9
|
+
addNodeText: boolean;
|
|
10
|
+
onDegradation?: (event: DegradationEvent) => void;
|
|
11
|
+
retryConfig: RetryConfig;
|
|
12
|
+
logger: Logger;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Builds a complete `ResolvedConfig` from a partial user configuration.
 *
 * @param userConfig - Any subset of `PageIndexConfig`.
 * @returns A configuration in which every required member is populated.
 */
export declare function mergeConfig(userConfig: Partial<PageIndexConfig>): ResolvedConfig;
|
|
15
|
+
//# sourceMappingURL=config-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config-loader.d.ts","sourceRoot":"","sources":["../../src/utils/config-loader.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,eAAe,EACf,WAAW,EACX,MAAM,EACN,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAGrB,MAAM,WAAW,cAAc;IAC7B,eAAe,EAAE,MAAM,CAAC;IACxB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;IACxB,iBAAiB,EAAE,OAAO,CAAC;IAC3B,WAAW,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAClD,WAAW,EAAE,WAAW,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,WAAW,CACzB,UAAU,EAAE,OAAO,CAAC,eAAe,CAAC,GACnC,cAAc,CAoBhB"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { DEFAULT_CONFIG, DEFAULT_RETRY_CONFIG, } from '../types.js';
|
|
2
|
+
import { createSilentLogger } from './logger.js';
|
|
3
|
+
/**
 * Resolves a partial user configuration against the package defaults.
 *
 * - Scalar options fall back to `DEFAULT_CONFIG` when the user value is
 *   `null` or `undefined` (other falsy values such as `0` or `false` are kept).
 * - `onDegradation` is copied as-is and may stay `undefined`.
 * - `retryConfig` is a fresh object: `DEFAULT_RETRY_CONFIG` overlaid with the
 *   user's retry settings, if any.
 * - `logger` falls back to a new silent logger.
 *
 * @param userConfig - Partial configuration supplied by the caller.
 * @returns A new, fully populated configuration object.
 */
export function mergeConfig(userConfig) {
    // Reads one option from the user config, falling back to the package default.
    const orDefault = (key) => userConfig[key] ?? DEFAULT_CONFIG[key];
    return {
        tocCheckPageNum: orDefault('tocCheckPageNum'),
        maxPageNumEachNode: orDefault('maxPageNumEachNode'),
        maxTokenNumEachNode: orDefault('maxTokenNumEachNode'),
        addNodeId: orDefault('addNodeId'),
        addNodeSummary: orDefault('addNodeSummary'),
        addDocDescription: orDefault('addDocDescription'),
        addNodeText: orDefault('addNodeText'),
        onDegradation: userConfig.onDegradation,
        // Spreading a missing (or otherwise falsy) retryConfig contributes nothing,
        // so this yields a plain copy of the defaults in that case.
        retryConfig: { ...DEFAULT_RETRY_CONFIG, ...userConfig.retryConfig },
        logger: userConfig.logger ?? createSilentLogger(),
    };
}
|
|
19
|
+
//# sourceMappingURL=config-loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config-loader.js","sourceRoot":"","sources":["../../src/utils/config-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,oBAAoB,GACrB,MAAM,aAAa,CAAC;AAOrB,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAejD,MAAM,UAAU,WAAW,CACzB,UAAoC;IAEpC,OAAO;QACL,eAAe,EACb,UAAU,CAAC,eAAe,IAAI,cAAc,CAAC,eAAe;QAC9D,kBAAkB,EAChB,UAAU,CAAC,kBAAkB,IAAI,cAAc,CAAC,kBAAkB;QACpE,mBAAmB,EACjB,UAAU,CAAC,mBAAmB,IAAI,cAAc,CAAC,mBAAmB;QACtE,SAAS,EAAE,UAAU,CAAC,SAAS,IAAI,cAAc,CAAC,SAAS;QAC3D,cAAc,EACZ,UAAU,CAAC,cAAc,IAAI,cAAc,CAAC,cAAc;QAC5D,iBAAiB,EACf,UAAU,CAAC,iBAAiB,IAAI,cAAc,CAAC,iBAAiB;QAClE,WAAW,EAAE,UAAU,CAAC,WAAW,IAAI,cAAc,CAAC,WAAW;QACjE,aAAa,EAAE,UAAU,CAAC,aAAa;QACvC,WAAW,EAAE,UAAU,CAAC,WAAW;YACjC,CAAC,CAAC,EAAE,GAAG,oBAAoB,EAAE,GAAG,UAAU,CAAC,WAAW,EAAE;YACxD,CAAC,CAAC,EAAE,GAAG,oBAAoB,EAAE;QAC/B,MAAM,EAAE,UAAU,CAAC,MAAM,IAAI,kBAAkB,EAAE;KAClD,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export { countTokens } from './token-counter.js';
|
|
2
|
+
export { extractJson } from './json-parser.js';
|
|
3
|
+
export { mergeConfig } from './config-loader.js';
|
|
4
|
+
export type { ResolvedConfig } from './config-loader.js';
|
|
5
|
+
export { pageListToGroupText, convertPhysicalIndexToInt, addPhysicalIndexTags, } from './page-utils.js';
|
|
6
|
+
export { createSilentLogger } from './logger.js';
|
|
7
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EACL,mBAAmB,EACnB,yBAAyB,EACzB,oBAAoB,GACrB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { countTokens } from './token-counter.js';
|
|
2
|
+
export { extractJson } from './json-parser.js';
|
|
3
|
+
export { mergeConfig } from './config-loader.js';
|
|
4
|
+
export { pageListToGroupText, convertPhysicalIndexToInt, addPhysicalIndexTags, } from './page-utils.js';
|
|
5
|
+
export { createSilentLogger } from './logger.js';
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,mBAAmB,EACnB,yBAAyB,EACzB,oBAAoB,GACrB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-parser.d.ts","sourceRoot":"","sources":["../../src/utils/json-parser.ts"],"names":[],"mappings":"AAwDA,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAgChD"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { LlmError } from '../errors/index.js';
|
|
2
|
+
/**
 * Rewrites Python-style `None` literals to JSON `null`, leaving the contents
 * of quoted strings untouched.
 *
 * The text is scanned once, left to right. Both `"` and `'` open a string,
 * which ends at the next unescaped occurrence of the same quote character.
 * Outside strings, `None` is replaced only when it stands alone, i.e. it is
 * neither preceded nor followed by a word character (`\w`).
 *
 * @param text - Raw JSON-like text.
 * @returns The text with every stand-alone, unquoted `None` replaced by `null`.
 */
function replaceNoneOutsideStrings(text) {
    const isWordChar = (c) => /\w/.test(c);
    const pieces = [];
    // Quote character of the string currently being scanned; '' when outside a string.
    let openQuote = '';
    // True when the previous character was a backslash inside a string.
    let escaped = false;
    let pos = 0;
    while (pos < text.length) {
        const ch = text[pos];
        if (escaped) {
            // The escaped character is copied verbatim and never closes the string.
            escaped = false;
        }
        else if (openQuote !== '') {
            if (ch === '\\') {
                escaped = true;
            }
            else if (ch === openQuote) {
                openQuote = '';
            }
        }
        else if (ch === '"' || ch === "'") {
            openQuote = ch;
        }
        else if (text.startsWith('None', pos)) {
            // The start and end of the text count as non-word boundaries.
            const prev = pos > 0 ? text[pos - 1] : ' ';
            const next = pos + 4 < text.length ? text[pos + 4] : ' ';
            if (!isWordChar(prev) && !isWordChar(next)) {
                pieces.push('null');
                pos += 4;
                continue;
            }
        }
        pieces.push(ch);
        pos += 1;
    }
    return pieces.join('');
}
|
|
47
|
+
/**
 * Parses JSON out of a raw LLM reply, tolerating a few common deviations.
 *
 * Processing steps:
 * 1. Trim the reply; if it contains a ``` or ```json fenced block, keep only
 *    the content of the first such block.
 * 2. Replace Python-style `None` with `null` outside quoted strings.
 * 3. Turn newlines into spaces and collapse every whitespace run to a single
 *    space (this also applies to whitespace inside string values).
 * 4. Try `JSON.parse`; on failure, strip commas that directly precede `]` or
 *    `}` and try once more.
 *
 * @param raw - Raw text returned by the LLM.
 * @returns The parsed JSON value.
 * @throws LlmError when both parse attempts fail; the message includes the
 *   first 200 characters of `raw`.
 */
export function extractJson(raw) {
    const trimmed = raw.trim();
    const fenced = /```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/.exec(trimmed);
    const payload = fenced ? fenced[1].trim() : trimmed;
    const candidate = replaceNoneOutsideStrings(payload)
        .replace(/\n/g, ' ')
        .replace(/\s+/g, ' ');
    // First attempt: the normalized text as-is.
    try {
        return JSON.parse(candidate);
    }
    catch {
        // Not valid yet; retry below without trailing commas.
    }
    const withoutTrailingCommas = candidate
        .replace(/,\s*]/g, ']')
        .replace(/,\s*}/g, '}');
    try {
        return JSON.parse(withoutTrailingCommas);
    }
    catch {
        // Fail loudly instead of silently returning an empty object.
        throw new LlmError(`Failed to parse JSON from LLM response: ${raw.slice(0, 200)}`);
    }
}
|
|
76
|
+
//# sourceMappingURL=json-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-parser.js","sourceRoot":"","sources":["../../src/utils/json-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C;;GAEG;AACH,SAAS,yBAAyB,CAAC,IAAY;IAC7C,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,IAAI,SAAS,GAAG,EAAE,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,EAAE,CAAC;YACb,UAAU,GAAG,KAAK,CAAC;YACnB,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,EAAE,CAAC;YACb,UAAU,GAAG,IAAI,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,CAAC,QAAQ,IAAI,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,CAAC,EAAE,CAAC;YAC5C,QAAQ,GAAG,IAAI,CAAC;YAChB,SAAS,GAAG,EAAE,CAAC;YACf,MAAM,IAAI,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,IAAI,QAAQ,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACjC,QAAQ,GAAG,KAAK,CAAC;YACjB,MAAM,IAAI,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,iDAAiD;QACjD,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YACzC,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YACtD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5C,MAAM,IAAI,MAAM,CAAC;gBACjB,CAAC,IAAI,CAAC,CAAC,CAAC,mCAAmC;gBAC3C,SAAS;YACX,CAAC;QACH,CAAC;QAED,MAAM,IAAI,EAAE,CAAC;IACf,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,IAAI,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAEtB,0CAA0C;IAC1C,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3E,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,CAAC;IAED,kEAAkE;IAClE,IAAI,GAAG,yBAAyB,CAAC,IAAI,CAAC,CAAC;IAEvC,gDAAgD;IAChD,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAErD,yBAAyB;IACzB,IAAI,CAAC;Q
ACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,yCAAyC;QACzC,MAAM,OAAO,GAAG,IAAI;aACjB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC7B,CAAC;QAAC,MAAM,CAAC;YACP,iDAAiD;YACjD,MAAM,IAAI,QAAQ,CAChB,2CAA2C,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC/D,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/utils/logger.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAI1C,wBAAgB,kBAAkB,IAAI,MAAM,CAO3C"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/utils/logger.ts"],"names":[],"mappings":"AAEA,MAAM,IAAI,GAAG,GAAS,EAAE,GAAE,CAAC,CAAC;AAE5B,MAAM,UAAU,kBAAkB;IAChC,OAAO;QACL,KAAK,EAAE,IAAI;QACX,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,IAAI;QACV,KAAK,EAAE,IAAI;KACZ,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Groups consecutive pages into text chunks; the pages of a chunk are joined
 * with "\n".
 *
 * When the sum of `tokenLengths` does not exceed `maxTokens`, one chunk holding
 * every page is returned. Otherwise pages are accumulated until adding the next
 * page would exceed a per-chunk budget (the mean of `maxTokens` and the
 * even-split size, rounded up). Each following chunk starts with up to
 * `overlapPage` pages that precede the page that triggered the split, for
 * context continuity. A chunk may still exceed `maxTokens` when a single page
 * or the overlap is large.
 *
 * @param pageContents - Text of each page, in order.
 * @param tokenLengths - Token count of each page, index-aligned with `pageContents`.
 * @param maxTokens - Token limit that drives splitting; the implementation defaults to 20000.
 * @param overlapPage - Number of preceding pages repeated at the start of each later chunk; the implementation defaults to 1.
 * @returns The chunk texts, in page order.
 */
|
|
5
|
+
export declare function pageListToGroupText(pageContents: string[], tokenLengths: number[], maxTokens?: number, overlapPage?: number): string[];
|
|
6
|
+
/**
 * Extracts the page number from a physical-index tag, e.g.
 * "<physical_index_5>" or "physical_index_5" both yield 5.
 *
 * @param value - Text containing `physical_index_` followed by decimal digits.
 * @returns The digits parsed as a base-10 integer.
 * @throws {Error} When `value` contains no `physical_index_<digits>` sequence.
 */
|
|
9
|
+
export declare function convertPhysicalIndexToInt(value: string): number;
|
|
10
|
+
/**
 * Wraps each page's text between two identical `<physical_index_N>` marker
 * lines and joins all wrapped pages with "\n". The same marker is used before
 * and after the text; no closing-slash form is emitted.
 *
 * @param pages - Pages to wrap; only the `text` property is read.
 * @param startIndex - Index assigned to the first page; later pages count up by one.
 * @returns The concatenated, tagged text (an empty string for an empty list).
 */
|
|
13
|
+
export declare function addPhysicalIndexTags(pages: Array<{
|
|
14
|
+
text: string;
|
|
15
|
+
}>, startIndex: number): string;
|
|
16
|
+
//# sourceMappingURL=page-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"page-utils.d.ts","sourceRoot":"","sources":["../../src/utils/page-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,mBAAmB,CACjC,YAAY,EAAE,MAAM,EAAE,EACtB,YAAY,EAAE,MAAM,EAAE,EACtB,SAAS,SAAQ,EACjB,WAAW,SAAI,GACd,MAAM,EAAE,CAuCV;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAM/D;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9B,UAAU,EAAE,MAAM,GACjB,MAAM,CAOR"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
 * Splits an ordered list of pages into newline-joined text groups.
 *
 * If the total token count is at most `maxTokens`, everything is returned as a
 * single group. Otherwise pages are collected until the next page would push
 * the open group past a soft budget; the group is then closed and the next one
 * is seeded with up to `overlapPage` of the pages that came just before.
 *
 * @param pageContents - Text of each page, in order.
 * @param tokenLengths - Token count of each page, index-aligned with `pageContents`.
 * @param maxTokens - Token limit that drives splitting (default 20000).
 * @param overlapPage - Pages repeated at the start of each later group (default 1).
 * @returns The group texts, in page order.
 */
export function pageListToGroupText(pageContents, tokenLengths, maxTokens = 20000, overlapPage = 1) {
    const grandTotal = tokenLengths.reduce((acc, len) => acc + len, 0);
    if (grandTotal <= maxTokens) {
        // Everything fits: no splitting required.
        return [pageContents.join('\n')];
    }

    // Soft budget: halfway between an even split and the hard limit.
    const partCount = Math.ceil(grandTotal / maxTokens);
    const softBudget = Math.ceil((grandTotal / partCount + maxTokens) / 2);

    const chunks = [];
    let pending = [];
    let pendingTokens = 0;
    let pageIdx = 0;

    while (pageIdx < pageContents.length) {
        const wouldOverflow = pendingTokens + tokenLengths[pageIdx] > softBudget;
        if (wouldOverflow && pending.length > 0) {
            // Close the open group, then seed the next one with the overlap pages.
            chunks.push(pending.join('\n'));
            pending = [];
            pendingTokens = 0;
            let back = Math.max(pageIdx - overlapPage, 0);
            while (back < pageIdx) {
                pending.push(pageContents[back]);
                pendingTokens += tokenLengths[back];
                back += 1;
            }
        }
        pending.push(pageContents[pageIdx]);
        pendingTokens += tokenLengths[pageIdx];
        pageIdx += 1;
    }

    // Flush whatever is left in the open group.
    if (pending.length > 0) {
        chunks.push(pending.join('\n'));
    }
    return chunks;
}
|
|
35
|
+
/**
 * Reads the numeric part of a physical-index tag.
 * Both "<physical_index_5>" and "physical_index_5" produce 5.
 *
 * @param value - Text containing `physical_index_` followed by digits.
 * @returns The digits as a base-10 integer.
 * @throws {Error} When no `physical_index_<digits>` sequence is present.
 */
export function convertPhysicalIndexToInt(value) {
    const found = value.match(/physical_index_(\d+)/);
    if (found) {
        return Number.parseInt(found[1], 10);
    }
    throw new Error(`Invalid physical index format: ${value}`);
}
|
|
45
|
+
/**
 * Surrounds every page's text with a pair of identical `<physical_index_N>`
 * marker lines and concatenates the results with newlines.
 *
 * @param pages - Pages to wrap; only `text` is read from each entry.
 * @param startIndex - Index given to the first page; later pages count upward.
 * @returns The tagged text for all pages.
 */
export function addPhysicalIndexTags(pages, startIndex) {
    const wrapPage = (page, offset) => {
        const marker = `<physical_index_${startIndex + offset}>`;
        return `${marker}\n${page.text}\n${marker}`;
    };
    return pages.map(wrapPage).join('\n');
}
|
|
56
|
+
//# sourceMappingURL=page-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"page-utils.js","sourceRoot":"","sources":["../../src/utils/page-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CACjC,YAAsB,EACtB,YAAsB,EACtB,SAAS,GAAG,KAAK,EACjB,WAAW,GAAG,CAAC;IAEf,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAEhE,IAAI,WAAW,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC,CAAC;IACzD,MAAM,oBAAoB,GAAG,IAAI,CAAC,IAAI,CACpC,CAAC,WAAW,GAAG,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAC9C,CAAC;IAEF,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAE1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,IACE,iBAAiB,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,oBAAoB;YAC1D,YAAY,CAAC,MAAM,GAAG,CAAC,EACvB,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC;YAC9C,YAAY,GAAG,EAAE,CAAC;YAClB,iBAAiB,GAAG,CAAC,CAAC;YACtB,KAAK,IAAI,CAAC,GAAG,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnC,iBAAiB,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;QACnC,iBAAiB,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB,CAAC,KAAa;IACrD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IAClD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,EAAE,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAClC,KAA8B,EAC9B,UAAkB;IAElB,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACf,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC;QAC3B,OAAO,mBAAmB,GAAG,MAAM,IAAI,CAAC,IAAI,qBAA
qB,GAAG,GAAG,CAAC;IAC1E,CAAC,CAAC;SACD,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-counter.d.ts","sourceRoot":"","sources":["../../src/utils/token-counter.ts"],"names":[],"mappings":"AAEA,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token-counter.js","sourceRoot":"","sources":["../../src/utils/token-counter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAEvC,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { VectorStore } from './vector-store.js';
|
|
2
|
+
import type { VectorRecord, SearchResult } from '../types.js';
|
|
3
|
+
/**
 * In-memory VectorStore implementation for testing and small-scale use.
 * Search scans every stored record and ranks by cosine similarity.
 */
|
|
7
|
+
export declare class InMemoryAdapter implements VectorStore {
|
|
8
|
+
/** Backing store; the implementation initialises it as a Map keyed by record id. */
private records;
|
|
9
|
+
/** Stores each record under its `id`, overwriting any record already held for that id. */
upsert(vectors: VectorRecord[]): Promise<void>;
|
|
10
|
+
/**
 * Returns the `topK` best-scoring records for `query`, highest score first.
 * When `filter` is given, a record is considered only if every filter key
 * equals the record's payload value under `Object.is`. A stored vector whose
 * length differs from `query` scores 0.
 */
search(query: number[], topK: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
|
|
11
|
+
/** Removes the records with the given ids; ids that are not stored are ignored. */
delete(ids: string[]): Promise<void>;
|
|
12
|
+
/** Number of records currently stored. */
get size(): number;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=in-memory-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"in-memory-adapter.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/adapters/in-memory-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE9D;;;GAGG;AACH,qBAAa,eAAgB,YAAW,WAAW;IACjD,OAAO,CAAC,OAAO,CAAmC;IAE5C,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAM9C,MAAM,CACV,KAAK,EAAE,MAAM,EAAE,EACf,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC;IAapB,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAM1C,IAAI,IAAI,IAAI,MAAM,CAEjB;CACF"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
 * Map-backed VectorStore meant for tests and small data sets.
 * Searching is a linear scan ranked by cosine similarity.
 */
export class InMemoryAdapter {
    /** Stored records, keyed by record id. */
    records = new Map();

    /** Inserts every record, replacing any stored record with the same id. */
    async upsert(vectors) {
        for (const entry of vectors) {
            this.records.set(entry.id, entry);
        }
    }

    /**
     * Ranks the stored records that pass `filter` by cosine similarity to
     * `query` (highest first) and returns the first `topK` of them.
     */
    async search(query, topK, filter) {
        const ranked = [...this.records.values()]
            .filter((entry) => matchesFilter(entry.payload, filter))
            .map((entry) => ({
                id: entry.id,
                score: cosineSimilarity(query, entry.vector),
                payload: entry.payload,
            }));
        ranked.sort((left, right) => right.score - left.score);
        return ranked.slice(0, topK);
    }

    /** Removes the records with the given ids; unknown ids are ignored. */
    async delete(ids) {
        for (const key of ids) {
            this.records.delete(key);
        }
    }

    /** Number of records currently stored. */
    get size() {
        return this.records.size;
    }
}
|
|
32
|
+
/**
 * Tells whether a payload satisfies an equality filter.
 *
 * A missing filter matches everything. Otherwise every filter key must hold
 * the same value in `payload`, compared with `Object.is` (so NaN equals NaN
 * and 0 differs from -0).
 */
function matchesFilter(payload, filter) {
    if (!filter) {
        return true;
    }
    return Object.entries(filter).every(([field, wanted]) => Object.is(payload[field], wanted));
}
|
|
41
|
+
/**
 * Cosine similarity of two numeric vectors.
 *
 * Returns 0 when the vectors differ in length or when either has zero
 * magnitude (which includes two empty vectors).
 */
function cosineSimilarity(a, b) {
    const dims = a.length;
    if (dims !== b.length) {
        return 0;
    }
    let dot = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let k = 0; k < dims; k += 1) {
        const x = a[k];
        const y = b[k];
        dot += x * y;
        squaredA += x * x;
        squaredB += y * y;
    }
    const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    if (magnitude === 0) {
        return 0;
    }
    return dot / magnitude;
}
|
|
55
|
+
//# sourceMappingURL=in-memory-adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"in-memory-adapter.js","sourceRoot":"","sources":["../../../src/vector-lib/adapters/in-memory-adapter.ts"],"names":[],"mappings":"AAGA;;;GAGG;AACH,MAAM,OAAO,eAAe;IAClB,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAC;IAElD,KAAK,CAAC,MAAM,CAAC,OAAuB;QAClC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,MAAM,CACV,KAAe,EACf,IAAY,EACZ,MAAgC;QAEhC,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC;gBAAE,SAAS;YACrD,MAAM,KAAK,GAAG,gBAAgB,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YACrD,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,MAAM,CAAC,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAa;QACxB,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;IAC3B,CAAC;CACF;AAED,SAAS,aAAa,CACpB,OAAgC,EAChC,MAAgC;IAEhC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,KAAK,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC;YAAE,OAAO,KAAK,CAAC;IACvD,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,gBAAgB,CAAC,CAAW,EAAE,CAAW;IAChD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;QAAE,OAAO,CAAC,CAAC;IAEpC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CA
AC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACxD,OAAO,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,WAAW,CAAC;AAC1D,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { VectorRecord, SearchResult } from '../types.js';
|
|
2
|
+
/**
 * VectorStore interface: implement this to plug in your own vector database.
 * The notes on each method describe what InMemoryAdapter does; other
 * implementations are presumably expected to behave the same way (confirm
 * against the package documentation).
 */
|
|
5
|
+
export interface VectorStore {
|
|
6
|
+
/** Stores the given records; InMemoryAdapter replaces any existing record that has the same id. */
upsert(vectors: VectorRecord[]): Promise<void>;
|
|
7
|
+
/**
 * Finds records similar to `query` and resolves with at most `topK` results.
 * InMemoryAdapter orders results by descending score and, when `filter` is
 * given, keeps only records whose payload equals the filter on every key.
 */
search(query: number[], topK: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
|
|
8
|
+
/** Removes the records with the given ids. */
delete(ids: string[]): Promise<void>;
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=vector-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector-store.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/adapters/vector-store.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE9D;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,MAAM,CACJ,KAAK,EAAE,MAAM,EAAE,EACf,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC3B,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACtC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector-store.js","sourceRoot":"","sources":["../../../src/vector-lib/adapters/vector-store.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { PageIndexResult } from '../../types.js';
|
|
2
|
+
import type { Chunk, VectorConfig } from '../types.js';
|
|
3
|
+
/**
 * Splits a PageIndexResult tree into chunks suitable for vector indexing.
 * Each leaf node with text becomes one or more chunks.
 *
 * @param result - The PageIndexResult tree to split.
 * @param config - Optional VectorConfig; how it affects chunking is not visible from this declaration.
 * @returns The chunks produced from the tree.
 */
|
|
7
|
+
export declare const treeChunker: (result: PageIndexResult, config?: VectorConfig) => Chunk[];
|
|
8
|
+
//# sourceMappingURL=tree-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tree-chunker.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/chunker/tree-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAY,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAChE,OAAO,KAAK,EAAE,KAAK,EAAW,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhE;;;GAGG;AACH,eAAO,MAAM,WAAW,WACd,eAAe,WACf,YAAY,KACnB,KAAK,EAqCY,CAAC"}
|