@fastrag/pageindex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +251 -0
  3. package/README.zh-CN.md +251 -0
  4. package/dist/errors/index.d.ts +10 -0
  5. package/dist/errors/index.d.ts.map +1 -0
  6. package/dist/errors/index.js +19 -0
  7. package/dist/errors/index.js.map +1 -0
  8. package/dist/index.d.ts +14 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +20 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/internal-types/config.d.ts +35 -0
  13. package/dist/internal-types/config.d.ts.map +1 -0
  14. package/dist/internal-types/config.js +16 -0
  15. package/dist/internal-types/config.js.map +1 -0
  16. package/dist/internal-types/document-parser.d.ts +5 -0
  17. package/dist/internal-types/document-parser.d.ts.map +1 -0
  18. package/dist/internal-types/document-parser.js +2 -0
  19. package/dist/internal-types/document-parser.js.map +1 -0
  20. package/dist/internal-types/index.d.ts +9 -0
  21. package/dist/internal-types/index.d.ts.map +1 -0
  22. package/dist/internal-types/index.js +2 -0
  23. package/dist/internal-types/index.js.map +1 -0
  24. package/dist/internal-types/llm-provider.d.ts +19 -0
  25. package/dist/internal-types/llm-provider.d.ts.map +1 -0
  26. package/dist/internal-types/llm-provider.js +2 -0
  27. package/dist/internal-types/llm-provider.js.map +1 -0
  28. package/dist/internal-types/logger.d.ts +7 -0
  29. package/dist/internal-types/logger.d.ts.map +1 -0
  30. package/dist/internal-types/logger.js +2 -0
  31. package/dist/internal-types/logger.js.map +1 -0
  32. package/dist/internal-types/page.d.ts +5 -0
  33. package/dist/internal-types/page.d.ts.map +1 -0
  34. package/dist/internal-types/page.js +2 -0
  35. package/dist/internal-types/page.js.map +1 -0
  36. package/dist/internal-types/processing.d.ts +21 -0
  37. package/dist/internal-types/processing.d.ts.map +1 -0
  38. package/dist/internal-types/processing.js +2 -0
  39. package/dist/internal-types/processing.js.map +1 -0
  40. package/dist/internal-types/tree-node.d.ts +30 -0
  41. package/dist/internal-types/tree-node.d.ts.map +1 -0
  42. package/dist/internal-types/tree-node.js +2 -0
  43. package/dist/internal-types/tree-node.js.map +1 -0
  44. package/dist/llm/index.d.ts +3 -0
  45. package/dist/llm/index.d.ts.map +1 -0
  46. package/dist/llm/index.js +3 -0
  47. package/dist/llm/index.js.map +1 -0
  48. package/dist/llm/llm-client.d.ts +26 -0
  49. package/dist/llm/llm-client.d.ts.map +1 -0
  50. package/dist/llm/llm-client.js +88 -0
  51. package/dist/llm/llm-client.js.map +1 -0
  52. package/dist/llm/prompts.d.ts +33 -0
  53. package/dist/llm/prompts.d.ts.map +1 -0
  54. package/dist/llm/prompts.js +312 -0
  55. package/dist/llm/prompts.js.map +1 -0
  56. package/dist/markdown/index.d.ts +6 -0
  57. package/dist/markdown/index.d.ts.map +1 -0
  58. package/dist/markdown/index.js +5 -0
  59. package/dist/markdown/index.js.map +1 -0
  60. package/dist/markdown/md-extractor.d.ts +14 -0
  61. package/dist/markdown/md-extractor.d.ts.map +1 -0
  62. package/dist/markdown/md-extractor.js +30 -0
  63. package/dist/markdown/md-extractor.js.map +1 -0
  64. package/dist/markdown/md-to-tree.d.ts +8 -0
  65. package/dist/markdown/md-to-tree.d.ts.map +1 -0
  66. package/dist/markdown/md-to-tree.js +20 -0
  67. package/dist/markdown/md-to-tree.js.map +1 -0
  68. package/dist/markdown/md-tree-builder.d.ts +7 -0
  69. package/dist/markdown/md-tree-builder.d.ts.map +1 -0
  70. package/dist/markdown/md-tree-builder.js +36 -0
  71. package/dist/markdown/md-tree-builder.js.map +1 -0
  72. package/dist/markdown/tree-thinning.d.ts +8 -0
  73. package/dist/markdown/tree-thinning.d.ts.map +1 -0
  74. package/dist/markdown/tree-thinning.js +42 -0
  75. package/dist/markdown/tree-thinning.js.map +1 -0
  76. package/dist/page-index.d.ts +10 -0
  77. package/dist/page-index.d.ts.map +1 -0
  78. package/dist/page-index.js +54 -0
  79. package/dist/page-index.js.map +1 -0
  80. package/dist/post-processing/doc-description.d.ts +12 -0
  81. package/dist/post-processing/doc-description.d.ts.map +1 -0
  82. package/dist/post-processing/doc-description.js +31 -0
  83. package/dist/post-processing/doc-description.js.map +1 -0
  84. package/dist/post-processing/index.d.ts +5 -0
  85. package/dist/post-processing/index.d.ts.map +1 -0
  86. package/dist/post-processing/index.js +5 -0
  87. package/dist/post-processing/index.js.map +1 -0
  88. package/dist/post-processing/node-id.d.ts +7 -0
  89. package/dist/post-processing/node-id.d.ts.map +1 -0
  90. package/dist/post-processing/node-id.js +20 -0
  91. package/dist/post-processing/node-id.js.map +1 -0
  92. package/dist/post-processing/node-text.d.ts +11 -0
  93. package/dist/post-processing/node-text.d.ts.map +1 -0
  94. package/dist/post-processing/node-text.js +37 -0
  95. package/dist/post-processing/node-text.js.map +1 -0
  96. package/dist/post-processing/summary.d.ts +7 -0
  97. package/dist/post-processing/summary.d.ts.map +1 -0
  98. package/dist/post-processing/summary.js +31 -0
  99. package/dist/post-processing/summary.js.map +1 -0
  100. package/dist/processing/index.d.ts +6 -0
  101. package/dist/processing/index.d.ts.map +1 -0
  102. package/dist/processing/index.js +6 -0
  103. package/dist/processing/index.js.map +1 -0
  104. package/dist/processing/large-node.d.ts +9 -0
  105. package/dist/processing/large-node.d.ts.map +1 -0
  106. package/dist/processing/large-node.js +40 -0
  107. package/dist/processing/large-node.js.map +1 -0
  108. package/dist/processing/meta-processor.d.ts +19 -0
  109. package/dist/processing/meta-processor.d.ts.map +1 -0
  110. package/dist/processing/meta-processor.js +91 -0
  111. package/dist/processing/meta-processor.js.map +1 -0
  112. package/dist/processing/no-toc.d.ts +10 -0
  113. package/dist/processing/no-toc.d.ts.map +1 -0
  114. package/dist/processing/no-toc.js +44 -0
  115. package/dist/processing/no-toc.js.map +1 -0
  116. package/dist/processing/toc-no-pages.d.ts +11 -0
  117. package/dist/processing/toc-no-pages.d.ts.map +1 -0
  118. package/dist/processing/toc-no-pages.js +46 -0
  119. package/dist/processing/toc-no-pages.js.map +1 -0
  120. package/dist/processing/toc-with-pages.d.ts +15 -0
  121. package/dist/processing/toc-with-pages.d.ts.map +1 -0
  122. package/dist/processing/toc-with-pages.js +151 -0
  123. package/dist/processing/toc-with-pages.js.map +1 -0
  124. package/dist/toc/index.d.ts +4 -0
  125. package/dist/toc/index.d.ts.map +1 -0
  126. package/dist/toc/index.js +4 -0
  127. package/dist/toc/index.js.map +1 -0
  128. package/dist/toc/toc-detector.d.ts +23 -0
  129. package/dist/toc/toc-detector.d.ts.map +1 -0
  130. package/dist/toc/toc-detector.js +65 -0
  131. package/dist/toc/toc-detector.js.map +1 -0
  132. package/dist/toc/toc-extractor.d.ts +13 -0
  133. package/dist/toc/toc-extractor.d.ts.map +1 -0
  134. package/dist/toc/toc-extractor.js +32 -0
  135. package/dist/toc/toc-extractor.js.map +1 -0
  136. package/dist/toc/toc-transformer.d.ts +11 -0
  137. package/dist/toc/toc-transformer.d.ts.map +1 -0
  138. package/dist/toc/toc-transformer.js +69 -0
  139. package/dist/toc/toc-transformer.js.map +1 -0
  140. package/dist/tree/index.d.ts +4 -0
  141. package/dist/tree/index.d.ts.map +1 -0
  142. package/dist/tree/index.js +4 -0
  143. package/dist/tree/index.js.map +1 -0
  144. package/dist/tree/list-to-tree.d.ts +7 -0
  145. package/dist/tree/list-to-tree.d.ts.map +1 -0
  146. package/dist/tree/list-to-tree.js +33 -0
  147. package/dist/tree/list-to-tree.js.map +1 -0
  148. package/dist/tree/post-processing.d.ts +12 -0
  149. package/dist/tree/post-processing.d.ts.map +1 -0
  150. package/dist/tree/post-processing.js +87 -0
  151. package/dist/tree/post-processing.js.map +1 -0
  152. package/dist/tree/tree-utils.d.ts +18 -0
  153. package/dist/tree/tree-utils.d.ts.map +1 -0
  154. package/dist/tree/tree-utils.js +43 -0
  155. package/dist/tree/tree-utils.js.map +1 -0
  156. package/dist/tree-parser.d.ts +30 -0
  157. package/dist/tree-parser.d.ts.map +1 -0
  158. package/dist/tree-parser.js +73 -0
  159. package/dist/tree-parser.js.map +1 -0
  160. package/dist/types.d.ts +3 -0
  161. package/dist/types.d.ts.map +1 -0
  162. package/dist/types.js +2 -0
  163. package/dist/types.js.map +1 -0
  164. package/dist/utils/config-loader.d.ts +15 -0
  165. package/dist/utils/config-loader.d.ts.map +1 -0
  166. package/dist/utils/config-loader.js +19 -0
  167. package/dist/utils/config-loader.js.map +1 -0
  168. package/dist/utils/index.d.ts +7 -0
  169. package/dist/utils/index.d.ts.map +1 -0
  170. package/dist/utils/index.js +6 -0
  171. package/dist/utils/index.js.map +1 -0
  172. package/dist/utils/json-parser.d.ts +2 -0
  173. package/dist/utils/json-parser.d.ts.map +1 -0
  174. package/dist/utils/json-parser.js +76 -0
  175. package/dist/utils/json-parser.js.map +1 -0
  176. package/dist/utils/logger.d.ts +3 -0
  177. package/dist/utils/logger.d.ts.map +1 -0
  178. package/dist/utils/logger.js +10 -0
  179. package/dist/utils/logger.js.map +1 -0
  180. package/dist/utils/page-utils.d.ts +16 -0
  181. package/dist/utils/page-utils.d.ts.map +1 -0
  182. package/dist/utils/page-utils.js +56 -0
  183. package/dist/utils/page-utils.js.map +1 -0
  184. package/dist/utils/token-counter.d.ts +2 -0
  185. package/dist/utils/token-counter.d.ts.map +1 -0
  186. package/dist/utils/token-counter.js +5 -0
  187. package/dist/utils/token-counter.js.map +1 -0
  188. package/dist/vector-lib/adapters/in-memory-adapter.d.ts +14 -0
  189. package/dist/vector-lib/adapters/in-memory-adapter.d.ts.map +1 -0
  190. package/dist/vector-lib/adapters/in-memory-adapter.js +55 -0
  191. package/dist/vector-lib/adapters/in-memory-adapter.js.map +1 -0
  192. package/dist/vector-lib/adapters/vector-store.d.ts +10 -0
  193. package/dist/vector-lib/adapters/vector-store.d.ts.map +1 -0
  194. package/dist/vector-lib/adapters/vector-store.js +2 -0
  195. package/dist/vector-lib/adapters/vector-store.js.map +1 -0
  196. package/dist/vector-lib/chunker/tree-chunker.d.ts +8 -0
  197. package/dist/vector-lib/chunker/tree-chunker.d.ts.map +1 -0
  198. package/dist/vector-lib/chunker/tree-chunker.js +59 -0
  199. package/dist/vector-lib/chunker/tree-chunker.js.map +1 -0
  200. package/dist/vector-lib/embedder/embedder.d.ts +8 -0
  201. package/dist/vector-lib/embedder/embedder.d.ts.map +1 -0
  202. package/dist/vector-lib/embedder/embedder.js +2 -0
  203. package/dist/vector-lib/embedder/embedder.js.map +1 -0
  204. package/dist/vector-lib/index.d.ts +10 -0
  205. package/dist/vector-lib/index.d.ts.map +1 -0
  206. package/dist/vector-lib/index.js +6 -0
  207. package/dist/vector-lib/index.js.map +1 -0
  208. package/dist/vector-lib/search/hybrid-search.d.ts +19 -0
  209. package/dist/vector-lib/search/hybrid-search.d.ts.map +1 -0
  210. package/dist/vector-lib/search/hybrid-search.js +25 -0
  211. package/dist/vector-lib/search/hybrid-search.js.map +1 -0
  212. package/dist/vector-lib/search/reranker.d.ts +14 -0
  213. package/dist/vector-lib/search/reranker.d.ts.map +1 -0
  214. package/dist/vector-lib/search/reranker.js +2 -0
  215. package/dist/vector-lib/search/reranker.js.map +1 -0
  216. package/dist/vector-lib/types.d.ts +29 -0
  217. package/dist/vector-lib/types.d.ts.map +1 -0
  218. package/dist/vector-lib/types.js +2 -0
  219. package/dist/vector-lib/types.js.map +1 -0
  220. package/dist/vector-lib/vector-enhancer.d.ts +28 -0
  221. package/dist/vector-lib/vector-enhancer.d.ts.map +1 -0
  222. package/dist/vector-lib/vector-enhancer.js +54 -0
  223. package/dist/vector-lib/vector-enhancer.js.map +1 -0
  224. package/dist/vector.d.ts +5 -0
  225. package/dist/vector.d.ts.map +1 -0
  226. package/dist/vector.js +3 -0
  227. package/dist/vector.js.map +1 -0
  228. package/dist/verification/fix-toc.d.ts +13 -0
  229. package/dist/verification/fix-toc.d.ts.map +1 -0
  230. package/dist/verification/fix-toc.js +73 -0
  231. package/dist/verification/fix-toc.js.map +1 -0
  232. package/dist/verification/index.d.ts +3 -0
  233. package/dist/verification/index.d.ts.map +1 -0
  234. package/dist/verification/index.js +3 -0
  235. package/dist/verification/index.js.map +1 -0
  236. package/dist/verification/verify-toc.d.ts +17 -0
  237. package/dist/verification/verify-toc.d.ts.map +1 -0
  238. package/dist/verification/verify-toc.js +64 -0
  239. package/dist/verification/verify-toc.js.map +1 -0
  240. package/package.json +58 -0
@@ -0,0 +1,43 @@
/**
 * Flattens a tree into a single array, listing each parent before its descendants (pre-order).
 *
 * @param tree - Top-level nodes; every node keeps its children in `nodes`.
 * @returns A new array holding every node of the tree.
 */
export function getNodes(tree) {
    const flattened = [];
    // Explicit stack instead of recursion; entries are pushed in reverse so they pop in order.
    const pending = [...tree].reverse();
    while (pending.length > 0) {
        const current = pending.pop();
        flattened.push(current);
        if (current.nodes.length > 0) {
            for (let i = current.nodes.length - 1; i >= 0; i -= 1) {
                pending.push(current.nodes[i]);
            }
        }
    }
    return flattened;
}
/**
 * Gathers every node that has no children, preserving left-to-right order.
 *
 * @param tree - Top-level nodes; every node keeps its children in `nodes`.
 * @returns A new array with the leaf nodes only.
 */
export function getLeafNodes(tree) {
    return tree.flatMap((candidate) => candidate.nodes.length === 0
        ? [candidate]
        : getLeafNodes(candidate.nodes));
}
/**
 * Flattens a tree into `{ structure, title, node }` entries in pre-order.
 *
 * `structure` is the dotted, 1-based position of the node ("1", "1.2", "1.2.1", ...).
 *
 * @param tree - Nodes to list; every node exposes `title` and its children in `nodes`.
 * @param prefix - Structure index of the parent; empty for top-level nodes.
 * @returns A new array of entries, each parent directly followed by its descendants.
 */
export function structureToList(tree, prefix = '') {
    // Computed once per level: the parent's index plus a dot, or nothing at the top level.
    const head = prefix ? `${prefix}.` : '';
    return tree.flatMap((node, position) => {
        const structure = `${head}${position + 1}`;
        const entry = { structure, title: node.title, node };
        return node.nodes.length > 0
            ? [entry, ...structureToList(node.nodes, structure)]
            : [entry];
    });
}
43
+ //# sourceMappingURL=tree-utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-utils.js","sourceRoot":"","sources":["../../src/tree/tree-utils.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAgB;IACvC,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAgB;IAC3C,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,IAAgB,EAChB,MAAM,GAAG,EAAE;IAEX,MAAM,MAAM,GAAgE,EAAE,CAAC;IAC/E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAChE,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,30 @@
1
+ import type { TreeNode, TocItem, ProcessingMode, DegradationEvent, Logger } from './types.js';
2
+ import { LlmClient } from './llm/llm-client.js';
3
+ /**
4
+ * Main tree parsing flow.
5
+ * 1. Detect TOC
6
+ * 2. Process based on TOC presence and page numbers
7
+ * 3. Add preface if needed
8
+ * 4. Check title appearance at start
9
+ * 5. Build tree with postProcessing
10
+ * 6. Recursively split large nodes
11
+ */
12
+ export declare function treeParser(pageList: Array<{
13
+ text: string;
14
+ }>, llmClient: LlmClient, logger: Logger, config: {
15
+ tocCheckPageNum: number;
16
+ maxPageNumEachNode: number;
17
+ maxTokenNumEachNode: number;
18
+ onDegradation?: (event: DegradationEvent) => void;
19
+ }): Promise<{
20
+ structure: TreeNode[];
21
+ finalMode: ProcessingMode;
22
+ degradations: DegradationEvent[];
23
+ }>;
24
+ /**
25
+ * Concurrently checks if titles appear at the start of their pages.
26
+ */
27
+ export declare function checkTitleAppearanceInStartConcurrent(items: TocItem[], pageList: Array<{
28
+ text: string;
29
+ }>, llmClient: LlmClient): Promise<void>;
30
+ //# sourceMappingURL=tree-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-parser.d.ts","sourceRoot":"","sources":["../src/tree-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAC9F,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAOhD;;;;;;;;GAQG;AACH,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,EACpB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE;IACN,eAAe,EAAE,MAAM,CAAC;IACxB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;CACnD,GACA,OAAO,CAAC;IAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IAAC,SAAS,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,gBAAgB,EAAE,CAAA;CAAE,CAAC,CA6CjG;AAYD;;GAEG;AACH,wBAAsB,qCAAqC,CACzD,KAAK,EAAE,OAAO,EAAE,EAChB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,IAAI,CAAC,CAkBf"}
@@ -0,0 +1,73 @@
1
+ import { LlmClient } from './llm/llm-client.js';
2
+ import { checkToc } from './toc/toc-detector.js';
3
+ import { metaProcessor } from './processing/meta-processor.js';
4
+ import { processLargeNodeRecursively } from './processing/large-node.js';
5
+ import { postProcessing } from './tree/post-processing.js';
6
+ import { checkTitleAppearanceInStartPrompt } from './llm/prompts.js';
/**
 * Builds the document tree from a list of extracted pages.
 *
 * Pipeline:
 *  1. Look for a table of contents with `checkToc`.
 *  2. Derive the initial processing mode from that result and run `metaProcessor`.
 *  3. Prepend a synthetic "Preface" entry when the first entry starts after page 1.
 *  4. Concurrently ask the LLM whether each title opens its page.
 *  5. Drop entries without a physical index and assemble the tree with `postProcessing`.
 *  6. Pass every top-level node to `processLargeNodeRecursively` with the page/token limits.
 *
 * @param pageList - Pages of the document; each entry exposes its text as `text`.
 * @param llmClient - Client used for the LLM calls made along the pipeline.
 * @param logger - Logger forwarded to the processing stages.
 * @param config - `tocCheckPageNum`, `maxPageNumEachNode`, `maxTokenNumEachNode` and an
 *   optional `onDegradation` callback that is forwarded to `metaProcessor`.
 * @returns The tree (`structure`) plus `finalMode` and `degradations` as reported by `metaProcessor`.
 */
export async function treeParser(pageList, llmClient, logger, config) {
    const toc = await checkToc(pageList, config.tocCheckPageNum, llmClient, logger);
    // No TOC text at all -> no-TOC mode; otherwise the mode depends on whether the TOC lists page numbers.
    const initialMode = !toc.tocContent
        ? 'process_no_toc'
        : toc.pageIndexGivenInToc
            ? 'process_toc_with_page_numbers'
            : 'process_toc_no_page_numbers';
    const { items: tocItems, finalMode, degradations } = await metaProcessor(pageList, initialMode, llmClient, logger, {
        tocContent: toc.tocContent,
        tocPageList: toc.tocPageList,
        onDegradation: config.onDegradation,
    });
    // Both helpers below mutate the entries in place.
    addPrefaceIfNeeded(tocItems);
    await checkTitleAppearanceInStartConcurrent(tocItems, pageList, llmClient);
    // Only entries that were located on a physical page take part in the tree.
    const locatedItems = tocItems.filter(({ physicalIndex }) => physicalIndex != null);
    const structure = postProcessing(locatedItems, pageList.length);
    const splitJobs = structure.map((topLevelNode) => processLargeNodeRecursively(topLevelNode, pageList, llmClient, logger, config.maxPageNumEachNode, config.maxTokenNumEachNode));
    await Promise.all(splitJobs);
    return { structure, finalMode, degradations };
}
/**
 * Prepends a synthetic "Preface" entry (structure "0", page 1) when the first entry
 * is known to start after page 1. Mutates `items` in place; otherwise leaves it untouched.
 */
function addPrefaceIfNeeded(items) {
    if (!(items.length > 0)) {
        return;
    }
    const firstPage = items[0].physicalIndex;
    if (firstPage == null || !(firstPage > 1)) {
        return;
    }
    items.unshift({
        structure: '0',
        title: 'Preface',
        physicalIndex: 1,
    });
}
/**
 * Asks the LLM, for all entries at once, whether each title appears at the start of its page.
 *
 * Entries without a `physicalIndex`, or whose index falls outside `pageList`, are skipped.
 * For every other entry `appearStart` is set to 'yes' when the reply's `start_begin` is
 * exactly 'yes', and to 'no' otherwise. Entries are mutated in place.
 *
 * @param items - TOC entries; `physicalIndex` is treated as a 1-based page number.
 * @param pageList - Pages of the document; each entry exposes its text as `text`.
 * @param llmClient - Client whose `chatJson` performs the check.
 */
export async function checkTitleAppearanceInStartConcurrent(items, pageList, llmClient) {
    const verifyEntry = async (entry) => {
        if (entry.physicalIndex == null) {
            return;
        }
        // physicalIndex is 1-based, pageList is 0-based.
        const pageIdx = entry.physicalIndex - 1;
        const outOfRange = pageIdx < 0 || pageIdx >= pageList.length;
        if (outOfRange) {
            return;
        }
        const prompt = checkTitleAppearanceInStartPrompt(entry.title, pageList[pageIdx].text);
        const reply = await llmClient.chatJson([{ role: 'user', content: prompt }]);
        if (reply.start_begin === 'yes') {
            entry.appearStart = 'yes';
        }
        else {
            entry.appearStart = 'no';
        }
    };
    // Start every check before awaiting any of them so the requests run concurrently.
    const inFlight = [];
    for (const entry of items) {
        inFlight.push(verifyEntry(entry));
    }
    await Promise.all(inFlight);
}
73
+ //# sourceMappingURL=tree-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-parser.js","sourceRoot":"","sources":["../src/tree-parser.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChD,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAE,2BAA2B,EAAE,MAAM,4BAA4B,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,iCAAiC,EAAE,MAAM,kBAAkB,CAAC;AAErE;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,QAAiC,EACjC,SAAoB,EACpB,MAAc,EACd,MAKC;IAED,wBAAwB;IACxB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAC9B,QAAQ,EAAE,MAAM,CAAC,eAAe,EAAE,SAAS,EAAE,MAAM,CACpD,CAAC;IAEF,qCAAqC;IACrC,IAAI,IAAoB,CAAC;IACzB,IAAI,SAAS,CAAC,UAAU,EAAE,CAAC;QACzB,IAAI,GAAG,SAAS,CAAC,mBAAmB;YAClC,CAAC,CAAC,+BAA+B;YACjC,CAAC,CAAC,6BAA6B,CAAC;IACpC,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,gBAAgB,CAAC;IAC1B,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,MAAM,aAAa,CAC5D,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE;QACjC,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,WAAW,EAAE,SAAS,CAAC,WAAW;QAClC,aAAa,EAAE,MAAM,CAAC,aAAa;KACpC,CACF,CAAC;IAEF,gCAAgC;IAChC,kBAAkB,CAAC,KAAK,CAAC,CAAC;IAE1B,uDAAuD;IACvD,MAAM,qCAAqC,CAAC,KAAK,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;IAExE,gCAAgC;IAChC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC;IACtE,MAAM,SAAS,GAAG,cAAc,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAE9D,8BAA8B;IAC9B,MAAM,OAAO,CAAC,GAAG,CACf,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CACrB,2BAA2B,CACzB,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EACjC,MAAM,CAAC,kBAAkB,EAAE,MAAM,CAAC,mBAAmB,CACtD,CACF,CACF,CAAC;IAEF,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,CAAC;AAChD,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAgB;IAC1C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,EAAE,CAAC;QACrF,KAAK,CAAC,OAAO,CAAC;YACZ,SAAS,EAAE,GAAG;YACd,KAAK,EAAE,SAAS;YAChB,aAAa,EAAE,CAAC;SACjB,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qCAAqC,CACzD,KAAgB,EAChB,QAAiC,EACjC,SAAoB;IAEpB,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,E
AAE;QACvB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI;YAAE,OAAO;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC;QACvC,IAAI,OAAO,GAAG,CAAC,IAAI,OAAO,IAAI,QAAQ,CAAC,MAAM;YAAE,OAAO;QAEtD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAA0B;YAC/D;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,iCAAiC,CACxC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC,IAAI,CACnC;aACF;SACF,CAAC,CAAC;QACH,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { DEFAULT_CONFIG, DEFAULT_RETRY_CONFIG, } from './internal-types/index.js';
2
+ export type { TreeNode, TocItem, PageIndexResult, PageIndexConfig, RetryConfig, MdConfig, PageContent, DocumentParser, LlmProvider, LlmMessage, LlmOptions, LlmResponse, JsonSchema, Logger, ProcessingMode, TocCheckResult, VerificationResult, DegradationEvent, } from './internal-types/index.js';
3
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,oBAAoB,GACrB,MAAM,2BAA2B,CAAC;AAEnC,YAAY,EACV,QAAQ,EACR,OAAO,EACP,eAAe,EACf,eAAe,EACf,WAAW,EACX,QAAQ,EACR,WAAW,EACX,cAAc,EACd,WAAW,EACX,UAAU,EACV,UAAU,EACV,WAAW,EACX,UAAU,EACV,MAAM,EACN,cAAc,EACd,cAAc,EACd,kBAAkB,EAClB,gBAAgB,GACjB,MAAM,2BAA2B,CAAC"}
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export { DEFAULT_CONFIG, DEFAULT_RETRY_CONFIG, } from './internal-types/index.js';
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,oBAAoB,GACrB,MAAM,2BAA2B,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { PageIndexConfig, RetryConfig, Logger, DegradationEvent } from '../types.js';
2
+ export interface ResolvedConfig { // Fully populated configuration object; this is the return type of mergeConfig.
3
+ tocCheckPageNum: number; // mergeConfig falls back to DEFAULT_CONFIG.tocCheckPageNum when the user value is null/undefined
4
+ maxPageNumEachNode: number; // falls back to DEFAULT_CONFIG.maxPageNumEachNode
5
+ maxTokenNumEachNode: number; // falls back to DEFAULT_CONFIG.maxTokenNumEachNode
6
+ addNodeId: boolean; // falls back to DEFAULT_CONFIG.addNodeId
7
+ addNodeSummary: boolean; // falls back to DEFAULT_CONFIG.addNodeSummary
8
+ addDocDescription: boolean; // falls back to DEFAULT_CONFIG.addDocDescription
9
+ addNodeText: boolean; // falls back to DEFAULT_CONFIG.addNodeText
10
+ onDegradation?: (event: DegradationEvent) => void; // optional: copied from the user config as-is, no default is supplied
11
+ retryConfig: RetryConfig; // DEFAULT_RETRY_CONFIG overlaid with any user-supplied retryConfig
12
+ logger: Logger; // the user's logger, or the logger from createSilentLogger() when none is given
13
+ }
14
+ export declare function mergeConfig(userConfig: Partial<PageIndexConfig>): ResolvedConfig;
15
+ //# sourceMappingURL=config-loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config-loader.d.ts","sourceRoot":"","sources":["../../src/utils/config-loader.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,eAAe,EACf,WAAW,EACX,MAAM,EACN,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAGrB,MAAM,WAAW,cAAc;IAC7B,eAAe,EAAE,MAAM,CAAC;IACxB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;IACxB,iBAAiB,EAAE,OAAO,CAAC;IAC3B,WAAW,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAClD,WAAW,EAAE,WAAW,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,WAAW,CACzB,UAAU,EAAE,OAAO,CAAC,eAAe,CAAC,GACnC,cAAc,CAoBhB"}
@@ -0,0 +1,19 @@
1
+ import { DEFAULT_CONFIG, DEFAULT_RETRY_CONFIG, } from '../types.js';
2
+ import { createSilentLogger } from './logger.js';
/**
 * Resolves a partial user configuration into a complete one.
 *
 * Every scalar option falls back to `DEFAULT_CONFIG` when the user value is `null` or
 * `undefined`. `retryConfig` is `DEFAULT_RETRY_CONFIG` overlaid with the retry settings the
 * caller actually provided; entries that are `null`/`undefined` are ignored so they cannot
 * erase a default (the same rule the scalar options follow). `onDegradation` is copied
 * through unchanged and `logger` defaults to a silent logger.
 *
 * @param userConfig - Options supplied by the caller; may be omitted entirely.
 * @returns A new configuration object with every required field populated.
 */
export function mergeConfig(userConfig) {
    const supplied = userConfig ?? {};
    // Keep only retry settings that carry a value, mirroring the `??` fallback used below.
    const retryOverrides = Object.fromEntries(Object.entries(supplied.retryConfig ?? {}).filter(([, value]) => value != null));
    return {
        tocCheckPageNum: supplied.tocCheckPageNum ?? DEFAULT_CONFIG.tocCheckPageNum,
        maxPageNumEachNode: supplied.maxPageNumEachNode ?? DEFAULT_CONFIG.maxPageNumEachNode,
        maxTokenNumEachNode: supplied.maxTokenNumEachNode ?? DEFAULT_CONFIG.maxTokenNumEachNode,
        addNodeId: supplied.addNodeId ?? DEFAULT_CONFIG.addNodeId,
        addNodeSummary: supplied.addNodeSummary ?? DEFAULT_CONFIG.addNodeSummary,
        addDocDescription: supplied.addDocDescription ?? DEFAULT_CONFIG.addDocDescription,
        addNodeText: supplied.addNodeText ?? DEFAULT_CONFIG.addNodeText,
        onDegradation: supplied.onDegradation,
        // Always a fresh object, so callers can never mutate the shared defaults.
        retryConfig: { ...DEFAULT_RETRY_CONFIG, ...retryOverrides },
        logger: supplied.logger ?? createSilentLogger(),
    };
}
19
+ //# sourceMappingURL=config-loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config-loader.js","sourceRoot":"","sources":["../../src/utils/config-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,oBAAoB,GACrB,MAAM,aAAa,CAAC;AAOrB,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAejD,MAAM,UAAU,WAAW,CACzB,UAAoC;IAEpC,OAAO;QACL,eAAe,EACb,UAAU,CAAC,eAAe,IAAI,cAAc,CAAC,eAAe;QAC9D,kBAAkB,EAChB,UAAU,CAAC,kBAAkB,IAAI,cAAc,CAAC,kBAAkB;QACpE,mBAAmB,EACjB,UAAU,CAAC,mBAAmB,IAAI,cAAc,CAAC,mBAAmB;QACtE,SAAS,EAAE,UAAU,CAAC,SAAS,IAAI,cAAc,CAAC,SAAS;QAC3D,cAAc,EACZ,UAAU,CAAC,cAAc,IAAI,cAAc,CAAC,cAAc;QAC5D,iBAAiB,EACf,UAAU,CAAC,iBAAiB,IAAI,cAAc,CAAC,iBAAiB;QAClE,WAAW,EAAE,UAAU,CAAC,WAAW,IAAI,cAAc,CAAC,WAAW;QACjE,aAAa,EAAE,UAAU,CAAC,aAAa;QACvC,WAAW,EAAE,UAAU,CAAC,WAAW;YACjC,CAAC,CAAC,EAAE,GAAG,oBAAoB,EAAE,GAAG,UAAU,CAAC,WAAW,EAAE;YACxD,CAAC,CAAC,EAAE,GAAG,oBAAoB,EAAE;QAC/B,MAAM,EAAE,UAAU,CAAC,MAAM,IAAI,kBAAkB,EAAE;KAClD,CAAC;AACJ,CAAC"}
@@ -0,0 +1,7 @@
1
+ export { countTokens } from './token-counter.js';
2
+ export { extractJson } from './json-parser.js';
3
+ export { mergeConfig } from './config-loader.js';
4
+ export type { ResolvedConfig } from './config-loader.js';
5
+ export { pageListToGroupText, convertPhysicalIndexToInt, addPhysicalIndexTags, } from './page-utils.js';
6
+ export { createSilentLogger } from './logger.js';
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EACL,mBAAmB,EACnB,yBAAyB,EACzB,oBAAoB,GACrB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,6 @@
1
+ export { countTokens } from './token-counter.js';
2
+ export { extractJson } from './json-parser.js';
3
+ export { mergeConfig } from './config-loader.js';
4
+ export { pageListToGroupText, convertPhysicalIndexToInt, addPhysicalIndexTags, } from './page-utils.js';
5
+ export { createSilentLogger } from './logger.js';
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,mBAAmB,EACnB,yBAAyB,EACzB,oBAAoB,GACrB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function extractJson(raw: string): unknown;
2
+ //# sourceMappingURL=json-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-parser.d.ts","sourceRoot":"","sources":["../../src/utils/json-parser.ts"],"names":[],"mappings":"AAwDA,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAgChD"}
@@ -0,0 +1,76 @@
1
+ import { LlmError } from '../errors/index.js';
/**
 * Rewrites bare Python-style `None` tokens as `null`, leaving quoted text untouched.
 *
 * The text is scanned one character at a time while tracking whether the cursor is inside a
 * single- or double-quoted string (a backslash inside a string protects the next character).
 * A `None` is rewritten only when it is outside a string and neither neighbour is a word
 * character; the start and end of the text count as non-word neighbours.
 */
function replaceNoneOutsideStrings(text) {
    const isWordChar = (ch) => /\w/.test(ch);
    const pieces = [];
    let openQuote = ''; // '' while outside a string, otherwise the quote character that opened it
    let pendingEscape = false;
    let pos = 0;
    while (pos < text.length) {
        const ch = text[pos];
        if (pendingEscape) {
            // Character right after a backslash inside a string: copy it verbatim.
            pendingEscape = false;
        }
        else if (openQuote !== '') {
            if (ch === '\\') {
                pendingEscape = true;
            }
            else if (ch === openQuote) {
                openQuote = '';
            }
        }
        else if (ch === '"' || ch === "'") {
            openQuote = ch;
        }
        else if (text.startsWith('None', pos)) {
            const before = pos > 0 ? text[pos - 1] : ' ';
            const after = pos + 4 < text.length ? text[pos + 4] : ' ';
            if (!isWordChar(before) && !isWordChar(after)) {
                pieces.push('null');
                pos += 4; // jump past the whole "None" token
                continue;
            }
        }
        pieces.push(ch);
        pos += 1;
    }
    return pieces.join('');
}
/**
 * Runs a strict `JSON.parse` and reports the outcome instead of throwing.
 * The `ok` flag is needed because `null` is itself a valid parse result.
 */
function tryParseJson(text) {
    try {
        return { ok: true, value: JSON.parse(text) };
    }
    catch {
        return { ok: false, value: undefined };
    }
}
/**
 * Extracts a JSON value from raw LLM output.
 *
 * Strategy, first success wins:
 *  1. Strictly parse the trimmed text, then (if present) the body of a ``` / ```json fence.
 *     Well-formed JSON is therefore returned exactly as written; in particular, whitespace
 *     inside string values is never altered.
 *  2. Lenient repair of the fenced body (or whole text): Python `None` -> `null` outside
 *     strings, every whitespace run (including newlines) collapsed to one space, then parse.
 *  3. Additionally strip trailing commas before `]` and `}` and parse once more.
 *
 * @param raw - Text returned by the LLM.
 * @returns The parsed JSON value.
 * @throws LlmError when none of the attempts yields valid JSON; the message carries the
 *   first 200 characters of `raw`.
 */
export function extractJson(raw) {
    const trimmed = raw.trim();
    const fenced = trimmed.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/);
    const payload = fenced ? fenced[1].trim() : trimmed;
    // Strict pass. The whole text is tried first so that valid JSON whose strings merely
    // contain ``` markers is not cut apart by the fence pattern.
    for (const candidate of fenced ? [trimmed, payload] : [trimmed]) {
        const strict = tryParseJson(candidate);
        if (strict.ok) {
            return strict.value;
        }
    }
    // Lenient pass: these repairs can change string contents, so they run only after a
    // strict parse has failed.
    const normalized = replaceNoneOutsideStrings(payload).replace(/\s+/g, ' ');
    const lenient = tryParseJson(normalized);
    if (lenient.ok) {
        return lenient.value;
    }
    const withoutTrailingCommas = normalized
        .replace(/,\s*]/g, ']')
        .replace(/,\s*}/g, '}');
    const repaired = tryParseJson(withoutTrailingCommas);
    if (repaired.ok) {
        return repaired.value;
    }
    // Fail loudly rather than silently returning an empty object.
    throw new LlmError(`Failed to parse JSON from LLM response: ${raw.slice(0, 200)}`);
}
76
+ //# sourceMappingURL=json-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-parser.js","sourceRoot":"","sources":["../../src/utils/json-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C;;GAEG;AACH,SAAS,yBAAyB,CAAC,IAAY;IAC7C,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,IAAI,SAAS,GAAG,EAAE,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,EAAE,CAAC;YACb,UAAU,GAAG,KAAK,CAAC;YACnB,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,EAAE,CAAC;YACb,UAAU,GAAG,IAAI,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,CAAC,QAAQ,IAAI,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,CAAC,EAAE,CAAC;YAC5C,QAAQ,GAAG,IAAI,CAAC;YAChB,SAAS,GAAG,EAAE,CAAC;YACf,MAAM,IAAI,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,IAAI,QAAQ,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACjC,QAAQ,GAAG,KAAK,CAAC;YACjB,MAAM,IAAI,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,iDAAiD;QACjD,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YACzC,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YACtD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5C,MAAM,IAAI,MAAM,CAAC;gBACjB,CAAC,IAAI,CAAC,CAAC,CAAC,mCAAmC;gBAC3C,SAAS;YACX,CAAC;QACH,CAAC;QAED,MAAM,IAAI,EAAE,CAAC;IACf,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,IAAI,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAEtB,0CAA0C;IAC1C,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3E,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,CAAC;IAED,kEAAkE;IAClE,IAAI,GAAG,yBAAyB,CAAC,IAAI,CAAC,CAAC;IAEvC,gDAAgD;IAChD,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAErD,yBAAyB;IACzB,IAAI,CAAC
;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,yCAAyC;QACzC,MAAM,OAAO,GAAG,IAAI;aACjB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC7B,CAAC;QAAC,MAAM,CAAC;YACP,iDAAiD;YACjD,MAAM,IAAI,QAAQ,CAChB,2CAA2C,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC/D,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { Logger } from '../types.js';
2
+ export declare function createSilentLogger(): Logger;
3
+ //# sourceMappingURL=logger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../src/utils/logger.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAI1C,wBAAgB,kBAAkB,IAAI,MAAM,CAO3C"}
@@ -0,0 +1,10 @@
1
+ const noop = () => { };
2
+ export function createSilentLogger() {
3
+ return {
4
+ debug: noop,
5
+ info: noop,
6
+ warn: noop,
7
+ error: noop,
8
+ };
9
+ }
10
+ //# sourceMappingURL=logger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/utils/logger.ts"],"names":[],"mappings":"AAEA,MAAM,IAAI,GAAG,GAAS,EAAE,GAAE,CAAC,CAAC;AAE5B,MAAM,UAAU,kBAAkB;IAChC,OAAO;QACL,KAAK,EAAE,IAAI;QACX,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,IAAI;QACV,KAAK,EAAE,IAAI;KACZ,CAAC;AACJ,CAAC"}
@@ -0,0 +1,16 @@
1
+ /**
2
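+ * Groups pages into newline-joined text chunks sized against a token budget derived from maxTokens (a group may still exceed maxTokens once overlap pages are added).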
3
+ * Uses overlap pages for context continuity between groups.
4
+ */
5
+ export declare function pageListToGroupText(pageContents: string[], tokenLengths: number[], maxTokens?: number, overlapPage?: number): string[];
6
+ /**
7
+ * Parses "<physical_index_5>" or "physical_index_5" → 5
8
+ */
9
+ export declare function convertPhysicalIndexToInt(value: string): number;
10
+ /**
11
+ * Wraps each page's text with <physical_index_X> tags.
12
+ */
13
+ export declare function addPhysicalIndexTags(pages: Array<{
14
+ text: string;
15
+ }>, startIndex: number): string;
16
+ //# sourceMappingURL=page-utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"page-utils.d.ts","sourceRoot":"","sources":["../../src/utils/page-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,mBAAmB,CACjC,YAAY,EAAE,MAAM,EAAE,EACtB,YAAY,EAAE,MAAM,EAAE,EACtB,SAAS,SAAQ,EACjB,WAAW,SAAI,GACd,MAAM,EAAE,CAuCV;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAM/D;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9B,UAAU,EAAE,MAAM,GACjB,MAAM,CAOR"}
@@ -0,0 +1,56 @@
1
/**
 * Splits a list of pages into newline-joined text groups sized against a
 * token budget.
 *
 * When the summed token lengths do not exceed `maxTokens`, all pages are
 * returned as a single group. Otherwise a per-group budget is computed as the
 * ceiling of the mean of `maxTokens` and the even share
 * (`total / ceil(total / maxTokens)`). A new group is started whenever adding
 * the next page would push the running count above that budget.
 *
 * Each new group is seeded with the `overlapPage` pages that precede the page
 * that triggered the split, so a group can end up larger than the budget
 * (and larger than `maxTokens`).
 *
 * @param pageContents Text of each page, in order.
 * @param tokenLengths Token count of each page, index-aligned with `pageContents`.
 * @param maxTokens    Token limit used to decide whether to split and to derive the budget.
 * @param overlapPage  Number of preceding pages repeated at the start of each new group.
 * @returns The groups, each being its pages joined with `\n`.
 */
export function pageListToGroupText(pageContents, tokenLengths, maxTokens = 20000, overlapPage = 1) {
    let grandTotal = 0;
    tokenLengths.forEach((count) => {
        grandTotal += count;
    });

    const fitsInOneGroup = grandTotal <= maxTokens;
    if (fitsInOneGroup) {
        return [pageContents.join('\n')];
    }

    const partCount = Math.ceil(grandTotal / maxTokens);
    const budget = Math.ceil((grandTotal / partCount + maxTokens) / 2);

    const chunks = [];
    let pending = [];
    let pendingTokens = 0;

    for (const [pageIdx, pageText] of pageContents.entries()) {
        const pageTokens = tokenLengths[pageIdx];
        const overBudget = pendingTokens + pageTokens > budget;

        // Never emit an empty group: only flush when something is pending.
        if (overBudget && pending.length > 0) {
            chunks.push(pending.join('\n'));
            pending = [];
            pendingTokens = 0;

            // Re-add the trailing pages of the flushed group as leading context.
            let back = Math.max(pageIdx - overlapPage, 0);
            while (back < pageIdx) {
                pending.push(pageContents[back]);
                pendingTokens += tokenLengths[back];
                back++;
            }
        }

        pending.push(pageText);
        pendingTokens += pageTokens;
    }

    if (pending.length > 0) {
        chunks.push(pending.join('\n'));
    }
    return chunks;
}
35
/**
 * Extracts the page number from a physical-index marker such as
 * "<physical_index_5>" or "physical_index_5".
 *
 * The first `physical_index_<digits>` occurrence anywhere in the string is used.
 *
 * @param value String containing the marker.
 * @returns The digits parsed as a base-10 integer.
 * @throws Error when no `physical_index_<digits>` pattern is present.
 */
export function convertPhysicalIndexToInt(value) {
    const found = value.match(/physical_index_(\d+)/);
    if (found) {
        const [, digits] = found;
        return Number.parseInt(digits, 10);
    }
    throw new Error(`Invalid physical index format: ${value}`);
}
45
/**
 * Surrounds each page's text with a `<physical_index_N>` marker line before
 * and after it, then joins all pages with newlines.
 *
 * The same marker text is used on both sides of a page (there is no separate
 * closing form). `N` is `startIndex` plus the page's position in `pages`.
 *
 * @param pages      Pages to tag; only each page's `text` is read.
 * @param startIndex Index assigned to the first page.
 * @returns The tagged pages joined with `\n` (empty string for no pages).
 */
export function addPhysicalIndexTags(pages, startIndex) {
    const wrapped = pages.map((page, offset) => {
        const tag = `<physical_index_${startIndex + offset}>`;
        return `${tag}\n${page.text}\n${tag}`;
    });
    return wrapped.join('\n');
}
56
+ //# sourceMappingURL=page-utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"page-utils.js","sourceRoot":"","sources":["../../src/utils/page-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CACjC,YAAsB,EACtB,YAAsB,EACtB,SAAS,GAAG,KAAK,EACjB,WAAW,GAAG,CAAC;IAEf,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAEhE,IAAI,WAAW,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC,CAAC;IACzD,MAAM,oBAAoB,GAAG,IAAI,CAAC,IAAI,CACpC,CAAC,WAAW,GAAG,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAC9C,CAAC;IAEF,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAE1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,IACE,iBAAiB,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,oBAAoB;YAC1D,YAAY,CAAC,MAAM,GAAG,CAAC,EACvB,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC;YAC9C,YAAY,GAAG,EAAE,CAAC;YAClB,iBAAiB,GAAG,CAAC,CAAC;YACtB,KAAK,IAAI,CAAC,GAAG,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnC,iBAAiB,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;QACnC,iBAAiB,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB,CAAC,KAAa;IACrD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IAClD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,EAAE,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAClC,KAA8B,EAC9B,UAAkB;IAElB,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACf,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC;QAC3B,OAAO,mBAAmB,GAAG,MAAM,IAAI,CAAC,IAAI,qB
AAqB,GAAG,GAAG,CAAC;IAC1E,CAAC,CAAC;SACD,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function countTokens(text: string): number;
2
+ //# sourceMappingURL=token-counter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"token-counter.d.ts","sourceRoot":"","sources":["../../src/utils/token-counter.ts"],"names":[],"mappings":"AAEA,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEhD"}
@@ -0,0 +1,5 @@
1
+ import { encode } from 'gpt-tokenizer';
2
/**
 * Counts the tokens in `text` by encoding it with `gpt-tokenizer`'s `encode`
 * and measuring the length of the result.
 *
 * @param text Text to tokenize.
 * @returns Number of tokens produced by `encode`.
 */
export function countTokens(text) {
    const tokenIds = encode(text);
    return tokenIds.length;
}
5
+ //# sourceMappingURL=token-counter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"token-counter.js","sourceRoot":"","sources":["../../src/utils/token-counter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAEvC,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,14 @@
1
+ import type { VectorStore } from './vector-store.js';
2
+ import type { VectorRecord, SearchResult } from '../types.js';
3
+ /**
4
+ * In-memory VectorStore implementation for testing and small-scale use.
5
+ * Uses cosine similarity for search.
6
+ */
7
+ export declare class InMemoryAdapter implements VectorStore {
8
+ private records;
9
+ upsert(vectors: VectorRecord[]): Promise<void>;
10
+ search(query: number[], topK: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
11
+ delete(ids: string[]): Promise<void>;
12
+ get size(): number;
13
+ }
14
+ //# sourceMappingURL=in-memory-adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"in-memory-adapter.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/adapters/in-memory-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE9D;;;GAGG;AACH,qBAAa,eAAgB,YAAW,WAAW;IACjD,OAAO,CAAC,OAAO,CAAmC;IAE5C,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAM9C,MAAM,CACV,KAAK,EAAE,MAAM,EAAE,EACf,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC;IAapB,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAM1C,IAAI,IAAI,IAAI,MAAM,CAEjB;CACF"}
@@ -0,0 +1,55 @@
1
/**
 * In-memory VectorStore implementation for testing and small-scale use.
 *
 * Records live in a Map keyed by record id. Search is a linear scan over all
 * stored records, ranked by cosine similarity to the query vector.
 */
export class InMemoryAdapter {
    /** Stored records, keyed by their `id`. */
    records = new Map();

    /**
     * Stores each record under its `id`, replacing any record already stored
     * under that id. Records are kept by reference, not copied.
     *
     * @param vectors Records to insert or replace.
     */
    async upsert(vectors) {
        for (const v of vectors) {
            this.records.set(v.id, v);
        }
    }

    /**
     * Returns the best-scoring stored records for a query vector.
     *
     * @param query  Query vector; records whose vector length differs score 0.
     * @param topK   Maximum number of results. Zero or a negative value yields no results.
     * @param filter Optional exact-match constraints on record payload fields;
     *               records that do not satisfy every entry are skipped.
     * @returns Up to `topK` `{ id, score, payload }` entries, highest score first.
     */
    async search(query, topK, filter) {
        // Array.prototype.slice treats a negative end as an offset from the
        // tail, so a negative topK would otherwise return all but the last
        // |topK| hits instead of none.
        if (topK <= 0) {
            return [];
        }
        const results = [];
        for (const record of this.records.values()) {
            if (!matchesFilter(record.payload, filter))
                continue;
            const score = cosineSimilarity(query, record.vector);
            results.push({ id: record.id, score, payload: record.payload });
        }
        // Descending by similarity.
        results.sort((a, b) => b.score - a.score);
        return results.slice(0, topK);
    }

    /**
     * Removes the records with the given ids; unknown ids are ignored.
     *
     * @param ids Ids of the records to remove.
     */
    async delete(ids) {
        for (const id of ids) {
            this.records.delete(id);
        }
    }

    /** Number of records currently stored. */
    get size() {
        return this.records.size;
    }
}
32
/**
 * Tests whether a record payload satisfies an exact-match filter.
 *
 * Every filter entry must equal the payload field of the same name under
 * `Object.is` semantics (so `NaN` matches `NaN`, and `0` does not match `-0`).
 *
 * @param payload Record payload. A null or undefined payload is treated as
 *                having no fields instead of raising a TypeError on property access.
 * @param filter  Field/value pairs to require; when absent, everything matches.
 * @returns `true` when no filter is given or every filter entry matches.
 */
function matchesFilter(payload, filter) {
    if (!filter)
        return true;
    // A record stored without a payload simply has nothing to match against.
    const fields = payload ?? {};
    return Object.entries(filter).every(([key, expected]) => Object.is(fields[key], expected));
}
41
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when the vectors differ in length
 *          or the product of their magnitudes is zero.
 */
function cosineSimilarity(a, b) {
    const dims = a.length;
    if (dims !== b.length) {
        return 0;
    }

    let dot = 0;
    let sumSqA = 0;
    let sumSqB = 0;
    let k = 0;
    while (k < dims) {
        const x = a[k];
        const y = b[k];
        dot += x * y;
        sumSqA += x * x;
        sumSqB += y * y;
        k++;
    }

    const denominator = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
    if (denominator === 0) {
        return 0;
    }
    return dot / denominator;
}
55
+ //# sourceMappingURL=in-memory-adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"in-memory-adapter.js","sourceRoot":"","sources":["../../../src/vector-lib/adapters/in-memory-adapter.ts"],"names":[],"mappings":"AAGA;;;GAGG;AACH,MAAM,OAAO,eAAe;IAClB,OAAO,GAAG,IAAI,GAAG,EAAwB,CAAC;IAElD,KAAK,CAAC,MAAM,CAAC,OAAuB;QAClC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,MAAM,CACV,KAAe,EACf,IAAY,EACZ,MAAgC;QAEhC,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC;gBAAE,SAAS;YACrD,MAAM,KAAK,GAAG,gBAAgB,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YACrD,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,MAAM,CAAC,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAC1C,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAa;QACxB,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;IAC3B,CAAC;CACF;AAED,SAAS,aAAa,CACpB,OAAgC,EAChC,MAAgC;IAEhC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,KAAK,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACrD,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC;YAAE,OAAO,KAAK,CAAC;IACvD,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,gBAAgB,CAAC,CAAW,EAAE,CAAW;IAChD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;QAAE,OAAO,CAAC,CAAC;IAEpC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,
CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACxD,OAAO,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,WAAW,CAAC;AAC1D,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { VectorRecord, SearchResult } from '../types.js';
2
+ /**
3
+ * VectorStore interface — user implements this for their vector DB.
4
+ */
5
+ export interface VectorStore {
6
+ upsert(vectors: VectorRecord[]): Promise<void>;
7
+ search(query: number[], topK: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
8
+ delete(ids: string[]): Promise<void>;
9
+ }
10
+ //# sourceMappingURL=vector-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vector-store.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/adapters/vector-store.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE9D;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,MAAM,CACJ,KAAK,EAAE,MAAM,EAAE,EACf,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC/B,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC3B,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACtC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=vector-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vector-store.js","sourceRoot":"","sources":["../../../src/vector-lib/adapters/vector-store.ts"],"names":[],"mappings":""}
@@ -0,0 +1,8 @@
1
+ import type { PageIndexResult } from '../../types.js';
2
+ import type { Chunk, VectorConfig } from '../types.js';
3
+ /**
4
+ * Splits a PageIndexResult tree into chunks suitable for vector indexing.
5
+ * Each leaf node with text becomes one or more chunks.
6
+ */
7
+ export declare const treeChunker: (result: PageIndexResult, config?: VectorConfig) => Chunk[];
8
+ //# sourceMappingURL=tree-chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-chunker.d.ts","sourceRoot":"","sources":["../../../src/vector-lib/chunker/tree-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAY,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAChE,OAAO,KAAK,EAAE,KAAK,EAAW,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhE;;;GAGG;AACH,eAAO,MAAM,WAAW,WACd,eAAe,WACf,YAAY,KACnB,KAAK,EAqCY,CAAC"}