@fastrag/pageindex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +251 -0
  3. package/README.zh-CN.md +251 -0
  4. package/dist/errors/index.d.ts +10 -0
  5. package/dist/errors/index.d.ts.map +1 -0
  6. package/dist/errors/index.js +19 -0
  7. package/dist/errors/index.js.map +1 -0
  8. package/dist/index.d.ts +14 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +20 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/internal-types/config.d.ts +35 -0
  13. package/dist/internal-types/config.d.ts.map +1 -0
  14. package/dist/internal-types/config.js +16 -0
  15. package/dist/internal-types/config.js.map +1 -0
  16. package/dist/internal-types/document-parser.d.ts +5 -0
  17. package/dist/internal-types/document-parser.d.ts.map +1 -0
  18. package/dist/internal-types/document-parser.js +2 -0
  19. package/dist/internal-types/document-parser.js.map +1 -0
  20. package/dist/internal-types/index.d.ts +9 -0
  21. package/dist/internal-types/index.d.ts.map +1 -0
  22. package/dist/internal-types/index.js +2 -0
  23. package/dist/internal-types/index.js.map +1 -0
  24. package/dist/internal-types/llm-provider.d.ts +19 -0
  25. package/dist/internal-types/llm-provider.d.ts.map +1 -0
  26. package/dist/internal-types/llm-provider.js +2 -0
  27. package/dist/internal-types/llm-provider.js.map +1 -0
  28. package/dist/internal-types/logger.d.ts +7 -0
  29. package/dist/internal-types/logger.d.ts.map +1 -0
  30. package/dist/internal-types/logger.js +2 -0
  31. package/dist/internal-types/logger.js.map +1 -0
  32. package/dist/internal-types/page.d.ts +5 -0
  33. package/dist/internal-types/page.d.ts.map +1 -0
  34. package/dist/internal-types/page.js +2 -0
  35. package/dist/internal-types/page.js.map +1 -0
  36. package/dist/internal-types/processing.d.ts +21 -0
  37. package/dist/internal-types/processing.d.ts.map +1 -0
  38. package/dist/internal-types/processing.js +2 -0
  39. package/dist/internal-types/processing.js.map +1 -0
  40. package/dist/internal-types/tree-node.d.ts +30 -0
  41. package/dist/internal-types/tree-node.d.ts.map +1 -0
  42. package/dist/internal-types/tree-node.js +2 -0
  43. package/dist/internal-types/tree-node.js.map +1 -0
  44. package/dist/llm/index.d.ts +3 -0
  45. package/dist/llm/index.d.ts.map +1 -0
  46. package/dist/llm/index.js +3 -0
  47. package/dist/llm/index.js.map +1 -0
  48. package/dist/llm/llm-client.d.ts +26 -0
  49. package/dist/llm/llm-client.d.ts.map +1 -0
  50. package/dist/llm/llm-client.js +88 -0
  51. package/dist/llm/llm-client.js.map +1 -0
  52. package/dist/llm/prompts.d.ts +33 -0
  53. package/dist/llm/prompts.d.ts.map +1 -0
  54. package/dist/llm/prompts.js +312 -0
  55. package/dist/llm/prompts.js.map +1 -0
  56. package/dist/markdown/index.d.ts +6 -0
  57. package/dist/markdown/index.d.ts.map +1 -0
  58. package/dist/markdown/index.js +5 -0
  59. package/dist/markdown/index.js.map +1 -0
  60. package/dist/markdown/md-extractor.d.ts +14 -0
  61. package/dist/markdown/md-extractor.d.ts.map +1 -0
  62. package/dist/markdown/md-extractor.js +30 -0
  63. package/dist/markdown/md-extractor.js.map +1 -0
  64. package/dist/markdown/md-to-tree.d.ts +8 -0
  65. package/dist/markdown/md-to-tree.d.ts.map +1 -0
  66. package/dist/markdown/md-to-tree.js +20 -0
  67. package/dist/markdown/md-to-tree.js.map +1 -0
  68. package/dist/markdown/md-tree-builder.d.ts +7 -0
  69. package/dist/markdown/md-tree-builder.d.ts.map +1 -0
  70. package/dist/markdown/md-tree-builder.js +36 -0
  71. package/dist/markdown/md-tree-builder.js.map +1 -0
  72. package/dist/markdown/tree-thinning.d.ts +8 -0
  73. package/dist/markdown/tree-thinning.d.ts.map +1 -0
  74. package/dist/markdown/tree-thinning.js +42 -0
  75. package/dist/markdown/tree-thinning.js.map +1 -0
  76. package/dist/page-index.d.ts +10 -0
  77. package/dist/page-index.d.ts.map +1 -0
  78. package/dist/page-index.js +54 -0
  79. package/dist/page-index.js.map +1 -0
  80. package/dist/post-processing/doc-description.d.ts +12 -0
  81. package/dist/post-processing/doc-description.d.ts.map +1 -0
  82. package/dist/post-processing/doc-description.js +31 -0
  83. package/dist/post-processing/doc-description.js.map +1 -0
  84. package/dist/post-processing/index.d.ts +5 -0
  85. package/dist/post-processing/index.d.ts.map +1 -0
  86. package/dist/post-processing/index.js +5 -0
  87. package/dist/post-processing/index.js.map +1 -0
  88. package/dist/post-processing/node-id.d.ts +7 -0
  89. package/dist/post-processing/node-id.d.ts.map +1 -0
  90. package/dist/post-processing/node-id.js +20 -0
  91. package/dist/post-processing/node-id.js.map +1 -0
  92. package/dist/post-processing/node-text.d.ts +11 -0
  93. package/dist/post-processing/node-text.d.ts.map +1 -0
  94. package/dist/post-processing/node-text.js +37 -0
  95. package/dist/post-processing/node-text.js.map +1 -0
  96. package/dist/post-processing/summary.d.ts +7 -0
  97. package/dist/post-processing/summary.d.ts.map +1 -0
  98. package/dist/post-processing/summary.js +31 -0
  99. package/dist/post-processing/summary.js.map +1 -0
  100. package/dist/processing/index.d.ts +6 -0
  101. package/dist/processing/index.d.ts.map +1 -0
  102. package/dist/processing/index.js +6 -0
  103. package/dist/processing/index.js.map +1 -0
  104. package/dist/processing/large-node.d.ts +9 -0
  105. package/dist/processing/large-node.d.ts.map +1 -0
  106. package/dist/processing/large-node.js +40 -0
  107. package/dist/processing/large-node.js.map +1 -0
  108. package/dist/processing/meta-processor.d.ts +19 -0
  109. package/dist/processing/meta-processor.d.ts.map +1 -0
  110. package/dist/processing/meta-processor.js +91 -0
  111. package/dist/processing/meta-processor.js.map +1 -0
  112. package/dist/processing/no-toc.d.ts +10 -0
  113. package/dist/processing/no-toc.d.ts.map +1 -0
  114. package/dist/processing/no-toc.js +44 -0
  115. package/dist/processing/no-toc.js.map +1 -0
  116. package/dist/processing/toc-no-pages.d.ts +11 -0
  117. package/dist/processing/toc-no-pages.d.ts.map +1 -0
  118. package/dist/processing/toc-no-pages.js +46 -0
  119. package/dist/processing/toc-no-pages.js.map +1 -0
  120. package/dist/processing/toc-with-pages.d.ts +15 -0
  121. package/dist/processing/toc-with-pages.d.ts.map +1 -0
  122. package/dist/processing/toc-with-pages.js +151 -0
  123. package/dist/processing/toc-with-pages.js.map +1 -0
  124. package/dist/toc/index.d.ts +4 -0
  125. package/dist/toc/index.d.ts.map +1 -0
  126. package/dist/toc/index.js +4 -0
  127. package/dist/toc/index.js.map +1 -0
  128. package/dist/toc/toc-detector.d.ts +23 -0
  129. package/dist/toc/toc-detector.d.ts.map +1 -0
  130. package/dist/toc/toc-detector.js +65 -0
  131. package/dist/toc/toc-detector.js.map +1 -0
  132. package/dist/toc/toc-extractor.d.ts +13 -0
  133. package/dist/toc/toc-extractor.d.ts.map +1 -0
  134. package/dist/toc/toc-extractor.js +32 -0
  135. package/dist/toc/toc-extractor.js.map +1 -0
  136. package/dist/toc/toc-transformer.d.ts +11 -0
  137. package/dist/toc/toc-transformer.d.ts.map +1 -0
  138. package/dist/toc/toc-transformer.js +69 -0
  139. package/dist/toc/toc-transformer.js.map +1 -0
  140. package/dist/tree/index.d.ts +4 -0
  141. package/dist/tree/index.d.ts.map +1 -0
  142. package/dist/tree/index.js +4 -0
  143. package/dist/tree/index.js.map +1 -0
  144. package/dist/tree/list-to-tree.d.ts +7 -0
  145. package/dist/tree/list-to-tree.d.ts.map +1 -0
  146. package/dist/tree/list-to-tree.js +33 -0
  147. package/dist/tree/list-to-tree.js.map +1 -0
  148. package/dist/tree/post-processing.d.ts +12 -0
  149. package/dist/tree/post-processing.d.ts.map +1 -0
  150. package/dist/tree/post-processing.js +87 -0
  151. package/dist/tree/post-processing.js.map +1 -0
  152. package/dist/tree/tree-utils.d.ts +18 -0
  153. package/dist/tree/tree-utils.d.ts.map +1 -0
  154. package/dist/tree/tree-utils.js +43 -0
  155. package/dist/tree/tree-utils.js.map +1 -0
  156. package/dist/tree-parser.d.ts +30 -0
  157. package/dist/tree-parser.d.ts.map +1 -0
  158. package/dist/tree-parser.js +73 -0
  159. package/dist/tree-parser.js.map +1 -0
  160. package/dist/types.d.ts +3 -0
  161. package/dist/types.d.ts.map +1 -0
  162. package/dist/types.js +2 -0
  163. package/dist/types.js.map +1 -0
  164. package/dist/utils/config-loader.d.ts +15 -0
  165. package/dist/utils/config-loader.d.ts.map +1 -0
  166. package/dist/utils/config-loader.js +19 -0
  167. package/dist/utils/config-loader.js.map +1 -0
  168. package/dist/utils/index.d.ts +7 -0
  169. package/dist/utils/index.d.ts.map +1 -0
  170. package/dist/utils/index.js +6 -0
  171. package/dist/utils/index.js.map +1 -0
  172. package/dist/utils/json-parser.d.ts +2 -0
  173. package/dist/utils/json-parser.d.ts.map +1 -0
  174. package/dist/utils/json-parser.js +76 -0
  175. package/dist/utils/json-parser.js.map +1 -0
  176. package/dist/utils/logger.d.ts +3 -0
  177. package/dist/utils/logger.d.ts.map +1 -0
  178. package/dist/utils/logger.js +10 -0
  179. package/dist/utils/logger.js.map +1 -0
  180. package/dist/utils/page-utils.d.ts +16 -0
  181. package/dist/utils/page-utils.d.ts.map +1 -0
  182. package/dist/utils/page-utils.js +56 -0
  183. package/dist/utils/page-utils.js.map +1 -0
  184. package/dist/utils/token-counter.d.ts +2 -0
  185. package/dist/utils/token-counter.d.ts.map +1 -0
  186. package/dist/utils/token-counter.js +5 -0
  187. package/dist/utils/token-counter.js.map +1 -0
  188. package/dist/vector-lib/adapters/in-memory-adapter.d.ts +14 -0
  189. package/dist/vector-lib/adapters/in-memory-adapter.d.ts.map +1 -0
  190. package/dist/vector-lib/adapters/in-memory-adapter.js +55 -0
  191. package/dist/vector-lib/adapters/in-memory-adapter.js.map +1 -0
  192. package/dist/vector-lib/adapters/vector-store.d.ts +10 -0
  193. package/dist/vector-lib/adapters/vector-store.d.ts.map +1 -0
  194. package/dist/vector-lib/adapters/vector-store.js +2 -0
  195. package/dist/vector-lib/adapters/vector-store.js.map +1 -0
  196. package/dist/vector-lib/chunker/tree-chunker.d.ts +8 -0
  197. package/dist/vector-lib/chunker/tree-chunker.d.ts.map +1 -0
  198. package/dist/vector-lib/chunker/tree-chunker.js +59 -0
  199. package/dist/vector-lib/chunker/tree-chunker.js.map +1 -0
  200. package/dist/vector-lib/embedder/embedder.d.ts +8 -0
  201. package/dist/vector-lib/embedder/embedder.d.ts.map +1 -0
  202. package/dist/vector-lib/embedder/embedder.js +2 -0
  203. package/dist/vector-lib/embedder/embedder.js.map +1 -0
  204. package/dist/vector-lib/index.d.ts +10 -0
  205. package/dist/vector-lib/index.d.ts.map +1 -0
  206. package/dist/vector-lib/index.js +6 -0
  207. package/dist/vector-lib/index.js.map +1 -0
  208. package/dist/vector-lib/search/hybrid-search.d.ts +19 -0
  209. package/dist/vector-lib/search/hybrid-search.d.ts.map +1 -0
  210. package/dist/vector-lib/search/hybrid-search.js +25 -0
  211. package/dist/vector-lib/search/hybrid-search.js.map +1 -0
  212. package/dist/vector-lib/search/reranker.d.ts +14 -0
  213. package/dist/vector-lib/search/reranker.d.ts.map +1 -0
  214. package/dist/vector-lib/search/reranker.js +2 -0
  215. package/dist/vector-lib/search/reranker.js.map +1 -0
  216. package/dist/vector-lib/types.d.ts +29 -0
  217. package/dist/vector-lib/types.d.ts.map +1 -0
  218. package/dist/vector-lib/types.js +2 -0
  219. package/dist/vector-lib/types.js.map +1 -0
  220. package/dist/vector-lib/vector-enhancer.d.ts +28 -0
  221. package/dist/vector-lib/vector-enhancer.d.ts.map +1 -0
  222. package/dist/vector-lib/vector-enhancer.js +54 -0
  223. package/dist/vector-lib/vector-enhancer.js.map +1 -0
  224. package/dist/vector.d.ts +5 -0
  225. package/dist/vector.d.ts.map +1 -0
  226. package/dist/vector.js +3 -0
  227. package/dist/vector.js.map +1 -0
  228. package/dist/verification/fix-toc.d.ts +13 -0
  229. package/dist/verification/fix-toc.d.ts.map +1 -0
  230. package/dist/verification/fix-toc.js +73 -0
  231. package/dist/verification/fix-toc.js.map +1 -0
  232. package/dist/verification/index.d.ts +3 -0
  233. package/dist/verification/index.d.ts.map +1 -0
  234. package/dist/verification/index.js +3 -0
  235. package/dist/verification/index.js.map +1 -0
  236. package/dist/verification/verify-toc.d.ts +17 -0
  237. package/dist/verification/verify-toc.d.ts.map +1 -0
  238. package/dist/verification/verify-toc.js +64 -0
  239. package/dist/verification/verify-toc.js.map +1 -0
  240. package/package.json +58 -0
@@ -0,0 +1,31 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { generateDocDescriptionPrompt } from '../llm/prompts.js';
3
+ /**
4
+ * Creates a clean structure for description generation.
5
+ * Only includes title, nodeId, summary, and prefixSummary.
6
+ */
7
+ export function createCleanStructureForDescription(nodes) {
8
+ return nodes.map((node) => ({
9
+ title: node.title,
10
+ node_id: node.nodeId,
11
+ summary: node.summary,
12
+ prefix_summary: node.prefixSummary,
13
+ children: node.nodes.length > 0
14
+ ? createCleanStructureForDescription(node.nodes)
15
+ : [],
16
+ }));
17
+ }
18
/**
 * Asks the LLM for a description of the whole document, based on the
 * cleaned tree structure.
 *
 * The tree is reduced with createCleanStructureForDescription, serialized
 * to JSON, wrapped by generateDocDescriptionPrompt and sent as a single
 * user message.
 *
 * @param nodes Top-level tree nodes of the document.
 * @param llmClient Client exposing an async `chat(messages)` method whose
 *   result has a string `content` field.
 * @returns The model reply with surrounding whitespace trimmed.
 */
export async function generateDocDescription(nodes, llmClient) {
    const outline = createCleanStructureForDescription(nodes);
    const prompt = generateDocDescriptionPrompt(JSON.stringify(outline));
    const reply = await llmClient.chat([{ role: 'user', content: prompt }]);
    return reply.content.trim();
}
31
+ //# sourceMappingURL=doc-description.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"doc-description.js","sourceRoot":"","sources":["../../src/post-processing/doc-description.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,4BAA4B,EAAE,MAAM,mBAAmB,CAAC;AAEjE;;;GAGG;AACH,MAAM,UAAU,kCAAkC,CAChD,KAAiB;IAEjB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC1B,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,OAAO,EAAE,IAAI,CAAC,MAAM;QACpB,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,cAAc,EAAE,IAAI,CAAC,aAAa;QAClC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAC7B,CAAC,CAAC,kCAAkC,CAAC,IAAI,CAAC,KAAK,CAAC;YAChD,CAAC,CAAC,EAAE;KACP,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,KAAiB,EACjB,SAAoB;IAEpB,MAAM,cAAc,GAAG,kCAAkC,CAAC,KAAK,CAAC,CAAC;IACjE,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC;QACpC;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,4BAA4B,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;SACtE;KACF,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;AACjC,CAAC"}
@@ -0,0 +1,5 @@
1
+ export { writeNodeId } from './node-id.js';
2
+ export { addNodeText, removeStructureText } from './node-text.js';
3
+ export { generateSummariesForStructure } from './summary.js';
4
+ export { createCleanStructureForDescription, generateDocDescription, } from './doc-description.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/post-processing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAClE,OAAO,EAAE,6BAA6B,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EACL,kCAAkC,EAClC,sBAAsB,GACvB,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,5 @@
1
+ export { writeNodeId } from './node-id.js';
2
+ export { addNodeText, removeStructureText } from './node-text.js';
3
+ export { generateSummariesForStructure } from './summary.js';
4
+ export { createCleanStructureForDescription, generateDocDescription, } from './doc-description.js';
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/post-processing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAClE,OAAO,EAAE,6BAA6B,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EACL,kCAAkC,EAClC,sBAAsB,GACvB,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { TreeNode } from '../types.js';
2
+ /**
3
+ * Recursively assigns a zero-padded nodeId string (at least 4 digits) to each node in the tree, parents before children.
4
+ * Returns the next available nodeId.
5
+ */
6
+ export declare function writeNodeId(nodes: TreeNode | TreeNode[], nodeId?: number): number;
7
+ //# sourceMappingURL=node-id.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"node-id.d.ts","sourceRoot":"","sources":["../../src/post-processing/node-id.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C;;;GAGG;AACH,wBAAgB,WAAW,CACzB,KAAK,EAAE,QAAQ,GAAG,QAAQ,EAAE,EAC5B,MAAM,SAAI,GACT,MAAM,CAiBR"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Recursively assigns 4-digit nodeId to each node in the tree.
3
+ * Returns the next available nodeId.
4
+ */
5
+ export function writeNodeId(nodes, nodeId = 0) {
6
+ if (Array.isArray(nodes)) {
7
+ for (const node of nodes) {
8
+ nodeId = writeNodeId(node, nodeId);
9
+ }
10
+ return nodeId;
11
+ }
12
+ // Single node
13
+ nodes.nodeId = String(nodeId).padStart(4, '0');
14
+ nodeId++;
15
+ if (nodes.nodes.length > 0) {
16
+ nodeId = writeNodeId(nodes.nodes, nodeId);
17
+ }
18
+ return nodeId;
19
+ }
20
+ //# sourceMappingURL=node-id.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"node-id.js","sourceRoot":"","sources":["../../src/post-processing/node-id.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,MAAM,UAAU,WAAW,CACzB,KAA4B,EAC5B,MAAM,GAAG,CAAC;IAEV,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,GAAG,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACrC,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,cAAc;IACd,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC/C,MAAM,EAAE,CAAC;IAET,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAC5C,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { TreeNode } from '../types.js';
2
+ /**
3
+ * Recursively fills node.text by concatenating page texts, joined with newlines,
4
+ * from startIndex to endIndex (both 1-based, endIndex inclusive).
5
+ */
6
+ export declare function addNodeText(nodes: TreeNode | TreeNode[], pageTexts: string[]): void;
7
+ /**
8
+ * Recursively removes text from all nodes.
9
+ */
10
+ export declare function removeStructureText(nodes: TreeNode | TreeNode[]): void;
11
+ //# sourceMappingURL=node-text.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"node-text.d.ts","sourceRoot":"","sources":["../../src/post-processing/node-text.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C;;;GAGG;AACH,wBAAgB,WAAW,CACzB,KAAK,EAAE,QAAQ,GAAG,QAAQ,EAAE,EAC5B,SAAS,EAAE,MAAM,EAAE,GAClB,IAAI,CAkBN;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAAG,IAAI,CAYtE"}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Recursively fills node.text by concatenating page texts
3
+ * from startIndex to endIndex (1-based).
4
+ */
5
+ export function addNodeText(nodes, pageTexts) {
6
+ if (Array.isArray(nodes)) {
7
+ for (const node of nodes) {
8
+ addNodeText(node, pageTexts);
9
+ }
10
+ return;
11
+ }
12
+ // Single node
13
+ if (nodes.startIndex != null && nodes.endIndex != null) {
14
+ const start = nodes.startIndex - 1; // Convert to 0-based
15
+ const end = nodes.endIndex; // endIndex is inclusive, slice is exclusive
16
+ nodes.text = pageTexts.slice(start, end).join('\n');
17
+ }
18
+ if (nodes.nodes.length > 0) {
19
+ addNodeText(nodes.nodes, pageTexts);
20
+ }
21
+ }
22
+ /**
23
+ * Recursively removes text from all nodes.
24
+ */
25
+ export function removeStructureText(nodes) {
26
+ if (Array.isArray(nodes)) {
27
+ for (const node of nodes) {
28
+ removeStructureText(node);
29
+ }
30
+ return;
31
+ }
32
+ delete nodes.text;
33
+ if (nodes.nodes.length > 0) {
34
+ removeStructureText(nodes.nodes);
35
+ }
36
+ }
37
+ //# sourceMappingURL=node-text.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"node-text.js","sourceRoot":"","sources":["../../src/post-processing/node-text.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,MAAM,UAAU,WAAW,CACzB,KAA4B,EAC5B,SAAmB;IAEnB,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,WAAW,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAC/B,CAAC;QACD,OAAO;IACT,CAAC;IAED,cAAc;IACd,IAAI,KAAK,CAAC,UAAU,IAAI,IAAI,IAAI,KAAK,CAAC,QAAQ,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,KAAK,GAAG,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,qBAAqB;QACzD,MAAM,GAAG,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC,4CAA4C;QACxE,KAAK,CAAC,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtD,CAAC;IAED,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,WAAW,CAAC,KAAK,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACtC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAA4B;IAC9D,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,mBAAmB,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC;IAClB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,mBAAmB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACnC,CAAC;AACH,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { TreeNode } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
+ /**
4
+ * Recursively generates summaries for all nodes that have text.
5
+ */
6
+ export declare function generateSummariesForStructure(nodes: TreeNode[], llmClient: LlmClient): Promise<void>;
7
+ //# sourceMappingURL=summary.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../src/post-processing/summary.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAGjD;;GAEG;AACH,wBAAsB,6BAA6B,CACjD,KAAK,EAAE,QAAQ,EAAE,EACjB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,IAAI,CAAC,CAGf"}
@@ -0,0 +1,31 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { generateNodeSummaryPrompt } from '../llm/prompts.js';
3
/**
 * Generates summaries for every node in the given forest.
 *
 * All top-level nodes are started at once and awaited together; the
 * per-node work (including recursion into children) is done by
 * generateNodeSummary.
 *
 * @param nodes Top-level tree nodes; nodes are mutated in place.
 * @param llmClient Client passed through to generateNodeSummary.
 * @returns A promise that resolves once every node has been processed and
 *   rejects as soon as any of them fails.
 */
export async function generateSummariesForStructure(nodes, llmClient) {
    await Promise.all(nodes.map((root) => generateNodeSummary(root, llmClient)));
}
10
/**
 * Summarizes one node and, recursively, all of its descendants.
 *
 * - If the node has non-empty `text`, the LLM is asked for a summary and
 *   the trimmed reply is stored in `node.summary`.
 * - If the node has children, they are summarized concurrently; afterwards
 *   the children's non-empty summaries are joined with "; " and, when the
 *   result is non-empty, stored in `node.prefixSummary`.
 *
 * @param node Tree node to process; mutated in place.
 * @param llmClient Client exposing an async `chat(messages)` method whose
 *   result has a string `content` field.
 */
async function generateNodeSummary(node, llmClient) {
    if (node.text) {
        const prompt = generateNodeSummaryPrompt(node.text);
        const reply = await llmClient.chat([{ role: 'user', content: prompt }]);
        node.summary = reply.content.trim();
    }
    // Leaf nodes have nothing further to aggregate.
    if (!(node.nodes.length > 0)) {
        return;
    }
    const pending = node.nodes.map((child) => generateNodeSummary(child, llmClient));
    await Promise.all(pending);
    // For non-leaf nodes, create a prefix summary from children.
    // Children without a summary are skipped; if none has one, the joined
    // string is empty and prefixSummary is left as it was.
    const collected = [];
    for (const child of node.nodes) {
        if (child.summary) {
            collected.push(child.summary);
        }
    }
    const joined = collected.join('; ');
    if (joined) {
        node.prefixSummary = joined;
    }
}
31
+ //# sourceMappingURL=summary.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summary.js","sourceRoot":"","sources":["../../src/post-processing/summary.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,yBAAyB,EAAE,MAAM,mBAAmB,CAAC;AAE9D;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,6BAA6B,CACjD,KAAiB,EACjB,SAAoB;IAEpB,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC;IAC3E,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;AAC9B,CAAC;AAED,KAAK,UAAU,mBAAmB,CAChC,IAAc,EACd,SAAoB;IAEpB,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACd,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC;YACpC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;SAChE,CAAC,CAAC;QACH,IAAI,CAAC,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACzC,CAAC;IAED,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,OAAO,CAAC,GAAG,CACf,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,mBAAmB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CACjE,CAAC;QAEF,4DAA4D;QAC5D,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK;iBAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;iBACrB,MAAM,CAAC,OAAO,CAAC;iBACf,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,IAAI,cAAc,EAAE,CAAC;gBACnB,IAAI,CAAC,aAAa,GAAG,cAAc,CAAC;YACtC,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,6 @@
1
+ export { metaProcessor } from './meta-processor.js';
2
+ export { processTocWithPageNumbers } from './toc-with-pages.js';
3
+ export { processTocNoPageNumbers } from './toc-no-pages.js';
4
+ export { processNoToc } from './no-toc.js';
5
+ export { processLargeNodeRecursively } from './large-node.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/processing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AAC5D,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,2BAA2B,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,6 @@
1
+ export { metaProcessor } from './meta-processor.js';
2
+ export { processTocWithPageNumbers } from './toc-with-pages.js';
3
+ export { processTocNoPageNumbers } from './toc-no-pages.js';
4
+ export { processNoToc } from './no-toc.js';
5
+ export { processLargeNodeRecursively } from './large-node.js';
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/processing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AAC5D,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,2BAA2B,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,9 @@
1
+ import type { TreeNode, Logger } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
+ /**
4
+ * Recursively splits large nodes by generating sub-structure.
5
+ */
6
+ export declare function processLargeNodeRecursively(node: TreeNode, pageList: Array<{
7
+ text: string;
8
+ }>, llmClient: LlmClient, logger: Logger, maxPageNumEachNode: number, maxTokenNumEachNode: number): Promise<void>;
9
+ //# sourceMappingURL=large-node.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"large-node.d.ts","sourceRoot":"","sources":["../../src/processing/large-node.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAMjD;;GAEG;AACH,wBAAsB,2BAA2B,CAC/C,IAAI,EAAE,QAAQ,EACd,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,EACpB,MAAM,EAAE,MAAM,EACd,kBAAkB,EAAE,MAAM,EAC1B,mBAAmB,EAAE,MAAM,GAC1B,OAAO,CAAC,IAAI,CAAC,CAoDf"}
@@ -0,0 +1,40 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { metaProcessor } from './meta-processor.js';
3
+ import { postProcessing } from '../tree/post-processing.js';
4
+ import { checkTitleAppearanceInStartConcurrent } from '../tree-parser.js';
5
+ import { countTokens } from '../utils/token-counter.js';
6
/**
 * Splits a node that is too large into child nodes, then repeats the
 * process on each child.
 *
 * A node is split only when BOTH limits are hit: its inclusive page span
 * (`endIndex - startIndex + 1`) is greater than `maxPageNumEachNode` and
 * the summed token count of its pages is at least `maxTokenNumEachNode`.
 * Nodes without `startIndex` or `endIndex` are left untouched.
 *
 * When splitting, the node's pages are run through metaProcessor in
 * 'process_no_toc' mode, the resulting items are passed to
 * checkTitleAppearanceInStartConcurrent, items without a `physicalIndex`
 * are discarded, and a leading item whose title equals the node's own
 * title is dropped. If any items remain, `node.nodes` is replaced by
 * postProcessing(items, node.endIndex) and every new child is processed
 * concurrently with the same limits.
 *
 * @param node Tree node to examine; mutated in place.
 * @param pageList All pages of the document (objects with a `text` field),
 *   page 1 at element 0.
 * @param llmClient LLM client forwarded to the helpers.
 * @param logger Logger; `info` is called once per node that is split.
 * @param maxPageNumEachNode Page-span threshold (exclusive).
 * @param maxTokenNumEachNode Token-count threshold (inclusive).
 */
export async function processLargeNodeRecursively(node, pageList, llmClient, logger, maxPageNumEachNode, maxTokenNumEachNode) {
    if (node.startIndex == null || node.endIndex == null) {
        return;
    }
    // 1-based inclusive range → 0-based half-open slice.
    const pagesOfNode = pageList.slice(node.startIndex - 1, node.endIndex);
    let tokenTotal = 0;
    for (const page of pagesOfNode) {
        tokenTotal = tokenTotal + countTokens(page.text);
    }
    // Inclusive span: start=1,end=1 → 1 page
    const pageCount = node.endIndex - node.startIndex + 1;
    const needsSplit = pageCount > maxPageNumEachNode && tokenTotal >= maxTokenNumEachNode;
    if (!needsSplit) {
        return;
    }
    logger.info(`Splitting large node: ${node.title}`, {
        pages: pageCount,
        tokens: tokenTotal,
    });
    const processed = await metaProcessor(pagesOfNode, 'process_no_toc', llmClient, logger, { startIndex: node.startIndex });
    const items = processed.items;
    // Check title appearance at start
    await checkTitleAppearanceInStartConcurrent(items, pageList, llmClient);
    // Keep only items that were located on a physical page.
    const located = items.filter((item) => item.physicalIndex != null);
    if (located.length === 0) {
        return;
    }
    // If the first item repeats the current node's title, skip it.
    const subItems = located[0].title === node.title ? located.slice(1) : located;
    if (subItems.length === 0) {
        return;
    }
    node.nodes = postProcessing(subItems, node.endIndex);
    // Recursively process the freshly created children.
    const childRuns = node.nodes.map((child) => processLargeNodeRecursively(child, pageList, llmClient, logger, maxPageNumEachNode, maxTokenNumEachNode));
    await Promise.all(childRuns);
}
40
+ //# sourceMappingURL=large-node.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"large-node.js","sourceRoot":"","sources":["../../src/processing/large-node.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAC5D,OAAO,EAAE,qCAAqC,EAAE,MAAM,mBAAmB,CAAC;AAC1E,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAExD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAC/C,IAAc,EACd,QAAiC,EACjC,SAAoB,EACpB,MAAc,EACd,kBAA0B,EAC1B,mBAA2B;IAE3B,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI;QAAE,OAAO;IAE7D,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxE,MAAM,QAAQ,GAAG,YAAY,CAAC,MAAM,CAClC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CACzC,CAAC;IACF,yCAAyC;IACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;IAErD,IAAI,QAAQ,GAAG,kBAAkB,IAAI,QAAQ,IAAI,mBAAmB,EAAE,CAAC;QACrE,MAAM,CAAC,IAAI,CAAC,yBAAyB,IAAI,CAAC,KAAK,EAAE,EAAE;YACjD,KAAK,EAAE,QAAQ;YACf,MAAM,EAAE,QAAQ;SACjB,CAAC,CAAC;QAEH,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,aAAa,CACnC,YAAY,EACZ,gBAAgB,EAChB,SAAS,EACT,MAAM,EACN,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,CAChC,CAAC;QAEF,kCAAkC;QAClC,MAAM,qCAAqC,CAAC,KAAK,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;QAExE,4BAA4B;QAC5B,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC;QAEtE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,oDAAoD;YACpD,IAAI,QAAQ,GAAG,UAAU,CAAC;YAC1B,IAAI,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,EAAE,CAAC;gBACvC,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACjC,CAAC;YAED,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,IAAI,CAAC,KAAK,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAErD,+BAA+B;gBAC/B,MAAM,OAAO,CAAC,GAAG,CACf,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CACvB,2BAA2B,CACzB,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAClC,kBAAkB,EAAE,mBAAmB,CACxC,CACF,CACF,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,19 @@
1
+ import type { TocItem, ProcessingMode, DegradationEvent, Logger } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
+ /**
4
+ * Meta processor: dispatches to the appropriate processing mode,
5
+ * verifies results, and handles degradation.
6
+ */
7
+ export declare function metaProcessor(pageList: Array<{
8
+ text: string;
9
+ }>, mode: ProcessingMode, llmClient: LlmClient, logger: Logger, options?: {
10
+ tocContent?: string | null;
11
+ tocPageList?: number[];
12
+ startIndex?: number;
13
+ onDegradation?: (event: DegradationEvent) => void;
14
+ }): Promise<{
15
+ items: TocItem[];
16
+ finalMode: ProcessingMode;
17
+ degradations: DegradationEvent[];
18
+ }>;
19
+ //# sourceMappingURL=meta-processor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"meta-processor.d.ts","sourceRoot":"","sources":["../../src/processing/meta-processor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrF,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAQjD;;;GAGG;AACH,wBAAsB,aAAa,CACjC,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,IAAI,EAAE,cAAc,EACpB,SAAS,EAAE,SAAS,EACpB,MAAM,EAAE,MAAM,EACd,OAAO,GAAE;IACP,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;CAC9C,GACL,OAAO,CAAC;IAAE,KAAK,EAAE,OAAO,EAAE,CAAC;IAAC,SAAS,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,gBAAgB,EAAE,CAAA;CAAE,CAAC,CA8D5F"}
@@ -0,0 +1,91 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { TocProcessingError } from '../errors/index.js';
3
+ import { processTocWithPageNumbers } from './toc-with-pages.js';
4
+ import { processTocNoPageNumbers } from './toc-no-pages.js';
5
+ import { processNoToc } from './no-toc.js';
6
+ import { verifyToc } from '../verification/verify-toc.js';
7
+ import { fixIncorrectTocWithRetries } from '../verification/fix-toc.js';
8
/**
 * Meta processor: runs the processor for the current mode, verifies the
 * outcome, and degrades to the next mode when accuracy is too low.
 *
 * Each attempt produces items for the current mode, drops entries whose
 * `physicalIndex` is null/undefined, clamps the remaining indices to the
 * page range, and verifies the result. An accuracy of exactly 1.0 returns
 * immediately; an accuracy above 0.6 returns after a fix-up pass over the
 * incorrect entries; anything else degrades to the next mode and retries.
 *
 * @param pageList - Document pages; `pageList.length` bounds valid indices.
 * @param mode - Processing mode to start with.
 * @param llmClient - Client forwarded to processing, verification and fixing.
 * @param logger - Receives `info` (accuracy) and `warn` (degradation) messages.
 * @param options - Forwarded to the mode processors; `onDegradation`, when
 *   present, is called with each degradation event.
 * @returns `{ items, finalMode, degradations }`.
 * @throws TocProcessingError when no further degradation is possible or the
 *   iteration cap is exhausted.
 */
export async function metaProcessor(pageList, mode, llmClient, logger, options = {}) {
    const MAX_ITERATIONS = 5;
    const degradations = [];
    let currentMode = mode;
    let attempt = 0;

    while (attempt < MAX_ITERATIONS) {
        attempt += 1;

        // Produce candidates for the active mode and keep only located entries.
        const produced = await processByMode(currentMode, pageList, llmClient, options);
        const located = produced.filter((entry) => entry.physicalIndex != null);

        // Clamp indices in place before they are verified.
        validateAndTruncatePhysicalIndices(located, pageList.length);

        const verification = await verifyToc(located, pageList, llmClient);
        const accuracy = verification.accuracy;
        logger.info(`Verification accuracy: ${accuracy}`, { mode: currentMode });

        // Perfect result: nothing to repair.
        if (accuracy === 1.0) {
            return { items: located, finalMode: currentMode, degradations };
        }

        // Good enough to repair the incorrect entries rather than start over.
        if (accuracy > 0.6) {
            const repaired = await fixIncorrectTocWithRetries(
                located,
                pageList,
                verification.incorrectResults,
                llmClient
            );
            return { items: repaired, finalMode: currentMode, degradations };
        }

        // Too inaccurate: move to the next mode, if there is one.
        const nextMode = getDegradedMode(currentMode);
        if (!nextMode) {
            throw new TocProcessingError(`Processing failed with accuracy ${accuracy} in mode ${currentMode}, no further degradation possible`);
        }

        const event = {
            fromMode: currentMode,
            toMode: nextMode,
            accuracy,
            reason: `Accuracy ${accuracy} below threshold 0.6`,
        };
        degradations.push(event);
        options.onDegradation?.(event);
        logger.warn(`Degrading from ${currentMode} to ${nextMode}`, { accuracy });

        currentMode = nextMode;
    }

    throw new TocProcessingError(`Processing failed: exceeded maximum ${MAX_ITERATIONS} degradation iterations`);
}
53
/**
 * Runs the processor that corresponds to `mode` and returns its result.
 *
 * @param mode - One of 'process_toc_with_page_numbers',
 *   'process_toc_no_page_numbers' or 'process_no_toc'.
 * @param pageList - Document pages handed to the selected processor.
 * @param llmClient - Client handed to the selected processor.
 * @param options - Supplies `tocContent` (required by both TOC modes),
 *   `tocPageList` (defaults to an empty list) and `startIndex`.
 * @returns Whatever the selected processor returns.
 * @throws TocProcessingError when a TOC mode is requested without
 *   `tocContent`, or when `mode` is not one of the supported modes.
 */
async function processByMode(mode, pageList, llmClient, options) {
    switch (mode) {
        case 'process_toc_with_page_numbers':
            if (!options.tocContent) {
                throw new TocProcessingError('tocContent is required for process_toc_with_page_numbers mode');
            }
            return processTocWithPageNumbers(options.tocContent, pageList, options.tocPageList ?? [], llmClient);
        case 'process_toc_no_page_numbers':
            if (!options.tocContent) {
                throw new TocProcessingError('tocContent is required for process_toc_no_page_numbers mode');
            }
            return processTocNoPageNumbers(options.tocContent, pageList, llmClient);
        case 'process_no_toc':
            return processNoToc(pageList, llmClient, options.startIndex);
        default:
            // Without this branch an unrecognised mode resolves to undefined,
            // and the caller only fails later with an unrelated TypeError.
            throw new TocProcessingError(`Unsupported processing mode: ${String(mode)}`);
    }
}
69
/**
 * Returns the mode to fall back to from `mode`.
 *
 * The modes form a ladder: 'process_toc_with_page_numbers' degrades to
 * 'process_toc_no_page_numbers', which degrades to 'process_no_toc'.
 * The last rung has no fallback and yields `null`; a value that is not on
 * the ladder yields `undefined`.
 */
function getDegradedMode(mode) {
    const ladder = [
        'process_toc_with_page_numbers',
        'process_toc_no_page_numbers',
        'process_no_toc',
    ];
    const position = ladder.indexOf(mode);
    if (position === -1) {
        // Not a known mode: no explicit result.
        return undefined;
    }
    const successor = ladder[position + 1];
    return successor === undefined ? null : successor;
}
79
/**
 * Clamps each item's `physicalIndex` into the range [1, totalPages], in place.
 *
 * Items whose `physicalIndex` is null or undefined are left untouched.
 * The upper bound is applied before the lower bound, so when `totalPages`
 * is below 1 the index ends up as 1.
 *
 * @param items - Items to adjust; mutated directly.
 * @param totalPages - Largest index allowed by the upper bound.
 */
function validateAndTruncatePhysicalIndices(items, totalPages) {
    for (const entry of items) {
        if (entry.physicalIndex == null) {
            continue;
        }
        if (entry.physicalIndex > totalPages) {
            entry.physicalIndex = totalPages;
        }
        if (entry.physicalIndex < 1) {
            entry.physicalIndex = 1;
        }
    }
}
91
+ //# sourceMappingURL=meta-processor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"meta-processor.js","sourceRoot":"","sources":["../../src/processing/meta-processor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,mBAAmB,CAAC;AAC5D,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,+BAA+B,CAAC;AAC1D,OAAO,EAAE,0BAA0B,EAAE,MAAM,4BAA4B,CAAC;AAExE;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAiC,EACjC,IAAoB,EACpB,SAAoB,EACpB,MAAc,EACd,UAKI,EAAE;IAEN,MAAM,YAAY,GAAuB,EAAE,CAAC;IAE5C,MAAM,cAAc,GAAG,CAAC,CAAC;IACzB,IAAI,WAAW,GAAG,IAAI,CAAC;IACvB,IAAI,KAAK,GAAc,EAAE,CAAC;IAE1B,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,SAAS,GAAG,cAAc,EAAE,SAAS,EAAE,EAAE,CAAC;QAChE,wCAAwC;QACxC,KAAK,GAAG,MAAM,aAAa,CACzB,WAAW,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,CAC1C,CAAC;QAEF,oCAAoC;QACpC,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC;QAE3D,gCAAgC;QAChC,kCAAkC,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;QAE3D,iBAAiB;QACjB,MAAM,EAAE,QAAQ,EAAE,gBAAgB,EAAE,GAAG,MAAM,SAAS,CACpD,KAAK,EAAE,QAAQ,EAAE,SAAS,CAC3B,CAAC;QAEF,MAAM,CAAC,IAAI,CAAC,0BAA0B,QAAQ,EAAE,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;QAEzE,mCAAmC;QACnC,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;YACrB,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC;QACzD,CAAC;QAED,IAAI,QAAQ,GAAG,GAAG,EAAE,CAAC;YACnB,KAAK,GAAG,MAAM,0BAA0B,CACtC,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,CAC7C,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC;QACzD,CAAC;QAED,kBAAkB;QAClB,MAAM,QAAQ,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC;QAC9C,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,kBAAkB,CAC1B,mCAAmC,QAAQ,YAAY,WAAW,mCAAmC,CACtG,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAqB;YAC9B,QAAQ,EAAE,WAAW;YACrB,MAAM,EAAE,QAAQ;YAChB,QAAQ;YACR,MAAM,EAAE,YAAY,QAAQ,sBAAsB;SACnD,CAAC;QACF,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzB,OAAO,CAAC,aAAa,EAAE,CAAC,KAAK,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,kBAAkB,WAAW,OAAO,QAAQ,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;QAE1E,WAAW,GAAG,QAAQ,CAAC;IACzB,CAAC;I
AED,MAAM,IAAI,kBAAkB,CAC1B,uCAAuC,cAAc,yBAAyB,CAC/E,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,IAAoB,EACpB,QAAiC,EACjC,SAAoB,EACpB,OAIC;IAED,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,+BAA+B;YAClC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;gBACxB,MAAM,IAAI,kBAAkB,CAAC,+DAA+D,CAAC,CAAC;YAChG,CAAC;YACD,OAAO,yBAAyB,CAC9B,OAAO,CAAC,UAAU,EAAE,QAAQ,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE,EAAE,SAAS,CACnE,CAAC;QACJ,KAAK,6BAA6B;YAChC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;gBACxB,MAAM,IAAI,kBAAkB,CAAC,6DAA6D,CAAC,CAAC;YAC9F,CAAC;YACD,OAAO,uBAAuB,CAC5B,OAAO,CAAC,UAAU,EAAE,QAAQ,EAAE,SAAS,CACxC,CAAC;QACJ,KAAK,gBAAgB;YACnB,OAAO,YAAY,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;IACjE,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,IAAoB;IAC3C,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,+BAA+B;YAClC,OAAO,6BAA6B,CAAC;QACvC,KAAK,6BAA6B;YAChC,OAAO,gBAAgB,CAAC;QAC1B,KAAK,gBAAgB;YACnB,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED,SAAS,kCAAkC,CACzC,KAAgB,EAChB,UAAkB;IAElB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,EAAE,CAAC;YAC/B,IAAI,IAAI,CAAC,aAAa,GAAG,UAAU,EAAE,CAAC;gBACpC,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC;YAClC,CAAC;YACD,IAAI,IAAI,CAAC,aAAa,GAAG,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { TocItem } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
/**
 * Processes documents without any TOC.
 * Generates tree structure by scanning document content in groups.
 *
 * @param pageList - Document pages, each exposing its `text`.
 * @param llmClient - Client used for the LLM calls.
 * @param startIndex - Optional physical index assigned to the first page.
 * @returns The generated TOC items.
 */
export declare function processNoToc(
    pageList: { text: string }[],
    llmClient: LlmClient,
    startIndex?: number
): Promise<TocItem[]>;
10
+ //# sourceMappingURL=no-toc.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"no-toc.d.ts","sourceRoot":"","sources":["../../src/processing/no-toc.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAWjD;;;GAGG;AACH,wBAAsB,YAAY,CAChC,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,EACpB,UAAU,SAAI,GACb,OAAO,CAAC,OAAO,EAAE,CAAC,CA6CpB"}
@@ -0,0 +1,44 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { generateTocInitPrompt, generateTocContinuePrompt, } from '../llm/prompts.js';
3
+ import { pageListToGroupText, convertPhysicalIndexToInt, } from '../utils/page-utils.js';
4
+ import { countTokens } from '../utils/token-counter.js';
5
/**
 * Builds TOC items for a document that has no table of contents.
 *
 * Every page is wrapped in `<physical_index_N>` markers (N counts up from
 * `startIndex`), the tagged pages are grouped, and the LLM is asked to start
 * a TOC from the first group and extend it with each following group. Replies
 * that are not arrays are ignored.
 *
 * @param pageList - Document pages, each exposing its `text`.
 * @param llmClient - Client whose `chatJson` is called once per group.
 * @param startIndex - Physical index of the first page; defaults to 1.
 * @returns Items with `structure`, `title` and a `physicalIndex` that is
 *   `null` when the LLM entry carries no `physical_index`.
 */
export async function processNoToc(pageList, llmClient, startIndex = 1) {
    // Wrap a page in its physical-index markers.
    const tagPage = (page, offset) => {
        const idx = startIndex + offset;
        return `<physical_index_${idx}>\n${page.text}\n<physical_index_${idx}>`;
    };
    // Convert a raw LLM entry into the item shape returned to callers.
    const toTocItem = ({ structure, title, physical_index }) => ({
        structure,
        title,
        physicalIndex: physical_index
            ? convertPhysicalIndexToInt(physical_index)
            : null,
    });

    const taggedPages = pageList.map(tagPage);
    const tokenLengths = taggedPages.map((tagged) => countTokens(tagged));
    const groups = pageListToGroupText(taggedPages, tokenLengths);
    if (groups.length === 0) {
        return [];
    }

    // Seed the TOC from the first group.
    const seed = await llmClient.chatJson([
        { role: 'user', content: generateTocInitPrompt(groups[0]) },
    ]);
    let collected = Array.isArray(seed) ? seed : [];

    // Extend it group by group, showing the LLM what has been found so far.
    for (let g = 1; g < groups.length; g++) {
        const prompt = generateTocContinuePrompt(groups[g], JSON.stringify(collected));
        const continuation = await llmClient.chatJson([
            { role: 'user', content: prompt },
        ]);
        if (!Array.isArray(continuation)) {
            continue;
        }
        collected = [...collected, ...continuation];
    }

    return collected.map((entry) => toTocItem(entry));
}
44
+ //# sourceMappingURL=no-toc.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"no-toc.js","sourceRoot":"","sources":["../../src/processing/no-toc.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EACL,qBAAqB,EACrB,yBAAyB,GAC1B,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,mBAAmB,EACnB,yBAAyB,GAC1B,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAExD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,QAAiC,EACjC,SAAoB,EACpB,UAAU,GAAG,CAAC;IAEd,0BAA0B;IAC1B,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzC,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC,CAAC;QAC3B,OAAO,mBAAmB,GAAG,MAAM,CAAC,CAAC,IAAI,qBAAqB,GAAG,GAAG,CAAC;IACvE,CAAC,CAAC,CAAC;IACH,MAAM,YAAY,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IAE7D,cAAc;IACd,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;IAE/D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,wCAAwC;IACxC,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,QAAQ,CAEzC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,qBAAqB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAEjE,IAAI,QAAQ,GACV,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;IAE9C,iCAAiC;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,cAAc,GAAG,MAAM,SAAS,CAAC,QAAQ,CAE7C;YACA;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,yBAAyB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;aACxE;SACF,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;YAClC,QAAQ,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,cAAc,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,4CAA4C;IAC5C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,aAAa,EAAE,IAAI,CAAC,cAAc;YAChC,CAAC,CAAC,yBAAyB,CAAC,IAAI,CAAC,cAAc,CAAC;YAChD,CAAC,CAAC,IAAI;KACT,CAAC,CAAC,CAAC;AACN,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { TocItem } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
/**
 * Processes TOC without page numbers.
 * Uses the TOC structure but fills in physical indices by matching
 * against document content.
 *
 * @param tocContent - Raw text of the table of contents.
 * @param pageList - Document pages, each exposing its `text`.
 * @param llmClient - Client used for the LLM calls.
 * @returns The TOC items with their physical indices.
 */
export declare function processTocNoPageNumbers(
    tocContent: string,
    pageList: { text: string }[],
    llmClient: LlmClient
): Promise<TocItem[]>;
11
+ //# sourceMappingURL=toc-no-pages.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-no-pages.d.ts","sourceRoot":"","sources":["../../src/processing/toc-no-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AASjD;;;;GAIG;AACH,wBAAsB,uBAAuB,CAC3C,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,OAAO,EAAE,CAAC,CAiDpB"}
@@ -0,0 +1,46 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { tocTransformer } from '../toc/toc-transformer.js';
3
+ import { convertPhysicalIndexToInt, pageListToGroupText, } from '../utils/page-utils.js';
4
+ import { countTokens } from '../utils/token-counter.js';
5
+ import { addPageNumberToTocPrompt } from '../llm/prompts.js';
6
/**
 * Resolves physical indices for a TOC that lists no page numbers.
 *
 * The TOC text is first transformed into structured entries. Each page is
 * wrapped in `<physical_index_N>` markers (N starts at 1) and the tagged
 * pages are grouped. For every group the LLM receives the current structure
 * as JSON and its array reply becomes the new structure; non-array replies
 * leave the structure unchanged.
 *
 * @param tocContent - Raw text of the table of contents.
 * @param pageList - Document pages, each exposing its `text`.
 * @param llmClient - Client passed to the transformer and used for `chatJson`.
 * @returns Items with `structure`, `title` and a `physicalIndex` that is
 *   `null` when the final entry carries no `physical_index`.
 */
export async function processTocNoPageNumbers(tocContent, pageList, llmClient) {
    const tocItems = await tocTransformer(tocContent, llmClient);

    // Tag every page with its 1-based physical index.
    const taggedPages = pageList.map((page, offset) => {
        const idx = offset + 1;
        return `<physical_index_${idx}>\n${page.text}\n<physical_index_${idx}>`;
    });
    const tokenLengths = taggedPages.map((tagged) => countTokens(tagged));
    const groups = pageListToGroupText(taggedPages, tokenLengths);

    // Start from the bare structure: nothing located yet.
    const skeleton = tocItems.map(({ structure, title }) => {
        return { structure, title, start: 'no', physical_index: null };
    });
    let structureJson = JSON.stringify(skeleton);

    // Let the LLM fill in indices one page group at a time.
    for (const group of groups) {
        const prompt = addPageNumberToTocPrompt(group, structureJson);
        const reply = await llmClient.chatJson([{ role: 'user', content: prompt }]);
        if (!Array.isArray(reply)) {
            continue;
        }
        structureJson = JSON.stringify(reply);
    }

    const resolved = JSON.parse(structureJson);
    return resolved.map((entry) => {
        const physicalIndex = entry.physical_index
            ? convertPhysicalIndexToInt(entry.physical_index)
            : null;
        return { structure: entry.structure, title: entry.title, physicalIndex };
    });
}
46
+ //# sourceMappingURL=toc-no-pages.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-no-pages.js","sourceRoot":"","sources":["../../src/processing/toc-no-pages.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EACL,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAE,wBAAwB,EAAE,MAAM,mBAAmB,CAAC;AAE7D;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,UAAkB,EAClB,QAAiC,EACjC,SAAoB;IAEpB,mCAAmC;IACnC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;IAE7D,uCAAuC;IACvC,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzC,OAAO,mBAAmB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,qBAAqB,CAAC,GAAG,CAAC,GAAG,CAAC;IAC3E,CAAC,CAAC,CAAC;IACH,MAAM,YAAY,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IAE7D,cAAc;IACd,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;IAE/D,+CAA+C;IAC/C,IAAI,gBAAgB,GAAG,IAAI,CAAC,SAAS,CACnC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QACtC,SAAS;QACT,KAAK;QACL,KAAK,EAAE,IAAI;QACX,cAAc,EAAE,IAAI;KACrB,CAAC,CAAC,CACJ,CAAC;IAEF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAErC;YACA,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,wBAAwB,CAAC,KAAK,EAAE,gBAAgB,CAAC,EAAE;SAC7E,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,gBAAgB,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAI7C,CAAC;IAEH,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAChC,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,aAAa,EAAE,IAAI,CAAC,cAAc;YAChC,CAAC,CAAC,yBAAyB,CAAC,IAAI,CAAC,cAAc,CAAC;YAChD,CAAC,CAAC,IAAI;KACT,CAAC,CAAC,CAAC;AACN,CAAC"}
@@ -0,0 +1,15 @@
1
+ import type { TocItem } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
/**
 * Processes TOC with page numbers.
 *
 * Algorithm:
 * 1. tocTransformer → structured JSON with page field
 * 2. Remove page → tocNoPageNumber
 * 3. Extract physical indices from main content
 * 4. Match page/physicalIndex pairs → calculate offset
 * 5. Apply offset to all entries
 *
 * @param tocContent - Raw text of the table of contents.
 * @param pageList - Document pages, each exposing its `text`.
 * @param tocPageList - Page numbers associated with the TOC.
 * @param llmClient - Client used for the LLM calls.
 * @returns The TOC items.
 */
export declare function processTocWithPageNumbers(
    tocContent: string,
    pageList: { text: string }[],
    tocPageList: number[],
    llmClient: LlmClient
): Promise<TocItem[]>;
15
+ //# sourceMappingURL=toc-with-pages.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-with-pages.d.ts","sourceRoot":"","sources":["../../src/processing/toc-with-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAIjD;;;;;;;;GAQG;AACH,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,WAAW,EAAE,MAAM,EAAE,EACrB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,OAAO,EAAE,CAAC,CA6CpB"}