@fastrag/pageindex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +251 -0
  3. package/README.zh-CN.md +251 -0
  4. package/dist/errors/index.d.ts +10 -0
  5. package/dist/errors/index.d.ts.map +1 -0
  6. package/dist/errors/index.js +19 -0
  7. package/dist/errors/index.js.map +1 -0
  8. package/dist/index.d.ts +14 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +20 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/internal-types/config.d.ts +35 -0
  13. package/dist/internal-types/config.d.ts.map +1 -0
  14. package/dist/internal-types/config.js +16 -0
  15. package/dist/internal-types/config.js.map +1 -0
  16. package/dist/internal-types/document-parser.d.ts +5 -0
  17. package/dist/internal-types/document-parser.d.ts.map +1 -0
  18. package/dist/internal-types/document-parser.js +2 -0
  19. package/dist/internal-types/document-parser.js.map +1 -0
  20. package/dist/internal-types/index.d.ts +9 -0
  21. package/dist/internal-types/index.d.ts.map +1 -0
  22. package/dist/internal-types/index.js +2 -0
  23. package/dist/internal-types/index.js.map +1 -0
  24. package/dist/internal-types/llm-provider.d.ts +19 -0
  25. package/dist/internal-types/llm-provider.d.ts.map +1 -0
  26. package/dist/internal-types/llm-provider.js +2 -0
  27. package/dist/internal-types/llm-provider.js.map +1 -0
  28. package/dist/internal-types/logger.d.ts +7 -0
  29. package/dist/internal-types/logger.d.ts.map +1 -0
  30. package/dist/internal-types/logger.js +2 -0
  31. package/dist/internal-types/logger.js.map +1 -0
  32. package/dist/internal-types/page.d.ts +5 -0
  33. package/dist/internal-types/page.d.ts.map +1 -0
  34. package/dist/internal-types/page.js +2 -0
  35. package/dist/internal-types/page.js.map +1 -0
  36. package/dist/internal-types/processing.d.ts +21 -0
  37. package/dist/internal-types/processing.d.ts.map +1 -0
  38. package/dist/internal-types/processing.js +2 -0
  39. package/dist/internal-types/processing.js.map +1 -0
  40. package/dist/internal-types/tree-node.d.ts +30 -0
  41. package/dist/internal-types/tree-node.d.ts.map +1 -0
  42. package/dist/internal-types/tree-node.js +2 -0
  43. package/dist/internal-types/tree-node.js.map +1 -0
  44. package/dist/llm/index.d.ts +3 -0
  45. package/dist/llm/index.d.ts.map +1 -0
  46. package/dist/llm/index.js +3 -0
  47. package/dist/llm/index.js.map +1 -0
  48. package/dist/llm/llm-client.d.ts +26 -0
  49. package/dist/llm/llm-client.d.ts.map +1 -0
  50. package/dist/llm/llm-client.js +88 -0
  51. package/dist/llm/llm-client.js.map +1 -0
  52. package/dist/llm/prompts.d.ts +33 -0
  53. package/dist/llm/prompts.d.ts.map +1 -0
  54. package/dist/llm/prompts.js +312 -0
  55. package/dist/llm/prompts.js.map +1 -0
  56. package/dist/markdown/index.d.ts +6 -0
  57. package/dist/markdown/index.d.ts.map +1 -0
  58. package/dist/markdown/index.js +5 -0
  59. package/dist/markdown/index.js.map +1 -0
  60. package/dist/markdown/md-extractor.d.ts +14 -0
  61. package/dist/markdown/md-extractor.d.ts.map +1 -0
  62. package/dist/markdown/md-extractor.js +30 -0
  63. package/dist/markdown/md-extractor.js.map +1 -0
  64. package/dist/markdown/md-to-tree.d.ts +8 -0
  65. package/dist/markdown/md-to-tree.d.ts.map +1 -0
  66. package/dist/markdown/md-to-tree.js +20 -0
  67. package/dist/markdown/md-to-tree.js.map +1 -0
  68. package/dist/markdown/md-tree-builder.d.ts +7 -0
  69. package/dist/markdown/md-tree-builder.d.ts.map +1 -0
  70. package/dist/markdown/md-tree-builder.js +36 -0
  71. package/dist/markdown/md-tree-builder.js.map +1 -0
  72. package/dist/markdown/tree-thinning.d.ts +8 -0
  73. package/dist/markdown/tree-thinning.d.ts.map +1 -0
  74. package/dist/markdown/tree-thinning.js +42 -0
  75. package/dist/markdown/tree-thinning.js.map +1 -0
  76. package/dist/page-index.d.ts +10 -0
  77. package/dist/page-index.d.ts.map +1 -0
  78. package/dist/page-index.js +54 -0
  79. package/dist/page-index.js.map +1 -0
  80. package/dist/post-processing/doc-description.d.ts +12 -0
  81. package/dist/post-processing/doc-description.d.ts.map +1 -0
  82. package/dist/post-processing/doc-description.js +31 -0
  83. package/dist/post-processing/doc-description.js.map +1 -0
  84. package/dist/post-processing/index.d.ts +5 -0
  85. package/dist/post-processing/index.d.ts.map +1 -0
  86. package/dist/post-processing/index.js +5 -0
  87. package/dist/post-processing/index.js.map +1 -0
  88. package/dist/post-processing/node-id.d.ts +7 -0
  89. package/dist/post-processing/node-id.d.ts.map +1 -0
  90. package/dist/post-processing/node-id.js +20 -0
  91. package/dist/post-processing/node-id.js.map +1 -0
  92. package/dist/post-processing/node-text.d.ts +11 -0
  93. package/dist/post-processing/node-text.d.ts.map +1 -0
  94. package/dist/post-processing/node-text.js +37 -0
  95. package/dist/post-processing/node-text.js.map +1 -0
  96. package/dist/post-processing/summary.d.ts +7 -0
  97. package/dist/post-processing/summary.d.ts.map +1 -0
  98. package/dist/post-processing/summary.js +31 -0
  99. package/dist/post-processing/summary.js.map +1 -0
  100. package/dist/processing/index.d.ts +6 -0
  101. package/dist/processing/index.d.ts.map +1 -0
  102. package/dist/processing/index.js +6 -0
  103. package/dist/processing/index.js.map +1 -0
  104. package/dist/processing/large-node.d.ts +9 -0
  105. package/dist/processing/large-node.d.ts.map +1 -0
  106. package/dist/processing/large-node.js +40 -0
  107. package/dist/processing/large-node.js.map +1 -0
  108. package/dist/processing/meta-processor.d.ts +19 -0
  109. package/dist/processing/meta-processor.d.ts.map +1 -0
  110. package/dist/processing/meta-processor.js +91 -0
  111. package/dist/processing/meta-processor.js.map +1 -0
  112. package/dist/processing/no-toc.d.ts +10 -0
  113. package/dist/processing/no-toc.d.ts.map +1 -0
  114. package/dist/processing/no-toc.js +44 -0
  115. package/dist/processing/no-toc.js.map +1 -0
  116. package/dist/processing/toc-no-pages.d.ts +11 -0
  117. package/dist/processing/toc-no-pages.d.ts.map +1 -0
  118. package/dist/processing/toc-no-pages.js +46 -0
  119. package/dist/processing/toc-no-pages.js.map +1 -0
  120. package/dist/processing/toc-with-pages.d.ts +15 -0
  121. package/dist/processing/toc-with-pages.d.ts.map +1 -0
  122. package/dist/processing/toc-with-pages.js +151 -0
  123. package/dist/processing/toc-with-pages.js.map +1 -0
  124. package/dist/toc/index.d.ts +4 -0
  125. package/dist/toc/index.d.ts.map +1 -0
  126. package/dist/toc/index.js +4 -0
  127. package/dist/toc/index.js.map +1 -0
  128. package/dist/toc/toc-detector.d.ts +23 -0
  129. package/dist/toc/toc-detector.d.ts.map +1 -0
  130. package/dist/toc/toc-detector.js +65 -0
  131. package/dist/toc/toc-detector.js.map +1 -0
  132. package/dist/toc/toc-extractor.d.ts +13 -0
  133. package/dist/toc/toc-extractor.d.ts.map +1 -0
  134. package/dist/toc/toc-extractor.js +32 -0
  135. package/dist/toc/toc-extractor.js.map +1 -0
  136. package/dist/toc/toc-transformer.d.ts +11 -0
  137. package/dist/toc/toc-transformer.d.ts.map +1 -0
  138. package/dist/toc/toc-transformer.js +69 -0
  139. package/dist/toc/toc-transformer.js.map +1 -0
  140. package/dist/tree/index.d.ts +4 -0
  141. package/dist/tree/index.d.ts.map +1 -0
  142. package/dist/tree/index.js +4 -0
  143. package/dist/tree/index.js.map +1 -0
  144. package/dist/tree/list-to-tree.d.ts +7 -0
  145. package/dist/tree/list-to-tree.d.ts.map +1 -0
  146. package/dist/tree/list-to-tree.js +33 -0
  147. package/dist/tree/list-to-tree.js.map +1 -0
  148. package/dist/tree/post-processing.d.ts +12 -0
  149. package/dist/tree/post-processing.d.ts.map +1 -0
  150. package/dist/tree/post-processing.js +87 -0
  151. package/dist/tree/post-processing.js.map +1 -0
  152. package/dist/tree/tree-utils.d.ts +18 -0
  153. package/dist/tree/tree-utils.d.ts.map +1 -0
  154. package/dist/tree/tree-utils.js +43 -0
  155. package/dist/tree/tree-utils.js.map +1 -0
  156. package/dist/tree-parser.d.ts +30 -0
  157. package/dist/tree-parser.d.ts.map +1 -0
  158. package/dist/tree-parser.js +73 -0
  159. package/dist/tree-parser.js.map +1 -0
  160. package/dist/types.d.ts +3 -0
  161. package/dist/types.d.ts.map +1 -0
  162. package/dist/types.js +2 -0
  163. package/dist/types.js.map +1 -0
  164. package/dist/utils/config-loader.d.ts +15 -0
  165. package/dist/utils/config-loader.d.ts.map +1 -0
  166. package/dist/utils/config-loader.js +19 -0
  167. package/dist/utils/config-loader.js.map +1 -0
  168. package/dist/utils/index.d.ts +7 -0
  169. package/dist/utils/index.d.ts.map +1 -0
  170. package/dist/utils/index.js +6 -0
  171. package/dist/utils/index.js.map +1 -0
  172. package/dist/utils/json-parser.d.ts +2 -0
  173. package/dist/utils/json-parser.d.ts.map +1 -0
  174. package/dist/utils/json-parser.js +76 -0
  175. package/dist/utils/json-parser.js.map +1 -0
  176. package/dist/utils/logger.d.ts +3 -0
  177. package/dist/utils/logger.d.ts.map +1 -0
  178. package/dist/utils/logger.js +10 -0
  179. package/dist/utils/logger.js.map +1 -0
  180. package/dist/utils/page-utils.d.ts +16 -0
  181. package/dist/utils/page-utils.d.ts.map +1 -0
  182. package/dist/utils/page-utils.js +56 -0
  183. package/dist/utils/page-utils.js.map +1 -0
  184. package/dist/utils/token-counter.d.ts +2 -0
  185. package/dist/utils/token-counter.d.ts.map +1 -0
  186. package/dist/utils/token-counter.js +5 -0
  187. package/dist/utils/token-counter.js.map +1 -0
  188. package/dist/vector-lib/adapters/in-memory-adapter.d.ts +14 -0
  189. package/dist/vector-lib/adapters/in-memory-adapter.d.ts.map +1 -0
  190. package/dist/vector-lib/adapters/in-memory-adapter.js +55 -0
  191. package/dist/vector-lib/adapters/in-memory-adapter.js.map +1 -0
  192. package/dist/vector-lib/adapters/vector-store.d.ts +10 -0
  193. package/dist/vector-lib/adapters/vector-store.d.ts.map +1 -0
  194. package/dist/vector-lib/adapters/vector-store.js +2 -0
  195. package/dist/vector-lib/adapters/vector-store.js.map +1 -0
  196. package/dist/vector-lib/chunker/tree-chunker.d.ts +8 -0
  197. package/dist/vector-lib/chunker/tree-chunker.d.ts.map +1 -0
  198. package/dist/vector-lib/chunker/tree-chunker.js +59 -0
  199. package/dist/vector-lib/chunker/tree-chunker.js.map +1 -0
  200. package/dist/vector-lib/embedder/embedder.d.ts +8 -0
  201. package/dist/vector-lib/embedder/embedder.d.ts.map +1 -0
  202. package/dist/vector-lib/embedder/embedder.js +2 -0
  203. package/dist/vector-lib/embedder/embedder.js.map +1 -0
  204. package/dist/vector-lib/index.d.ts +10 -0
  205. package/dist/vector-lib/index.d.ts.map +1 -0
  206. package/dist/vector-lib/index.js +6 -0
  207. package/dist/vector-lib/index.js.map +1 -0
  208. package/dist/vector-lib/search/hybrid-search.d.ts +19 -0
  209. package/dist/vector-lib/search/hybrid-search.d.ts.map +1 -0
  210. package/dist/vector-lib/search/hybrid-search.js +25 -0
  211. package/dist/vector-lib/search/hybrid-search.js.map +1 -0
  212. package/dist/vector-lib/search/reranker.d.ts +14 -0
  213. package/dist/vector-lib/search/reranker.d.ts.map +1 -0
  214. package/dist/vector-lib/search/reranker.js +2 -0
  215. package/dist/vector-lib/search/reranker.js.map +1 -0
  216. package/dist/vector-lib/types.d.ts +29 -0
  217. package/dist/vector-lib/types.d.ts.map +1 -0
  218. package/dist/vector-lib/types.js +2 -0
  219. package/dist/vector-lib/types.js.map +1 -0
  220. package/dist/vector-lib/vector-enhancer.d.ts +28 -0
  221. package/dist/vector-lib/vector-enhancer.d.ts.map +1 -0
  222. package/dist/vector-lib/vector-enhancer.js +54 -0
  223. package/dist/vector-lib/vector-enhancer.js.map +1 -0
  224. package/dist/vector.d.ts +5 -0
  225. package/dist/vector.d.ts.map +1 -0
  226. package/dist/vector.js +3 -0
  227. package/dist/vector.js.map +1 -0
  228. package/dist/verification/fix-toc.d.ts +13 -0
  229. package/dist/verification/fix-toc.d.ts.map +1 -0
  230. package/dist/verification/fix-toc.js +73 -0
  231. package/dist/verification/fix-toc.js.map +1 -0
  232. package/dist/verification/index.d.ts +3 -0
  233. package/dist/verification/index.d.ts.map +1 -0
  234. package/dist/verification/index.js +3 -0
  235. package/dist/verification/index.js.map +1 -0
  236. package/dist/verification/verify-toc.d.ts +17 -0
  237. package/dist/verification/verify-toc.d.ts.map +1 -0
  238. package/dist/verification/verify-toc.js +64 -0
  239. package/dist/verification/verify-toc.js.map +1 -0
  240. package/package.json +58 -0
@@ -0,0 +1,151 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { tocTransformer, tocIndexExtractor } from '../toc/toc-transformer.js';
3
+ import { convertPhysicalIndexToInt, addPhysicalIndexTags } from '../utils/page-utils.js';
4
/**
 * Resolves a physical page index for every entry of a TOC that lists page numbers.
 *
 * Pipeline:
 *  1. `tocTransformer` turns the raw TOC text into a list of items.
 *  2. A page-less copy (structure + title only) of those items is built.
 *  3. The pages after the last TOC page are tagged via `addPhysicalIndexTags`.
 *  4. `tocIndexExtractor` locates the page-less items inside the tagged content.
 *  5. String `physicalIndex` values are converted with `convertPhysicalIndexToInt`;
 *     a value whose conversion throws becomes `null`.
 *  6. The located indices are aligned with the TOC items, the dominant
 *     (physicalIndex - page) offset is computed and applied to every item.
 *  7. Items that still have no `physicalIndex` take the located value directly.
 *
 * @param tocContent  Raw TOC text handed to `tocTransformer`.
 * @param pageList    All document pages; sliced to obtain the main content.
 * @param tocPageList Indices of the TOC pages; the last one marks where the main content starts.
 * @param llmClient   Client forwarded to the LLM-backed helpers.
 * @returns The TOC items, each carrying a `physicalIndex` property (possibly `null`).
 */
export async function processTocWithPageNumbers(tocContent, pageList, tocPageList, llmClient) {
    const tocItems = await tocTransformer(tocContent, llmClient);

    // Page-less view of the TOC that is sent to the index extractor.
    const titlesOnly = tocItems.map((entry) => {
        const { structure, title } = entry;
        return { structure, title };
    });

    // Main content begins right after the last TOC page (or at 0 without TOC pages).
    let firstMainPage = 0;
    if (tocPageList.length > 0) {
        firstMainPage = tocPageList[tocPageList.length - 1] + 1;
    }
    const taggedContent = addPhysicalIndexTags(pageList.slice(firstMainPage), firstMainPage + 1);

    const locatedItems = await tocIndexExtractor(titlesOnly, taggedContent, llmClient);

    // The LLM may hand back physical indices as strings; normalise them to numbers.
    for (const located of locatedItems) {
        if (typeof located.physicalIndex !== 'string') {
            continue;
        }
        try {
            located.physicalIndex = convertPhysicalIndexToInt(located.physicalIndex);
        }
        catch {
            located.physicalIndex = null;
        }
    }

    const resolvedIndices = resolvePhysicalIndices(tocItems, locatedItems);
    const offset = calculatePageOffset(extractMatchingPagePairs(tocItems, resolvedIndices));
    const withOffset = addPageOffsetToTocJson(tocItems, offset);

    // Entries the offset could not cover fall back to the located index.
    processNonePageNumbers(withOffset, resolvedIndices);
    return withOffset;
}
50
/**
 * Pairs each TOC item's printed page number with the physical index resolved
 * for the same position.
 *
 * Positions where either the page or the resolved index is null/undefined are
 * left out.
 *
 * @param tocItems                TOC items; only `page` is read.
 * @param resolvedPhysicalIndices Resolved indices, aligned with `tocItems` by position.
 * @returns Array of `{ page, physicalIndex }` objects in TOC order.
 */
function extractMatchingPagePairs(tocItems, resolvedPhysicalIndices) {
    const matched = [];
    for (const [position, entry] of tocItems.entries()) {
        const { page } = entry;
        const physicalIndex = resolvedPhysicalIndices[position];
        if (page != null && physicalIndex != null) {
            matched.push({ page, physicalIndex });
        }
    }
    return matched;
}
61
/**
 * Computes the offset between printed page numbers and physical page indices
 * as the most frequent value of (physicalIndex - page).
 *
 * Differences that are not finite numbers are ignored. A printed page such as
 * a roman numeral ("iv") subtracts to NaN, and because NaN is a valid Map key
 * it could otherwise win the vote and be returned as the offset.
 *
 * @param pairs Array of `{ page, physicalIndex }` objects.
 * @returns The most frequent finite difference; on a tie, the difference that
 *          was encountered first. Returns 0 when there is no usable pair.
 */
function calculatePageOffset(pairs) {
    const counts = new Map();
    for (const { page, physicalIndex } of pairs) {
        const diff = physicalIndex - page;
        if (!Number.isFinite(diff)) {
            continue; // non-numeric page or index: cannot vote for an offset
        }
        counts.set(diff, (counts.get(diff) ?? 0) + 1);
    }

    let mode = 0;
    let maxCount = 0;
    // Map iteration follows insertion order, so `>` keeps the first-seen diff on ties.
    for (const [diff, count] of counts) {
        if (count > maxCount) {
            maxCount = count;
            mode = diff;
        }
    }
    return mode;
}
79
/**
 * Returns a copy of the TOC in which every item's `physicalIndex` is its
 * printed page number shifted by `offset`.
 *
 * A `page` given as a numeric string is converted to a number first, so that
 * `+` adds instead of concatenating ("5" + 2 would otherwise give "52").
 * When the shifted value is not a finite number (missing or non-numeric page,
 * or a NaN offset) the item's existing `physicalIndex` is kept, or `null` if
 * it has none.
 *
 * @param toc    TOC items; they are not mutated.
 * @param offset Value added to each item's page number.
 * @returns New array of shallow item copies with `physicalIndex` set.
 */
function addPageOffsetToTocJson(toc, offset) {
    return toc.map((item) => {
        const rawPage = item.page;
        // Blank strings are excluded because Number('') is 0, not NaN.
        const page = typeof rawPage === 'string' && rawPage.trim() !== ''
            ? Number(rawPage)
            : rawPage;
        const shifted = typeof page === 'number' ? page + offset : NaN;
        return {
            ...item,
            physicalIndex: Number.isFinite(shifted) ? shifted : (item.physicalIndex ?? null),
        };
    });
}
85
/**
 * Fills in missing physical indices in place.
 *
 * For each position, an entry whose `physicalIndex` is null/undefined takes the
 * resolved index at the same position, provided that one is not null/undefined.
 *
 * @param result                  TOC entries to patch (mutated).
 * @param resolvedPhysicalIndices Resolved indices, aligned with `result` by position.
 */
function processNonePageNumbers(result, resolvedPhysicalIndices) {
    for (const [position, entry] of result.entries()) {
        const fallback = resolvedPhysicalIndices[position];
        if (entry.physicalIndex != null || fallback == null) {
            continue;
        }
        entry.physicalIndex = fallback;
    }
}
92
/**
 * Aligns the physical indices found by the extractor with the TOC items.
 *
 * Extractor results with a numeric `physicalIndex` are indexed two ways: by
 * `structure` (a later duplicate overwrites an earlier one) and by `title`
 * (a FIFO list per title, so repeated titles keep their order).
 *
 * Each TOC item is then resolved by structure first. A structure hit also
 * removes one value from the item's title list (via `consumeTitleQueueValue`)
 * so that a later title-only lookup does not reuse it. Otherwise the next
 * value queued under the item's title is taken, and `null` when there is none.
 *
 * @param tocItems     TOC items to resolve; `structure` and `title` are read.
 * @param withPhysical Extractor output; entries without a numeric `physicalIndex` are ignored.
 * @returns Array aligned with `tocItems`, holding a number or `null` per item.
 */
function resolvePhysicalIndices(tocItems, withPhysical) {
    const indexByStructure = new Map();
    const indicesByTitle = new Map();

    for (const { structure, title, physicalIndex } of withPhysical) {
        // Also rejects null/undefined, whose typeof is never 'number'.
        if (typeof physicalIndex !== 'number') {
            continue;
        }
        if (structure) {
            indexByStructure.set(structure, physicalIndex);
        }
        if (title) {
            const pending = indicesByTitle.get(title);
            if (pending) {
                pending.push(physicalIndex);
            }
            else {
                indicesByTitle.set(title, [physicalIndex]);
            }
        }
    }

    const resolved = [];
    for (const { structure, title } of tocItems) {
        if (structure && indexByStructure.has(structure)) {
            const viaStructure = indexByStructure.get(structure) ?? null;
            resolved.push(viaStructure);
            // Keep the title list in step with what the structure match used up.
            consumeTitleQueueValue(indicesByTitle, title, viaStructure);
        }
        else {
            const pending = title ? indicesByTitle.get(title) : undefined;
            const viaTitle = pending && pending.length > 0 ? (pending.shift() ?? null) : null;
            resolved.push(viaTitle);
        }
    }
    return resolved;
}
132
/**
 * Removes one entry from the list stored under `title`, in place.
 *
 * The first occurrence of `value` is removed when the list contains it;
 * otherwise the head of the list is dropped. A list that becomes empty is
 * deleted from the map. Nothing happens when `title` is falsy, `value` is
 * null/undefined, or no non-empty list exists for the title.
 *
 * @param byTitleQueue Map from title to an array of pending values (mutated).
 * @param title        Key whose list should lose one entry.
 * @param value        Value to remove in preference to the head.
 */
function consumeTitleQueueValue(byTitleQueue, title, value) {
    if (title && value != null) {
        const pending = byTitleQueue.get(title);
        if (pending && pending.length > 0) {
            const at = pending.indexOf(value);
            // Position 0 is the head, which is what gets dropped when the value is absent.
            pending.splice(at >= 0 ? at : 0, 1);
            if (pending.length === 0) {
                byTitleQueue.delete(title);
            }
        }
    }
}
151
+ //# sourceMappingURL=toc-with-pages.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-with-pages.js","sourceRoot":"","sources":["../../src/processing/toc-with-pages.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9E,OAAO,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AAEzF;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,UAAkB,EAClB,QAAiC,EACjC,WAAqB,EACrB,SAAoB;IAEpB,gCAAgC;IAChC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;IAE7D,8CAA8C;IAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QACxD,SAAS;QACT,KAAK;KACN,CAAC,CAAC,CAAC;IAEJ,6CAA6C;IAC7C,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC;QACzC,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC;QACzC,CAAC,CAAC,CAAC,CAAC;IACN,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAC/C,MAAM,aAAa,GAAG,oBAAoB,CAAC,SAAS,EAAE,YAAY,GAAG,CAAC,CAAC,CAAC;IAExE,mCAAmC;IACnC,MAAM,YAAY,GAAG,MAAM,iBAAiB,CAC1C,SAAsB,EAAE,aAAa,EAAE,SAAS,CACjD,CAAC;IAEF,2EAA2E;IAC3E,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QAChC,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,IAAI,OAAO,IAAI,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;YACzE,IAAI,CAAC;gBACH,IAAI,CAAC,aAAa,GAAG,yBAAyB,CAAC,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC;YAC7E,CAAC;YAAC,MAAM,CAAC;gBACP,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;YAC5B,CAAC;QACH,CAAC;IACH,CAAC;IAED,2CAA2C;IAC3C,MAAM,uBAAuB,GAAG,sBAAsB,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAC/E,MAAM,KAAK,GAAG,wBAAwB,CAAC,QAAQ,EAAE,uBAAuB,CAAC,CAAC;IAC1E,MAAM,MAAM,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAE1C,uBAAuB;IACvB,MAAM,MAAM,GAAG,sBAAsB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAExD,kDAAkD;IAClD,sBAAsB,CAAC,MAAM,EAAE,uBAAuB,CAAC,CAAC;IAExD,OAAO,MAAM,CAAC;AAChB,CAAC;AAOD,SAAS,wBAAwB,CAC/B,QAAmB,EACnB,uBAA6C;IAE7C,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC9B,MAAM,aAAa,GAAG,uBAAuB,CAAC,CAAC,CAAC,CAAC;QACjD,IAAI,IAAI,IAAI,IAAI,IAAI,aAAa,IAAI,IAAI;YAAE,SAAS;QACpD,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,C
AAC,CAAC;IACtC,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,mBAAmB,CAAC,KAAiB;IAC5C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjC,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACnC,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;YACrB,QAAQ,GAAG,KAAK,CAAC;YACjB,IAAI,GAAG,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,sBAAsB,CAAC,GAAc,EAAE,MAAc;IAC5D,OAAO,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACxB,GAAG,IAAI;QACP,aAAa,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI;KACnF,CAAC,CAAC,CAAC;AACN,CAAC;AAED,SAAS,sBAAsB,CAC7B,MAAiB,EACjB,uBAA6C;IAE7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,IAAI,IAAI,uBAAuB,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;YAC1E,MAAM,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,uBAAuB,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,sBAAsB,CAC7B,QAAmB,EACnB,YAAuB;IAEvB,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,MAAM,YAAY,GAAG,IAAI,GAAG,EAAoB,CAAC;IAEjD,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QAChC,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI;YAAE,SAAS;QACzC,IAAI,OAAO,IAAI,CAAC,aAAa,KAAK,QAAQ;YAAE,SAAS;QAErD,qEAAqE;QACrE,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QACtD,CAAC;QAED,wEAAwE;QACxE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACzB,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAC5C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAC/B,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAyB,EAAE,CAAC;IAC1C,KAAK,MAA
M,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,IAAI,OAAO,CAAC,SAAS,IAAI,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5D,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC;YAC3D,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvB,0EAA0E;YAC1E,2EAA2E;YAC3E,sBAAsB,CAAC,YAAY,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAC7D,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC5B,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC1D,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,CAAC;YACrC,SAAS;QACX,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,sBAAsB,CAC7B,YAAmC,EACnC,KAAyB,EACzB,KAAoB;IAEpB,IAAI,CAAC,KAAK,IAAI,KAAK,IAAI,IAAI;QAAE,OAAO;IACpC,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACtC,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAEzC,6EAA6E;IAC7E,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACnC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;QACf,KAAK,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACzB,CAAC;SAAM,CAAC;QACN,iDAAiD;QACjD,KAAK,CAAC,KAAK,EAAE,CAAC;IAChB,CAAC;IAED,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { checkToc, findTocPages, tocDetectorSinglePage } from './toc-detector.js';
2
+ export { tocExtractor, detectPageIndex } from './toc-extractor.js';
3
+ export { tocTransformer, tocIndexExtractor } from './toc-transformer.js';
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/toc/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAClF,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { checkToc, findTocPages, tocDetectorSinglePage } from './toc-detector.js';
2
+ export { tocExtractor, detectPageIndex } from './toc-extractor.js';
3
+ export { tocTransformer, tocIndexExtractor } from './toc-transformer.js';
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/toc/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAClF,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,23 @@
1
+ import type { TocCheckResult, Logger } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
+ /**
4
+ * Main TOC detection flow.
5
+ * 1. Find TOC pages by scanning from the start
6
+ * 2. Extract TOC content
7
+ * 3. Check for page numbers
8
+ * 4. If no page numbers, continue scanning for more TOC pages
+ *
+ * @param pageList - Document pages; each entry exposes its content as `text`.
+ * @param tocCheckPageNum - Page-index limit for the TOC scan.
+ * @param llmClient - LLM client used for TOC detection and extraction.
+ * @param _logger - Logger; not referenced by the compiled implementation in toc-detector.js.
+ * @returns Promise of a `TocCheckResult`. When no TOC page is found the implementation
+ * resolves to `{ tocContent: null, tocPageList: [], pageIndexGivenInToc: false }`.
9
+ */
10
+ export declare function checkToc(pageList: Array<{
11
+ text: string;
12
+ }>, tocCheckPageNum: number, llmClient: LlmClient, _logger: Logger): Promise<TocCheckResult>;
13
+ /**
14
+ * Scans pages sequentially to find TOC pages.
+ *
+ * @param startPageIndex - Index of the first page to inspect.
+ * @param pageList - Document pages; each entry exposes its content as `text`.
+ * @param tocCheckPageNum - Scanning stops at this index unless the previous page was a TOC page.
+ * @param llmClient - LLM client used to classify each page.
+ * @returns Promise of the indices of the pages classified as TOC; scanning ends at the
+ * first non-TOC page that follows a TOC page.
15
+ */
16
+ export declare function findTocPages(startPageIndex: number, pageList: Array<{
17
+ text: string;
18
+ }>, tocCheckPageNum: number, llmClient: LlmClient): Promise<number[]>;
19
+ /**
20
+ * Detects if a single page contains a table of contents.
+ *
+ * @param content - Text of the page to classify.
+ * @param llmClient - LLM client whose JSON reply is inspected.
+ * @returns Promise of `'yes'` when the reply's `toc_detected` field equals `'yes'`,
+ * otherwise `'no'`.
21
+ */
22
+ export declare function tocDetectorSinglePage(content: string, llmClient: LlmClient): Promise<'yes' | 'no'>;
23
+ //# sourceMappingURL=toc-detector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-detector.d.ts","sourceRoot":"","sources":["../../src/toc/toc-detector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAIjD;;;;;;GAMG;AACH,wBAAsB,QAAQ,CAC5B,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,eAAe,EAAE,MAAM,EACvB,SAAS,EAAE,SAAS,EACpB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,cAAc,CAAC,CAgCzB;AAED;;GAEG;AACH,wBAAsB,YAAY,CAChC,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,eAAe,EAAE,MAAM,EACvB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,MAAM,EAAE,CAAC,CAqBnB;AAED;;GAEG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,CAMvB"}
@@ -0,0 +1,65 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { tocDetectorPrompt } from '../llm/prompts.js';
3
+ import { tocExtractor } from './toc-extractor.js';
4
/**
 * Entry point of TOC detection.
 *
 * Flow:
 *  1. Scan from page 0 for a first run of TOC pages; without one, report "no TOC".
 *  2. Extract the TOC text from that run and return it if it carries page numbers.
 *  3. Otherwise keep scanning after the run (while still below both the page
 *     count and `tocCheckPageNum`). Each further run found is extracted together
 *     with the first run, and the combined result is returned as soon as it
 *     carries page numbers.
 *  4. If none does, the result of the first run (without page numbers) is returned.
 *
 * @param pageList        Document pages, each with a `text` property.
 * @param tocCheckPageNum Page-index limit for the scan.
 * @param llmClient       Client forwarded to the detection/extraction helpers.
 * @param _logger         Not used by this function.
 * @returns Object with `tocContent`, `tocPageList` and `pageIndexGivenInToc`.
 */
export async function checkToc(pageList, tocCheckPageNum, llmClient, _logger) {
    const initialTocPages = await findTocPages(0, pageList, tocCheckPageNum, llmClient);
    if (initialTocPages.length === 0) {
        return { tocContent: null, tocPageList: [], pageIndexGivenInToc: false };
    }

    const initialResult = await tocExtractor(pageList, initialTocPages, llmClient);
    if (initialResult.pageIndexGivenInToc) {
        return initialResult;
    }

    // Look for a later TOC run that does carry page numbers.
    for (let cursor = initialTocPages[initialTocPages.length - 1] + 1; cursor < pageList.length && cursor < tocCheckPageNum;) {
        const extraTocPages = await findTocPages(cursor, pageList, tocCheckPageNum, llmClient);
        if (extraTocPages.length === 0) {
            break;
        }
        const combinedResult = await tocExtractor(pageList, initialTocPages.concat(extraTocPages), llmClient);
        if (combinedResult.pageIndexGivenInToc) {
            return combinedResult;
        }
        cursor = extraTocPages[extraTocPages.length - 1] + 1;
    }

    // Nothing with page numbers was found: fall back to the first extraction.
    return initialResult;
}
36
/**
 * Walks the pages one by one from `startPageIndex` and collects the indices of
 * the pages classified as table of contents.
 *
 * The walk ends when the page list is exhausted, when `tocCheckPageNum` is
 * reached before any TOC page has been seen, or at the first 'no' page after a
 * TOC page.
 *
 * @param startPageIndex  Index of the first page to classify.
 * @param pageList        Document pages, each with a `text` property.
 * @param tocCheckPageNum Index at which the walk gives up if no TOC page was seen yet.
 * @param llmClient       Client forwarded to `tocDetectorSinglePage`.
 * @returns Indices of the detected TOC pages, in ascending order.
 */
export async function findTocPages(startPageIndex, pageList, tocCheckPageNum, llmClient) {
    const tocPages = [];
    let insideToc = false;
    for (let pageIdx = startPageIndex; pageIdx < pageList.length; pageIdx++) {
        if (!insideToc && pageIdx >= tocCheckPageNum) {
            break;
        }
        const verdict = await tocDetectorSinglePage(pageList[pageIdx].text, llmClient);
        if (verdict === 'yes') {
            tocPages.push(pageIdx);
            insideToc = true;
            continue;
        }
        if (insideToc && verdict === 'no') {
            break; // the TOC run is over
        }
    }
    return tocPages;
}
58
/**
 * Asks the LLM whether one page holds a table of contents.
 *
 * @param content   Text of the page, embedded into `tocDetectorPrompt`.
 * @param llmClient Client whose `chatJson` reply is inspected.
 * @returns 'yes' when the reply's `toc_detected` field is exactly 'yes', else 'no'.
 */
export async function tocDetectorSinglePage(content, llmClient) {
    const prompt = tocDetectorPrompt(content);
    const { toc_detected: verdict } = await llmClient.chatJson([{ role: 'user', content: prompt }]);
    if (verdict === 'yes') {
        return 'yes';
    }
    return 'no';
}
65
+ //# sourceMappingURL=toc-detector.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-detector.js","sourceRoot":"","sources":["../../src/toc/toc-detector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAiC,EACjC,eAAuB,EACvB,SAAoB,EACpB,OAAe;IAEf,MAAM,WAAW,GAAG,MAAM,YAAY,CAAC,CAAC,EAAE,QAAQ,EAAE,eAAe,EAAE,SAAS,CAAC,CAAC;IAEhF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,EAAE,EAAE,mBAAmB,EAAE,KAAK,EAAE,CAAC;IAC3E,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,QAAQ,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;IAEpE,IAAI,MAAM,CAAC,mBAAmB,EAAE,CAAC;QAC/B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,yDAAyD;IACzD,IAAI,QAAQ,GAAG,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACvD,OAAO,QAAQ,GAAG,QAAQ,CAAC,MAAM,IAAI,QAAQ,GAAG,eAAe,EAAE,CAAC;QAChE,MAAM,YAAY,GAAG,MAAM,YAAY,CACrC,QAAQ,EAAE,QAAQ,EAAE,eAAe,EAAE,SAAS,CAC/C,CAAC;QACF,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM;QAErC,MAAM,WAAW,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,YAAY,CAAC,CAAC;QACtD,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,QAAQ,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;QACvE,IAAI,SAAS,CAAC,mBAAmB,EAAE,CAAC;YAClC,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACvD,CAAC;IAED,kCAAkC;IAClC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,cAAsB,EACtB,QAAiC,EACjC,eAAuB,EACvB,SAAoB;IAEpB,IAAI,aAAa,GAAG,KAAK,CAAC;IAC1B,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,IAAI,CAAC,GAAG,cAAc,CAAC;IAEvB,OAAO,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC,IAAI,eAAe,IAAI,CAAC,aAAa;YAAE,MAAM;QAElD,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAExE,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;YACrB,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACpB,aAAa,GAAG,IAAI,CAAC;QACvB,CAAC;aAAM,IAAI,MAAM,KAAK,IAAI,IAAI,aAAa,EAAE,CAAC;YAC5C,MAAM,CAAC,YAAY;QACrB,CAAC;QAED,CAAC,EAAE,CAAC;IACN,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAe,EACf,SAAoB;I
AEpB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAEpC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,iBAAiB,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC;IAE5D,OAAO,MAAM,CAAC,YAAY,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AACtD,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { TocCheckResult } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
+ /**
4
+ * Extracts TOC content from identified TOC pages and detects page numbers.
+ *
+ * @param pageList - Document pages; each entry exposes its content as `text`.
+ * @param tocPageList - Indices into `pageList` of the pages that make up the TOC.
+ * @param llmClient - LLM client used for the extraction and the page-number check.
+ * @returns Promise of a `TocCheckResult`; the implementation fills `tocContent`,
+ * echoes `tocPageList`, and sets `pageIndexGivenInToc`.
5
+ */
6
+ export declare function tocExtractor(pageList: Array<{
7
+ text: string;
8
+ }>, tocPageList: number[], llmClient: LlmClient): Promise<TocCheckResult>;
9
+ /**
10
+ * Detects if the TOC content contains page numbers.
+ *
+ * @param tocContent - Extracted TOC text to inspect.
+ * @param llmClient - LLM client whose JSON reply is inspected.
+ * @returns Promise of `true` when the reply's `page_index_given_in_toc` field equals `'yes'`.
11
+ */
12
+ export declare function detectPageIndex(tocContent: string, llmClient: LlmClient): Promise<boolean>;
13
+ //# sourceMappingURL=toc-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-extractor.d.ts","sourceRoot":"","sources":["../../src/toc/toc-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAOjD;;GAEG;AACH,wBAAsB,YAAY,CAChC,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACjC,WAAW,EAAE,MAAM,EAAE,EACrB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,cAAc,CAAC,CAyBzB;AAED;;GAEG;AACH,wBAAsB,eAAe,CACnC,UAAU,EAAE,MAAM,EAClB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,OAAO,CAAC,CAMlB"}
@@ -0,0 +1,32 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { extractTocContentPrompt, EXTRACT_TOC_CONTINUE_PROMPT, detectPageIndexPrompt, } from '../llm/prompts.js';
3
/**
 * Builds the table-of-contents text from the pages flagged as TOC pages and
 * reports whether that text carries page numbers.
 *
 * @param pageList - Document pages; only each page's `text` is read.
 * @param tocPageList - Indices into `pageList` identifying the TOC pages.
 * @param llmClient - Client used for the extraction and detection calls.
 * @returns `{ tocContent, tocPageList, pageIndexGivenInToc }`.
 */
export async function tocExtractor(pageList, tocPageList, llmClient) {
    // Gather the text of every TOC page, one page per line group.
    const pageTexts = tocPageList.map((pageIdx) => pageList[pageIdx].text);
    const joinedText = pageTexts.join('\n');
    // Dot leaders ("....." or ". . . . .") are collapsed into ": ".
    const withoutSolidLeaders = joinedText.replace(/\.{5,}/g, ': ');
    const withoutLeaders = withoutSolidLeaders.replace(/(?:\. ){5,}\.?/g, ': ');
    // Let the model produce the cleaned-up TOC, continuing if the reply is cut off.
    const messages = [{ role: 'user', content: extractTocContentPrompt(withoutLeaders) }];
    const tocContent = await llmClient.chatWithContinuation(messages, EXTRACT_TOC_CONTINUE_PROMPT);
    // Second call: does the cleaned TOC include page numbers?
    const pageIndexGivenInToc = await detectPageIndex(tocContent, llmClient);
    return { tocContent, tocPageList, pageIndexGivenInToc };
}
25
/**
 * Asks the model whether the given TOC text includes page numbers.
 *
 * @param tocContent - TOC text to inspect.
 * @param llmClient - Client whose `chatJson` returns the parsed JSON reply.
 * @returns `true` only when the reply's `page_index_given_in_toc` is exactly `'yes'`.
 */
export async function detectPageIndex(tocContent, llmClient) {
    const messages = [{ role: 'user', content: detectPageIndexPrompt(tocContent) }];
    const { page_index_given_in_toc: verdict } = await llmClient.chatJson(messages);
    return verdict === 'yes';
}
32
+ //# sourceMappingURL=toc-extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-extractor.js","sourceRoot":"","sources":["../../src/toc/toc-extractor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EACL,uBAAuB,EACvB,2BAA2B,EAC3B,qBAAqB,GACtB,MAAM,mBAAmB,CAAC;AAE3B;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,QAAiC,EACjC,WAAqB,EACrB,SAAoB;IAEpB,6BAA6B;IAC7B,IAAI,aAAa,GAAG,WAAW;SAC5B,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;SAChC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,sCAAsC;IACtC,aAAa,GAAG,aAAa;SAC1B,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC;SACxB,OAAO,CAAC,iBAAiB,EAAE,IAAI,CAAC,CAAC;IAEpC,oCAAoC;IACpC,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,oBAAoB,CACrD,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,uBAAuB,CAAC,aAAa,CAAC,EAAE,CAAC,EACnE,2BAA2B,CAC5B,CAAC;IAEF,qCAAqC;IACrC,MAAM,YAAY,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;IAElE,OAAO;QACL,UAAU;QACV,WAAW;QACX,mBAAmB,EAAE,YAAY;KAClC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,UAAkB,EAClB,SAAoB;IAEpB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAEpC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,qBAAqB,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnE,OAAO,MAAM,CAAC,uBAAuB,KAAK,KAAK,CAAC;AAClD,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { TocItem } from '../types.js';
2
+ import { LlmClient } from '../llm/llm-client.js';
3
+ /**
4
+ * Transforms TOC text into structured JSON with structure/title/page.
+ *
+ * @param tocContent - TOC text to convert.
+ * @param llmClient - Client used to run the transformation prompt.
+ * @returns The parsed reply when it is an array, or its `table_of_contents`
+ * property when the reply is an object carrying one; otherwise an empty array.
5
+ */
6
+ export declare function tocTransformer(tocContent: string, llmClient: LlmClient): Promise<TocItem[]>;
7
+ /**
8
+ * Extracts physical page indices by matching TOC entries against document pages.
+ *
+ * @param toc - TOC entries; only `structure` and `title` of each entry are sent to the model.
+ * @param content - Text handed to the prompt together with the TOC entries.
+ * @param llmClient - Client used to obtain the JSON reply.
+ * @returns One item per reply element that has a non-empty title, with `structure`,
+ * `title` and `physicalIndex` (`null` when no index could be derived). An empty
+ * array is returned when the reply is not an array.
9
+ */
10
+ export declare function tocIndexExtractor(toc: TocItem[], content: string, llmClient: LlmClient): Promise<TocItem[]>;
11
+ //# sourceMappingURL=toc-transformer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-transformer.d.ts","sourceRoot":"","sources":["../../src/toc/toc-transformer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AASjD;;GAEG;AACH,wBAAsB,cAAc,CAClC,UAAU,EAAE,MAAM,EAClB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,OAAO,EAAE,CAAC,CAapB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,GAAG,EAAE,OAAO,EAAE,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,OAAO,EAAE,CAAC,CA0CpB"}
@@ -0,0 +1,69 @@
1
+ import { LlmClient } from '../llm/llm-client.js';
2
+ import { tocTransformerPrompt, TOC_TRANSFORMER_CONTINUE_PROMPT, tocIndexExtractorPrompt, } from '../llm/prompts.js';
3
+ import { extractJson } from '../utils/json-parser.js';
4
+ import { convertPhysicalIndexToInt } from '../utils/page-utils.js';
5
/**
 * Transforms TOC text into structured JSON with structure/title/page.
 *
 * The model reply is parsed with `extractJson`. Two reply shapes are accepted:
 * a top-level array, or an object whose `table_of_contents` property is an array.
 * Anything else yields an empty array, so callers always receive an array.
 *
 * @param tocContent - TOC text to convert.
 * @param llmClient - Client used to run the transformation prompt (with continuation).
 * @returns The TOC entries found in the reply, or `[]` when none could be recognised.
 */
export async function tocTransformer(tocContent, llmClient) {
    const raw = await llmClient.chatWithContinuation([{ role: 'user', content: tocTransformerPrompt(tocContent) }], TOC_TRANSFORMER_CONTINUE_PROMPT);
    const parsed = extractJson(raw);
    if (Array.isArray(parsed))
        return parsed;
    // Guard on `typeof` before touching the property: the `in` operator throws a
    // TypeError for primitives, and a non-array `table_of_contents` (string, null,
    // nested object) must not leak out as if it were a list of entries.
    if (parsed != null && typeof parsed === 'object' && Array.isArray(parsed.table_of_contents))
        return parsed.table_of_contents;
    return [];
}
17
/**
 * Extracts physical page indices by matching TOC entries against document pages.
 *
 * Only `structure` and `title` of each TOC entry are sent to the model; any other
 * field (such as the printed page) is left out of the prompt. Each element of the
 * reply is normalised into `{ structure, title, physicalIndex }`:
 * - `structure` and `title` are coerced to strings (missing values become `''`);
 * - the index is read from `physicalIndex`, falling back to `physical_index`, and
 *   is `null` when it cannot be normalised;
 * - elements without a title, and `null`/`undefined` elements, are dropped.
 *
 * @param toc - TOC entries to locate.
 * @param content - Text handed to the prompt together with the TOC entries.
 * @param llmClient - Client whose `chatJson` returns the parsed JSON reply.
 * @returns The normalised entries, or `[]` when the reply is not an array.
 */
export async function tocIndexExtractor(toc, content, llmClient) {
    // Remove page field for matching
    const tocNoPage = toc.map(({ structure, title }) => ({ structure, title }));
    const result = await llmClient.chatJson([{
            role: 'user',
            content: tocIndexExtractorPrompt(JSON.stringify(tocNoPage), content),
        }]);
    if (!Array.isArray(result))
        return [];
    const asText = (value) => (typeof value === 'string' ? value : String(value ?? ''));
    const extracted = [];
    for (const raw of result) {
        // A null/undefined element would throw on property access and abort the
        // whole extraction; skip it like any other unusable element.
        if (raw == null)
            continue;
        const title = asText(raw['title']);
        if (!title)
            continue;
        extracted.push({
            structure: asText(raw['structure']),
            title,
            physicalIndex: normalizePhysicalIndex(raw['physicalIndex'] ?? raw['physical_index']),
        });
    }
    return extracted;
}
53
/**
 * Coerces a raw physical-index value from a model reply into a number or `null`.
 *
 * - finite numbers are returned unchanged; `NaN` and infinities become `null`;
 * - strings are delegated to `convertPhysicalIndexToInt`, with any thrown error
 *   turned into `null`;
 * - every other value (including `null` and `undefined`) becomes `null`.
 */
function normalizePhysicalIndex(value) {
    switch (typeof value) {
        case 'number':
            return Number.isFinite(value) ? value : null;
        case 'string':
            try {
                return convertPhysicalIndexToInt(value);
            }
            catch {
                return null;
            }
        default:
            return null;
    }
}
69
+ //# sourceMappingURL=toc-transformer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc-transformer.js","sourceRoot":"","sources":["../../src/toc/toc-transformer.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EACL,oBAAoB,EACpB,+BAA+B,EAC/B,uBAAuB,GACxB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAEnE;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,UAAkB,EAClB,SAAoB;IAEpB,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAC9C,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,oBAAoB,CAAC,UAAU,CAAC,EAAE,CAAC,EAC7D,+BAA+B,CAChC,CAAC;IAEF,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAEjB,CAAC;IAEd,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;QAAE,OAAO,MAAM,CAAC;IACzC,IAAI,MAAM,IAAI,mBAAmB,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,iBAAiB,CAAC;IAC7E,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAc,EACd,OAAe,EACf,SAAoB;IAEpB,iCAAiC;IACjC,MAAM,SAAS,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QACnD,SAAS;QACT,KAAK;KACN,CAAC,CAAC,CAAC;IAEJ,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CACrC,CAAC;YACC,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,uBAAuB,CAC9B,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EACzB,OAAO,CACR;SACF,CAAC,CACH,CAAC;IAEF,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,OAAO,MAAM;SACV,GAAG,CAAC,CAAC,GAAG,EAAkB,EAAE;QAC3B,MAAM,IAAI,GAAG,GAA8B,CAAC;QAE5C,MAAM,SAAS,GAAG,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,QAAQ;YACrD,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC;YACnB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;QACpC,MAAM,KAAK,GAAG,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,QAAQ;YAC7C,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;YACf,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QAEhC,MAAM,gBAAgB,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,IAAI,CAAC,gBAAgB,CAAC,CAAC;QACzE,MAAM,aAAa,GAAG,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;QAE/D,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,OAAO;YACL,SAAS;YACT,KAAK;YACL,aAAa;SACd,CAAC;IACJ,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,CAAC,EAAgB,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,sBAAsB,CAAC,KAAc;IAC5C,IAAI,KAAK,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IAC/B,IAAI,OAAO,KAAK
,KAAK,QAAQ,EAAE,CAAC;QAC9B,OAAO,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;IAC/C,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,IAAI,CAAC;YACH,OAAO,yBAAyB,CAAC,KAAK,CAAC,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { listToTree } from './list-to-tree.js';
2
+ export { getNodes, getLeafNodes, structureToList } from './tree-utils.js';
3
+ export { postProcessing } from './post-processing.js';
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tree/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { listToTree } from './list-to-tree.js';
2
+ export { getNodes, getLeafNodes, structureToList } from './tree-utils.js';
3
+ export { postProcessing } from './post-processing.js';
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tree/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { TreeNode, TocItem } from '../types.js';
2
+ /**
3
+ * Converts a flat TOC list to a nested tree structure.
4
+ * Uses the `structure` field (e.g., "1", "1.1", "1.2.3") to determine hierarchy.
+ *
+ * The parent of an item is the item whose `structure` equals this item's
+ * `structure` with its last dot-separated segment removed. Items without a dot,
+ * or whose parent has not appeared earlier in `data`, become root nodes.
+ *
+ * @param data - Flat TOC items, parents expected before their children.
+ * @returns The root nodes; children hang off each node's `nodes` array.
5
+ */
6
+ export declare function listToTree(data: TocItem[]): TreeNode[];
7
+ //# sourceMappingURL=list-to-tree.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"list-to-tree.d.ts","sourceRoot":"","sources":["../../src/tree/list-to-tree.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAErD;;;GAGG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,QAAQ,EAAE,CA6BtD"}
@@ -0,0 +1,33 @@
1
/**
 * Converts a flat TOC list to a nested tree structure.
 * Uses the `structure` field (e.g., "1", "1.1", "1.2.3") to determine hierarchy.
 *
 * The parent of an item is the previously seen item whose structure equals this
 * item's structure with the last dot-separated segment removed. Items without a
 * dot in their structure, or whose parent has not been seen yet, become roots.
 * When two items share a structure, later children attach to the most recent one.
 *
 * @param data - Flat TOC items, parents expected before their children.
 * @returns Root nodes; each node has `title`, `startIndex` (the item's
 * `physicalIndex`, or `undefined` when it is null/missing), `endIndex`
 * (always `undefined` here) and `nodes` (its children).
 */
export function listToTree(data) {
    const nodesByStructure = new Map();
    const rootNodes = [];
    for (const item of data) {
        const node = {
            title: item.title,
            startIndex: item.physicalIndex ?? undefined,
            endIndex: undefined,
            nodes: [],
        };
        // Items presumably originate from parsed model JSON, where `structure` can
        // be a number (1 instead of "1") or missing; coerce it so the string
        // operations below cannot throw.
        const structure = item.structure == null ? '' : String(item.structure);
        nodesByStructure.set(structure, node);
        // Everything before the last dot names the parent ("1.2.3" -> "1.2").
        const lastDot = structure.lastIndexOf('.');
        const parent = lastDot === -1
            ? undefined
            : nodesByStructure.get(structure.slice(0, lastDot));
        if (parent) {
            parent.nodes.push(node);
        }
        else {
            rootNodes.push(node);
        }
    }
    return rootNodes;
}
33
+ //# sourceMappingURL=list-to-tree.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"list-to-tree.js","sourceRoot":"","sources":["../../src/tree/list-to-tree.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,IAAe;IACxC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC1C,MAAM,SAAS,GAAe,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,IAAI,GAAa;YACrB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,UAAU,EAAE,IAAI,CAAC,aAAa,IAAI,SAAS;YAC3C,QAAQ,EAAE,SAAS;YACnB,KAAK,EAAE,EAAE;SACV,CAAC;QAEF,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QAEhC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,eAAe,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;YAC1C,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC1B,CAAC;iBAAM,CAAC;gBACN,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
@@ -0,0 +1,12 @@
1
+ import type { TreeNode, TocItem } from '../types.js';
2
+ /**
3
+ * Converts a flat TOC list to a tree with page range calculation.
4
+ *
5
+ * Algorithm:
6
+ * 1. For each item, compute startIndex and endIndex based on physicalIndex
7
+ * and the next item's appearStart field.
8
+ * 2. Call listToTree to build the nested structure.
9
+ * 3. If tree is empty, return the original flat list as single-level nodes.
+ *
+ * The items in `structure` are modified in place: `physicalIndex` is normalised
+ * to `undefined` when null and `endIndex` is written onto each item.
+ *
+ * @param structure - Flat TOC items in document order.
+ * @param endPhysicalIndex - End index used for the last item and for any item
+ * whose following item has no physicalIndex.
+ * @returns The nested tree, or an empty array when `structure` is empty.
10
+ */
11
+ export declare function postProcessing(structure: TocItem[], endPhysicalIndex: number): TreeNode[];
12
+ //# sourceMappingURL=post-processing.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"post-processing.d.ts","sourceRoot":"","sources":["../../src/tree/post-processing.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGrD;;;;;;;;GAQG;AACH,wBAAgB,cAAc,CAC5B,SAAS,EAAE,OAAO,EAAE,EACpB,gBAAgB,EAAE,MAAM,GACvB,QAAQ,EAAE,CAkDZ"}
@@ -0,0 +1,87 @@
1
+ import { listToTree } from './list-to-tree.js';
2
/**
 * Turns a flat TOC list into a tree whose nodes carry page ranges.
 *
 * Steps:
 * 1. Normalise each item's `physicalIndex` (null becomes `undefined`) and, for
 *    items that have one, derive `endIndex`:
 *    - last item: `endPhysicalIndex`;
 *    - following item has a physicalIndex: that index, minus one when the
 *      following item's `appearStart` is `'yes'`;
 *    - otherwise: keep an existing `endIndex`, else `endPhysicalIndex`.
 * 2. Build the nested structure with `listToTree`.
 * 3. Copy the computed `endIndex` values onto the tree nodes. If the tree came
 *    back empty, return the items that have a physicalIndex as flat nodes instead.
 *
 * The items in `structure` are modified in place.
 *
 * @param structure - Flat TOC items in document order.
 * @param endPhysicalIndex - End index applied when no later item bounds the range.
 * @returns The tree of nodes, or `[]` for an empty input.
 */
export function postProcessing(structure, endPhysicalIndex) {
    if (structure.length === 0) {
        return [];
    }
    const lastPosition = structure.length - 1;
    for (const [position, entry] of structure.entries()) {
        entry.physicalIndex = entry.physicalIndex ?? undefined;
        if (entry.physicalIndex == null) {
            continue;
        }
        if (position === lastPosition) {
            entry.endIndex = endPhysicalIndex;
            continue;
        }
        const following = structure[position + 1];
        if (following.physicalIndex != null) {
            // A section that opens its page leaves the previous page as the end of this one.
            entry.endIndex = following.appearStart === 'yes'
                ? following.physicalIndex - 1
                : following.physicalIndex;
        }
        // No usable bound from the following item: run to the end index supplied by the caller.
        if (entry.endIndex == null) {
            entry.endIndex = endPhysicalIndex;
        }
    }
    const tree = listToTree(structure);
    if (tree.length > 0) {
        applyEndIndex(tree, structure);
        return tree;
    }
    // Fallback: expose the indexed items as a single-level list.
    const flatNodes = [];
    for (const entry of structure) {
        if (entry.physicalIndex != null) {
            flatNodes.push({
                title: entry.title,
                startIndex: entry.physicalIndex,
                endIndex: entry.endIndex ?? endPhysicalIndex,
                nodes: [],
            });
        }
    }
    return flatNodes;
}
58
/**
 * Copies the `endIndex` computed on the flat TOC items onto the matching tree
 * nodes, descending into child nodes.
 *
 * A node is matched to the first item with the same title whose `physicalIndex`
 * equals the node's `startIndex` (this keeps duplicate titles apart). When no
 * such item exists, the first item with the same title is used instead. Nodes
 * without a match, or whose match has no `endIndex`, are left untouched.
 *
 * @param nodes - Tree nodes to update in place.
 * @param items - Flat TOC items carrying the computed `endIndex` values.
 */
function applyEndIndex(nodes, items) {
    // Index the items once instead of scanning the whole list for every node and
    // rebuilding the title lookup at each recursion level.
    // Shape: title -> (physicalIndex -> first item with that title and index).
    const itemsByTitle = new Map();
    for (const item of items) {
        let byIndex = itemsByTitle.get(item.title);
        if (byIndex === undefined) {
            byIndex = new Map();
            itemsByTitle.set(item.title, byIndex);
        }
        if (!byIndex.has(item.physicalIndex)) {
            byIndex.set(item.physicalIndex, item);
        }
    }
    const assign = (level) => {
        for (const node of level) {
            const byIndex = itemsByTitle.get(node.title);
            if (byIndex !== undefined) {
                // Prefer title + start index; otherwise take the first item with this
                // title (Map iteration follows insertion order).
                const matched = byIndex.get(node.startIndex) ?? byIndex.values().next().value;
                if (matched?.endIndex != null) {
                    node.endIndex = matched.endIndex;
                }
            }
            if (node.nodes.length > 0) {
                assign(node.nodes);
            }
        }
    };
    assign(nodes);
}
87
+ //# sourceMappingURL=post-processing.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"post-processing.js","sourceRoot":"","sources":["../../src/tree/post-processing.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C;;;;;;;;GAQG;AACH,MAAM,UAAU,cAAc,CAC5B,SAAoB,EACpB,gBAAwB;IAExB,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,kDAAkD;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,SAAS,CAAC;QAErD,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI;YAAE,SAAS;QAEzC,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAClC,IAAI,QAAQ,CAAC,aAAa,IAAI,IAAI,EAAE,CAAC;gBACnC,IAAI,QAAQ,CAAC,WAAW,KAAK,KAAK,EAAE,CAAC;oBACnC,IAAI,CAAC,QAAQ,GAAI,QAAQ,CAAC,aAAwB,GAAG,CAAC,CAAC;gBACzD,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,aAAuB,CAAC;gBACnD,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,YAAY;YACZ,IAAI,CAAC,QAAQ,GAAG,gBAAgB,CAAC;QACnC,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,KAAK,IAAI,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,IAAI,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,IAAI,EAAE,CAAC;YACxE,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,gBAAgB,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;IAEnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,mDAAmD;QACnD,OAAO,SAAS;aACb,MAAM,CAAC,CAAC,IAAI,EAAmD,EAAE,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC;aAC7F,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YACd,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,UAAU,EAAE,IAAI,CAAC,aAAa;YAC9B,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,gBAAgB;YAC3C,KAAK,EAAE,EAAE;SACV,CAAC,CAAC,CAAC;IACR,CAAC;IAED,0CAA0C;IAC1C,aAAa,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IAE/B,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,aAAa,CAAC,KAAiB,EAAE,KAAgB;IACxD,+EAA+E;IAC/E,MAAM,YAAY,GAAG,IAAI,GAAG,EAAmB,CAAC;IAChD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAClC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,
CAAC;QACrC,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,uEAAuE;QACvE,IAAI,OAA4B,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,aAAa,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;gBACxE,OAAO,GAAG,IAAI,CAAC;gBACf,MAAM;YACR,CAAC;QACH,CAAC;QACD,iCAAiC;QACjC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzC,CAAC;QACD,IAAI,OAAO,EAAE,QAAQ,IAAI,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACnC,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,aAAa,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1,18 @@
1
+ import type { TreeNode } from '../types.js';
2
+ /**
3
+ * Recursively collects all nodes in the tree into a flat array.
+ *
+ * @param tree - Root-level nodes to walk.
+ * @returns Flat array of the collected nodes.
+ * NOTE(review): traversal order is not visible from this declaration — confirm
+ * against the implementation before relying on it.
4
+ */
5
+ export declare function getNodes(tree: TreeNode[]): TreeNode[];
6
+ /**
7
+ * Recursively collects all leaf nodes (nodes with no children).
+ *
+ * @param tree - Root-level nodes to walk.
+ * @returns Flat array of the nodes that have no children.
8
+ */
9
+ export declare function getLeafNodes(tree: TreeNode[]): TreeNode[];
10
+ /**
11
+ * Converts a tree back to a flat list with structure indices.
+ *
+ * @param tree - Root-level nodes to flatten.
+ * @param prefix - Optional string argument.
+ * NOTE(review): presumably the structure index of the enclosing level, used
+ * when descending into children — confirm against the implementation.
+ * @returns One entry per listed node with its `structure` index, its `title`
+ * and the `node` itself.
12
+ */
13
+ export declare function structureToList(tree: TreeNode[], prefix?: string): Array<{
14
+ structure: string;
15
+ title: string;
16
+ node: TreeNode;
17
+ }>;
18
+ //# sourceMappingURL=tree-utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree-utils.d.ts","sourceRoot":"","sources":["../../src/tree/tree-utils.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE5C;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE,CASrD;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAUzD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,IAAI,EAAE,QAAQ,EAAE,EAChB,MAAM,SAAK,GACV,KAAK,CAAC;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,QAAQ,CAAA;CAAE,CAAC,CAU7D"}