@juspay/neurolink 9.5.2 → 9.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +29 -25
  3. package/dist/agent/directTools.d.ts +5 -5
  4. package/dist/cli/commands/config.d.ts +9 -9
  5. package/dist/cli/commands/serve.d.ts +37 -0
  6. package/dist/cli/commands/serve.js +302 -229
  7. package/dist/cli/commands/setup-anthropic.d.ts +2 -2
  8. package/dist/cli/commands/setup-azure.d.ts +2 -2
  9. package/dist/cli/commands/setup-bedrock.d.ts +2 -2
  10. package/dist/cli/commands/setup-gcp.d.ts +2 -2
  11. package/dist/cli/commands/setup-google-ai.d.ts +2 -2
  12. package/dist/cli/commands/setup-huggingface.d.ts +2 -2
  13. package/dist/cli/commands/setup-mistral.d.ts +2 -2
  14. package/dist/cli/commands/setup-openai.d.ts +2 -2
  15. package/dist/cli/commands/setup.d.ts +2 -2
  16. package/dist/cli/factories/commandFactory.js +16 -2
  17. package/dist/cli/loop/optionsSchema.d.ts +2 -2
  18. package/dist/cli/loop/session.d.ts +4 -0
  19. package/dist/cli/loop/session.js +49 -4
  20. package/dist/cli/utils/interactiveSetup.d.ts +4 -4
  21. package/dist/config/conversationMemory.d.ts +2 -0
  22. package/dist/config/conversationMemory.js +5 -5
  23. package/dist/constants/contextWindows.d.ts +46 -0
  24. package/dist/constants/contextWindows.js +156 -0
  25. package/dist/context/budgetChecker.d.ts +18 -0
  26. package/dist/context/budgetChecker.js +71 -0
  27. package/dist/context/contextCompactor.d.ts +22 -0
  28. package/dist/context/contextCompactor.js +106 -0
  29. package/dist/context/effectiveHistory.d.ts +52 -0
  30. package/dist/context/effectiveHistory.js +105 -0
  31. package/dist/context/errorDetection.d.ts +14 -0
  32. package/dist/context/errorDetection.js +124 -0
  33. package/dist/context/fileSummarizationService.d.ts +54 -0
  34. package/dist/context/fileSummarizationService.js +255 -0
  35. package/dist/context/fileSummarizer.d.ts +56 -0
  36. package/dist/context/fileSummarizer.js +145 -0
  37. package/dist/context/fileTokenBudget.d.ts +53 -0
  38. package/dist/context/fileTokenBudget.js +127 -0
  39. package/dist/context/prompts/summarizationPrompt.d.ts +17 -0
  40. package/dist/context/prompts/summarizationPrompt.js +110 -0
  41. package/dist/context/stages/fileReadDeduplicator.d.ts +10 -0
  42. package/dist/context/stages/fileReadDeduplicator.js +66 -0
  43. package/dist/context/stages/slidingWindowTruncator.d.ts +11 -0
  44. package/dist/context/stages/slidingWindowTruncator.js +42 -0
  45. package/dist/context/stages/structuredSummarizer.d.ts +10 -0
  46. package/dist/context/stages/structuredSummarizer.js +49 -0
  47. package/dist/context/stages/toolOutputPruner.d.ts +10 -0
  48. package/dist/context/stages/toolOutputPruner.js +52 -0
  49. package/dist/context/summarizationEngine.d.ts +45 -0
  50. package/dist/context/summarizationEngine.js +110 -0
  51. package/dist/context/toolOutputLimits.d.ts +17 -0
  52. package/dist/context/toolOutputLimits.js +84 -0
  53. package/dist/context/toolPairRepair.d.ts +16 -0
  54. package/dist/context/toolPairRepair.js +66 -0
  55. package/dist/core/conversationMemoryManager.d.ts +5 -15
  56. package/dist/core/conversationMemoryManager.js +15 -75
  57. package/dist/core/modules/MessageBuilder.d.ts +1 -1
  58. package/dist/core/modules/MessageBuilder.js +2 -0
  59. package/dist/core/modules/TelemetryHandler.d.ts +2 -3
  60. package/dist/core/modules/TelemetryHandler.js +3 -3
  61. package/dist/core/modules/ToolsManager.d.ts +2 -2
  62. package/dist/core/redisConversationMemoryManager.d.ts +8 -14
  63. package/dist/core/redisConversationMemoryManager.js +69 -78
  64. package/dist/factories/providerFactory.d.ts +2 -2
  65. package/dist/files/fileReferenceRegistry.d.ts +276 -0
  66. package/dist/files/fileReferenceRegistry.js +1543 -0
  67. package/dist/files/fileTools.d.ts +423 -0
  68. package/dist/files/fileTools.js +449 -0
  69. package/dist/files/index.d.ts +14 -0
  70. package/dist/files/index.js +13 -0
  71. package/dist/files/streamingReader.d.ts +93 -0
  72. package/dist/files/streamingReader.js +321 -0
  73. package/dist/files/types.d.ts +23 -0
  74. package/dist/files/types.js +23 -0
  75. package/dist/image-gen/imageGenTools.d.ts +2 -2
  76. package/dist/image-gen/types.d.ts +12 -12
  77. package/dist/lib/agent/directTools.d.ts +7 -7
  78. package/dist/lib/config/conversationMemory.d.ts +2 -0
  79. package/dist/lib/config/conversationMemory.js +5 -5
  80. package/dist/lib/constants/contextWindows.d.ts +46 -0
  81. package/dist/lib/constants/contextWindows.js +157 -0
  82. package/dist/lib/context/budgetChecker.d.ts +18 -0
  83. package/dist/lib/context/budgetChecker.js +72 -0
  84. package/dist/lib/context/contextCompactor.d.ts +22 -0
  85. package/dist/lib/context/contextCompactor.js +107 -0
  86. package/dist/lib/context/effectiveHistory.d.ts +52 -0
  87. package/dist/lib/context/effectiveHistory.js +106 -0
  88. package/dist/lib/context/errorDetection.d.ts +14 -0
  89. package/dist/lib/context/errorDetection.js +125 -0
  90. package/dist/lib/context/fileSummarizationService.d.ts +54 -0
  91. package/dist/lib/context/fileSummarizationService.js +256 -0
  92. package/dist/lib/context/fileSummarizer.d.ts +56 -0
  93. package/dist/lib/context/fileSummarizer.js +146 -0
  94. package/dist/lib/context/fileTokenBudget.d.ts +53 -0
  95. package/dist/lib/context/fileTokenBudget.js +128 -0
  96. package/dist/lib/context/prompts/summarizationPrompt.d.ts +17 -0
  97. package/dist/lib/context/prompts/summarizationPrompt.js +111 -0
  98. package/dist/lib/context/stages/fileReadDeduplicator.d.ts +10 -0
  99. package/dist/lib/context/stages/fileReadDeduplicator.js +67 -0
  100. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +11 -0
  101. package/dist/lib/context/stages/slidingWindowTruncator.js +43 -0
  102. package/dist/lib/context/stages/structuredSummarizer.d.ts +10 -0
  103. package/dist/lib/context/stages/structuredSummarizer.js +50 -0
  104. package/dist/lib/context/stages/toolOutputPruner.d.ts +10 -0
  105. package/dist/lib/context/stages/toolOutputPruner.js +53 -0
  106. package/dist/lib/context/summarizationEngine.d.ts +45 -0
  107. package/dist/lib/context/summarizationEngine.js +111 -0
  108. package/dist/lib/context/toolOutputLimits.d.ts +17 -0
  109. package/dist/lib/context/toolOutputLimits.js +85 -0
  110. package/dist/lib/context/toolPairRepair.d.ts +16 -0
  111. package/dist/lib/context/toolPairRepair.js +67 -0
  112. package/dist/lib/core/conversationMemoryManager.d.ts +5 -15
  113. package/dist/lib/core/conversationMemoryManager.js +15 -75
  114. package/dist/lib/core/modules/MessageBuilder.d.ts +1 -1
  115. package/dist/lib/core/modules/MessageBuilder.js +2 -0
  116. package/dist/lib/core/modules/TelemetryHandler.d.ts +2 -3
  117. package/dist/lib/core/modules/TelemetryHandler.js +3 -3
  118. package/dist/lib/core/modules/ToolsManager.d.ts +2 -2
  119. package/dist/lib/core/redisConversationMemoryManager.d.ts +8 -14
  120. package/dist/lib/core/redisConversationMemoryManager.js +69 -78
  121. package/dist/lib/factories/providerFactory.d.ts +2 -2
  122. package/dist/lib/files/fileReferenceRegistry.d.ts +276 -0
  123. package/dist/lib/files/fileReferenceRegistry.js +1544 -0
  124. package/dist/lib/files/fileTools.d.ts +423 -0
  125. package/dist/lib/files/fileTools.js +450 -0
  126. package/dist/lib/files/index.d.ts +14 -0
  127. package/dist/lib/files/index.js +14 -0
  128. package/dist/lib/files/streamingReader.d.ts +93 -0
  129. package/dist/lib/files/streamingReader.js +322 -0
  130. package/dist/lib/files/types.d.ts +23 -0
  131. package/dist/lib/files/types.js +24 -0
  132. package/dist/lib/image-gen/imageGenTools.d.ts +2 -2
  133. package/dist/lib/image-gen/types.d.ts +12 -12
  134. package/dist/lib/memory/mem0Initializer.d.ts +2 -2
  135. package/dist/lib/neurolink.d.ts +61 -2
  136. package/dist/lib/neurolink.js +619 -307
  137. package/dist/lib/processors/archive/ArchiveProcessor.d.ts +327 -0
  138. package/dist/lib/processors/archive/ArchiveProcessor.js +1309 -0
  139. package/dist/lib/processors/archive/index.d.ts +33 -0
  140. package/dist/lib/processors/archive/index.js +43 -0
  141. package/dist/lib/processors/base/types.d.ts +70 -64
  142. package/dist/lib/processors/base/types.js +6 -0
  143. package/dist/lib/processors/cli/fileProcessorCli.d.ts +8 -8
  144. package/dist/lib/processors/cli/fileProcessorCli.js +5 -5
  145. package/dist/lib/processors/config/mimeTypes.js +25 -0
  146. package/dist/lib/processors/config/sizeLimits.d.ts +52 -40
  147. package/dist/lib/processors/config/sizeLimits.js +56 -44
  148. package/dist/lib/processors/document/ExcelProcessor.d.ts +14 -0
  149. package/dist/lib/processors/document/ExcelProcessor.js +72 -1
  150. package/dist/lib/processors/document/PptxProcessor.d.ts +63 -0
  151. package/dist/lib/processors/document/PptxProcessor.js +158 -0
  152. package/dist/lib/processors/document/index.d.ts +1 -0
  153. package/dist/lib/processors/document/index.js +6 -0
  154. package/dist/lib/processors/errors/FileErrorCode.d.ts +2 -2
  155. package/dist/lib/processors/errors/errorHelpers.d.ts +2 -2
  156. package/dist/lib/processors/errors/errorSerializer.d.ts +4 -4
  157. package/dist/lib/processors/index.d.ts +8 -2
  158. package/dist/lib/processors/index.js +5 -2
  159. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +8 -8
  160. package/dist/lib/processors/integration/FileProcessorIntegration.js +7 -7
  161. package/dist/lib/processors/media/AudioProcessor.d.ts +328 -0
  162. package/dist/lib/processors/media/AudioProcessor.js +708 -0
  163. package/dist/lib/processors/media/VideoProcessor.d.ts +350 -0
  164. package/dist/lib/processors/media/VideoProcessor.js +992 -0
  165. package/dist/lib/processors/media/index.d.ts +27 -0
  166. package/dist/lib/processors/media/index.js +37 -0
  167. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +19 -5
  168. package/dist/lib/processors/registry/ProcessorRegistry.js +103 -8
  169. package/dist/lib/processors/registry/index.d.ts +1 -1
  170. package/dist/lib/processors/registry/index.js +1 -1
  171. package/dist/lib/processors/registry/types.d.ts +2 -2
  172. package/dist/lib/providers/googleAiStudio.d.ts +34 -0
  173. package/dist/lib/providers/googleAiStudio.js +267 -397
  174. package/dist/lib/providers/googleVertex.d.ts +55 -1
  175. package/dist/lib/providers/googleVertex.js +452 -719
  176. package/dist/lib/providers/sagemaker/detection.d.ts +6 -6
  177. package/dist/lib/providers/sagemaker/diagnostics.d.ts +4 -4
  178. package/dist/lib/providers/sagemaker/parsers.d.ts +4 -4
  179. package/dist/lib/rag/chunkers/RecursiveChunker.js +2 -2
  180. package/dist/lib/rag/document/loaders.d.ts +6 -71
  181. package/dist/lib/rag/document/loaders.js +5 -5
  182. package/dist/lib/rag/graphRag/graphRAG.js +26 -9
  183. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  184. package/dist/lib/rag/metadata/metadataExtractor.js +6 -3
  185. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +8 -126
  186. package/dist/lib/rag/pipeline/RAGPipeline.js +11 -11
  187. package/dist/lib/rag/pipeline/contextAssembly.d.ts +3 -42
  188. package/dist/lib/rag/pipeline/contextAssembly.js +6 -3
  189. package/dist/lib/rag/reranker/RerankerFactory.d.ts +5 -60
  190. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +3 -33
  191. package/dist/lib/rag/resilience/RetryHandler.d.ts +2 -21
  192. package/dist/lib/rag/retrieval/hybridSearch.d.ts +3 -41
  193. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +2 -13
  194. package/dist/lib/rag/retrieval/vectorQueryTool.js +4 -3
  195. package/dist/lib/rag/types.d.ts +3 -3
  196. package/dist/lib/sdk/toolRegistration.d.ts +2 -2
  197. package/dist/lib/server/middleware/cache.d.ts +2 -2
  198. package/dist/lib/server/middleware/rateLimit.d.ts +2 -2
  199. package/dist/lib/server/routes/mcpRoutes.js +277 -249
  200. package/dist/lib/server/routes/memoryRoutes.js +287 -281
  201. package/dist/lib/server/utils/validation.d.ts +10 -10
  202. package/dist/lib/session/globalSessionState.d.ts +2 -2
  203. package/dist/lib/telemetry/telemetryService.d.ts +2 -2
  204. package/dist/lib/types/common.d.ts +39 -0
  205. package/dist/lib/types/contextTypes.d.ts +255 -0
  206. package/dist/lib/types/contextTypes.js +0 -2
  207. package/dist/lib/types/conversation.d.ts +62 -0
  208. package/dist/lib/types/conversationMemoryInterface.d.ts +27 -0
  209. package/dist/lib/types/conversationMemoryInterface.js +7 -0
  210. package/dist/lib/types/fileReferenceTypes.d.ts +222 -0
  211. package/dist/lib/types/fileReferenceTypes.js +9 -0
  212. package/dist/lib/types/fileTypes.d.ts +26 -3
  213. package/dist/lib/types/generateTypes.d.ts +22 -1
  214. package/dist/lib/types/index.d.ts +4 -5
  215. package/dist/lib/types/index.js +8 -10
  216. package/dist/lib/types/modelTypes.d.ts +2 -2
  217. package/dist/lib/types/processorTypes.d.ts +597 -0
  218. package/dist/lib/types/processorTypes.js +91 -0
  219. package/dist/lib/types/ragTypes.d.ts +481 -0
  220. package/dist/lib/types/ragTypes.js +8 -0
  221. package/dist/lib/types/sdkTypes.d.ts +17 -18
  222. package/dist/lib/types/streamTypes.d.ts +11 -1
  223. package/dist/lib/utils/async/retry.d.ts +2 -2
  224. package/dist/lib/utils/async/withTimeout.js +3 -1
  225. package/dist/lib/utils/conversationMemory.d.ts +12 -6
  226. package/dist/lib/utils/conversationMemory.js +76 -36
  227. package/dist/lib/utils/fileDetector.d.ts +62 -0
  228. package/dist/lib/utils/fileDetector.js +1014 -14
  229. package/dist/lib/utils/json/safeParse.d.ts +2 -2
  230. package/dist/lib/utils/messageBuilder.js +806 -153
  231. package/dist/lib/utils/modelChoices.d.ts +2 -2
  232. package/dist/lib/utils/multimodalOptionsBuilder.d.ts +2 -1
  233. package/dist/lib/utils/multimodalOptionsBuilder.js +1 -0
  234. package/dist/lib/utils/rateLimiter.d.ts +2 -2
  235. package/dist/lib/utils/sanitizers/filename.d.ts +4 -4
  236. package/dist/lib/utils/sanitizers/svg.d.ts +2 -2
  237. package/dist/lib/utils/thinkingConfig.d.ts +6 -6
  238. package/dist/lib/utils/tokenEstimation.d.ts +68 -0
  239. package/dist/lib/utils/tokenEstimation.js +113 -0
  240. package/dist/lib/utils/tokenUtils.d.ts +4 -4
  241. package/dist/lib/utils/ttsProcessor.d.ts +2 -2
  242. package/dist/lib/workflow/config.d.ts +150 -150
  243. package/dist/memory/mem0Initializer.d.ts +2 -2
  244. package/dist/neurolink.d.ts +61 -2
  245. package/dist/neurolink.js +619 -307
  246. package/dist/processors/archive/ArchiveProcessor.d.ts +327 -0
  247. package/dist/processors/archive/ArchiveProcessor.js +1308 -0
  248. package/dist/processors/archive/index.d.ts +33 -0
  249. package/dist/processors/archive/index.js +42 -0
  250. package/dist/processors/base/types.d.ts +70 -64
  251. package/dist/processors/base/types.js +6 -0
  252. package/dist/processors/cli/fileProcessorCli.d.ts +8 -8
  253. package/dist/processors/cli/fileProcessorCli.js +5 -5
  254. package/dist/processors/config/mimeTypes.js +25 -0
  255. package/dist/processors/config/sizeLimits.d.ts +52 -40
  256. package/dist/processors/config/sizeLimits.js +56 -44
  257. package/dist/processors/document/ExcelProcessor.d.ts +14 -0
  258. package/dist/processors/document/ExcelProcessor.js +72 -1
  259. package/dist/processors/document/PptxProcessor.d.ts +63 -0
  260. package/dist/processors/document/PptxProcessor.js +157 -0
  261. package/dist/processors/document/index.d.ts +1 -0
  262. package/dist/processors/document/index.js +6 -0
  263. package/dist/processors/errors/FileErrorCode.d.ts +2 -2
  264. package/dist/processors/errors/errorHelpers.d.ts +2 -2
  265. package/dist/processors/errors/errorSerializer.d.ts +4 -4
  266. package/dist/processors/index.d.ts +8 -2
  267. package/dist/processors/index.js +5 -2
  268. package/dist/processors/integration/FileProcessorIntegration.d.ts +8 -8
  269. package/dist/processors/integration/FileProcessorIntegration.js +7 -7
  270. package/dist/processors/media/AudioProcessor.d.ts +328 -0
  271. package/dist/processors/media/AudioProcessor.js +707 -0
  272. package/dist/processors/media/VideoProcessor.d.ts +350 -0
  273. package/dist/processors/media/VideoProcessor.js +991 -0
  274. package/dist/processors/media/ffprobe-static.d.ts +4 -0
  275. package/dist/processors/media/index.d.ts +27 -0
  276. package/dist/processors/media/index.js +36 -0
  277. package/dist/processors/registry/ProcessorRegistry.d.ts +19 -5
  278. package/dist/processors/registry/ProcessorRegistry.js +103 -8
  279. package/dist/processors/registry/index.d.ts +1 -1
  280. package/dist/processors/registry/index.js +1 -1
  281. package/dist/processors/registry/types.d.ts +2 -2
  282. package/dist/providers/googleAiStudio.d.ts +34 -0
  283. package/dist/providers/googleAiStudio.js +267 -397
  284. package/dist/providers/googleVertex.d.ts +55 -1
  285. package/dist/providers/googleVertex.js +452 -719
  286. package/dist/providers/sagemaker/detection.d.ts +6 -6
  287. package/dist/providers/sagemaker/diagnostics.d.ts +4 -4
  288. package/dist/providers/sagemaker/parsers.d.ts +4 -4
  289. package/dist/rag/chunkers/RecursiveChunker.js +2 -2
  290. package/dist/rag/document/loaders.d.ts +6 -71
  291. package/dist/rag/document/loaders.js +5 -5
  292. package/dist/rag/graphRag/graphRAG.js +26 -9
  293. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  294. package/dist/rag/metadata/metadataExtractor.js +6 -3
  295. package/dist/rag/pipeline/RAGPipeline.d.ts +8 -126
  296. package/dist/rag/pipeline/RAGPipeline.js +11 -11
  297. package/dist/rag/pipeline/contextAssembly.d.ts +3 -42
  298. package/dist/rag/pipeline/contextAssembly.js +6 -3
  299. package/dist/rag/reranker/RerankerFactory.d.ts +5 -60
  300. package/dist/rag/resilience/CircuitBreaker.d.ts +3 -33
  301. package/dist/rag/resilience/RetryHandler.d.ts +2 -21
  302. package/dist/rag/retrieval/hybridSearch.d.ts +3 -41
  303. package/dist/rag/retrieval/vectorQueryTool.d.ts +2 -13
  304. package/dist/rag/retrieval/vectorQueryTool.js +4 -3
  305. package/dist/rag/types.d.ts +3 -3
  306. package/dist/sdk/toolRegistration.d.ts +2 -2
  307. package/dist/server/middleware/cache.d.ts +2 -2
  308. package/dist/server/middleware/rateLimit.d.ts +2 -2
  309. package/dist/server/routes/mcpRoutes.js +277 -249
  310. package/dist/server/routes/memoryRoutes.js +287 -281
  311. package/dist/server/utils/validation.d.ts +4 -4
  312. package/dist/session/globalSessionState.d.ts +2 -2
  313. package/dist/telemetry/telemetryService.d.ts +2 -2
  314. package/dist/types/common.d.ts +39 -0
  315. package/dist/types/contextTypes.d.ts +255 -0
  316. package/dist/types/contextTypes.js +0 -2
  317. package/dist/types/conversation.d.ts +62 -0
  318. package/dist/types/conversationMemoryInterface.d.ts +27 -0
  319. package/dist/types/conversationMemoryInterface.js +6 -0
  320. package/dist/types/fileReferenceTypes.d.ts +222 -0
  321. package/dist/types/fileReferenceTypes.js +8 -0
  322. package/dist/types/fileTypes.d.ts +26 -3
  323. package/dist/types/generateTypes.d.ts +22 -1
  324. package/dist/types/index.d.ts +4 -5
  325. package/dist/types/index.js +8 -10
  326. package/dist/types/processorTypes.d.ts +597 -0
  327. package/dist/types/processorTypes.js +90 -0
  328. package/dist/types/ragTypes.d.ts +481 -0
  329. package/dist/types/ragTypes.js +7 -0
  330. package/dist/types/sdkTypes.d.ts +17 -18
  331. package/dist/types/streamTypes.d.ts +11 -1
  332. package/dist/utils/async/retry.d.ts +2 -2
  333. package/dist/utils/async/withTimeout.js +3 -1
  334. package/dist/utils/conversationMemory.d.ts +12 -6
  335. package/dist/utils/conversationMemory.js +76 -36
  336. package/dist/utils/fileDetector.d.ts +62 -0
  337. package/dist/utils/fileDetector.js +1014 -14
  338. package/dist/utils/json/safeParse.d.ts +2 -2
  339. package/dist/utils/messageBuilder.js +806 -153
  340. package/dist/utils/modelChoices.d.ts +2 -2
  341. package/dist/utils/multimodalOptionsBuilder.d.ts +2 -1
  342. package/dist/utils/multimodalOptionsBuilder.js +1 -0
  343. package/dist/utils/rateLimiter.d.ts +2 -2
  344. package/dist/utils/sanitizers/filename.d.ts +4 -4
  345. package/dist/utils/sanitizers/svg.d.ts +2 -2
  346. package/dist/utils/thinkingConfig.d.ts +6 -6
  347. package/dist/utils/tokenEstimation.d.ts +68 -0
  348. package/dist/utils/tokenEstimation.js +112 -0
  349. package/dist/utils/tokenUtils.d.ts +4 -4
  350. package/dist/utils/ttsProcessor.d.ts +2 -2
  351. package/dist/workflow/config.d.ts +104 -104
  352. package/package.json +18 -6
  353. package/dist/lib/utils/conversationMemoryUtils.d.ts +0 -25
  354. package/dist/lib/utils/conversationMemoryUtils.js +0 -138
  355. package/dist/utils/conversationMemoryUtils.d.ts +0 -25
  356. package/dist/utils/conversationMemoryUtils.js +0 -137
@@ -0,0 +1,255 @@
1
+ /**
2
+ * File Summarization Service
3
+ *
4
+ * Orchestrates the end-to-end file summarization pipeline:
5
+ * 1. Accept raw file inputs (strings or Buffers)
6
+ * 2. Extract readable text and estimate tokens
7
+ * 3. Use `planFileSummarization()` to decide which files to summarize
8
+ * 4. Call an LLM to produce context-aware summaries of the largest files
9
+ * 5. Fall back to truncation when the LLM call fails
10
+ *
11
+ * The LLM is instantiated via a *dynamic import* of NeuroLink to avoid
12
+ * circular dependency issues (NeuroLink → fileSummarizationService → NeuroLink).
13
+ */
14
+ import { estimateTokens } from "../utils/tokenEstimation.js";
15
+ import { buildFileSummarizationPrompt, planFileSummarization, } from "./fileSummarizer.js";
16
+ // ---------------------------------------------------------------------------
17
+ // MIME → human label mapping
18
+ // ---------------------------------------------------------------------------
19
const MIME_LABEL_MAP = {
    "application/pdf": "PDF Document",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "Word Document",
    "application/msword": "Word Document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "Excel Spreadsheet",
    "application/vnd.ms-excel": "Excel Spreadsheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": "PowerPoint Presentation",
    "application/vnd.ms-powerpoint": "PowerPoint Presentation",
    "application/json": "JSON File",
    "application/xml": "XML File",
    "text/xml": "XML File",
    "text/html": "HTML Document",
    "text/css": "CSS Stylesheet",
    "text/csv": "CSV File",
    "text/plain": "Text File",
    "text/markdown": "Markdown Document",
    "application/javascript": "JavaScript File",
    "text/javascript": "JavaScript File",
    "application/typescript": "TypeScript File",
    "text/typescript": "TypeScript File",
    "application/yaml": "YAML File",
    "text/yaml": "YAML File",
    "image/svg+xml": "SVG Image",
    "application/rtf": "RTF Document",
    "text/rtf": "RTF Document",
    "application/zip": "ZIP Archive",
    "application/gzip": "GZip Archive",
};
/** File extension (lowercase, without the dot) → human label, used when the MIME type is unknown. */
const EXTENSION_LABEL_MAP = new Map([
    ["ts", "TypeScript File"],
    ["tsx", "TypeScript File"],
    ["js", "JavaScript File"],
    ["jsx", "JavaScript File"],
    ["py", "Python File"],
    ["java", "Java File"],
    ["go", "Go File"],
    ["rs", "Rust File"],
    ["rb", "Ruby File"],
    ["php", "PHP File"],
    ["c", "C File"],
    ["h", "C File"],
    ["cpp", "C++ File"],
    ["hpp", "C++ File"],
    ["cc", "C++ File"],
    ["cs", "C# File"],
    ["swift", "Swift File"],
    ["kt", "Kotlin File"],
    ["md", "Markdown Document"],
    ["yaml", "YAML File"],
    ["yml", "YAML File"],
    ["toml", "TOML File"],
    ["ini", "Config File"],
    ["cfg", "Config File"],
    ["sh", "Shell Script"],
    ["bash", "Shell Script"],
    ["sql", "SQL File"],
    ["csv", "CSV File"],
    ["json", "JSON File"],
    ["xml", "XML File"],
    ["html", "HTML Document"],
    ["htm", "HTML Document"],
]);
/** Binary MIME type prefixes that cannot be meaningfully extracted as text. */
const BINARY_MIME_PREFIXES = ["image/", "audio/", "video/"];
/**
 * Reduce a MIME string to its bare `type/subtype` form for matching:
 * parameters such as `; charset=utf-8` are dropped and the result is
 * lowercased. Non-string input yields an empty string so callers never throw.
 *
 * @param {unknown} mimeType Raw MIME value as supplied by the caller.
 * @returns {string} Normalised `type/subtype`, or `""`.
 */
function normalizeMimeType(mimeType) {
    if (typeof mimeType !== "string") {
        return "";
    }
    return mimeType.split(";")[0].trim().toLowerCase();
}
// ---------------------------------------------------------------------------
// Service
// ---------------------------------------------------------------------------
/**
 * Turns raw file inputs into summarizable records and, when the plan from
 * `planFileSummarization()` asks for it, replaces oversized files with an
 * LLM-written summary (or a truncated excerpt if the LLM cannot deliver one).
 */
export class FileSummarizationService {
    provider;
    model;
    /**
     * @param {{ provider?: string, model?: string }} [options]
     *   Provider/model used for the summarization calls. Defaults to
     *   `"vertex"` / `"gemini-2.5-flash"`.
     */
    constructor(options) {
        this.provider = options?.provider ?? "vertex";
        this.model = options?.model ?? "gemini-2.5-flash";
    }
    // -------------------------------------------------------------------------
    // Text extraction
    // -------------------------------------------------------------------------
    /**
     * Extract readable text from a file's content.
     *
     * - Strings are returned as-is.
     * - Known-binary types (image/*, audio/*, video/*) return a placeholder.
     * - Buffers containing a NUL byte are treated as binary and also return
     *   the placeholder; `toString("utf-8")` never rejects invalid bytes, so
     *   without this check PDFs, archives and Office files would be decoded
     *   into mojibake and sent to the model.
     * - Any other Buffer is decoded as UTF-8.
     *
     * @param {string | Buffer} content Raw file content.
     * @param {string} mimeType MIME type; parameters and letter case are ignored when matching.
     * @param {string} fileName Name used in the placeholder text.
     * @returns {string} Extracted text or a `[Binary file: …]` placeholder.
     */
    extractFileText(content, mimeType, fileName) {
        // String content — already text
        if (typeof content === "string") {
            return content;
        }
        const binaryPlaceholder = () => `[Binary file: ${fileName} (${mimeType}, ${content.length} bytes)]`;
        // Binary MIME types → placeholder
        const essence = normalizeMimeType(mimeType);
        if (BINARY_MIME_PREFIXES.some((prefix) => essence.startsWith(prefix))) {
            return binaryPlaceholder();
        }
        // A NUL byte is a strong indicator of binary data
        if (typeof content.includes === "function" && content.includes(0)) {
            return binaryPlaceholder();
        }
        // Text-like buffer → decode as UTF-8
        try {
            return content.toString("utf-8");
        }
        catch {
            // e.g. the buffer is too large to be represented as a string
            return binaryPlaceholder();
        }
    }
    // -------------------------------------------------------------------------
    // MIME → label
    // -------------------------------------------------------------------------
    /**
     * Map a MIME type (and filename for fallback) to a human-readable label.
     *
     * The MIME type is matched after stripping parameters and lowercasing.
     * Only own entries of the label table are considered. The extension
     * fallback applies only when the file name actually contains a dot.
     *
     * @param {string} mimeType MIME type of the file.
     * @param {string} fileName File name whose extension is the fallback key.
     * @returns {string} A label such as `"PDF Document"`, or `"File"` when unknown.
     */
    getFileTypeLabel(mimeType, fileName) {
        // Direct lookup
        const essence = normalizeMimeType(mimeType);
        if (Object.prototype.hasOwnProperty.call(MIME_LABEL_MAP, essence)) {
            return MIME_LABEL_MAP[essence];
        }
        // Extension-based fallback
        const name = typeof fileName === "string" ? fileName : "";
        const dotIndex = name.lastIndexOf(".");
        if (dotIndex === -1) {
            return "File";
        }
        const ext = name.slice(dotIndex + 1).toLowerCase();
        return EXTENSION_LABEL_MAP.get(ext) ?? "File";
    }
    // -------------------------------------------------------------------------
    // Preparation
    // -------------------------------------------------------------------------
    /**
     * Convert an array of raw file inputs into `FileForSummarization` objects.
     *
     * Extracts text and estimates token count for each file.
     *
     * @param {Array<{ content: string | Buffer, mimeType: string, fileName: string, originalSize?: number }>} files
     *   Raw inputs.
     * @param {string} [provider] Provider passed to the token estimator;
     *   defaults to the provider this service was constructed with.
     * @returns {Array<object>} One record per input, in the same order.
     */
    prepareFilesForSummarization(files, provider) {
        const effectiveProvider = provider ?? this.provider;
        return files.map((file) => {
            const text = this.extractFileText(file.content, file.mimeType, file.fileName);
            const estimatedTokens = estimateTokens(text, effectiveProvider);
            const fileType = this.getFileTypeLabel(file.mimeType, file.fileName);
            return {
                fileName: file.fileName,
                fileType,
                content: text,
                estimatedTokens,
                mimeType: file.mimeType,
                originalSize: file.originalSize,
            };
        });
    }
    // -------------------------------------------------------------------------
    // Summarization
    // -------------------------------------------------------------------------
    /**
     * Summarize files that exceed the context budget.
     *
     * For each file marked "summarize" by `planFileSummarization()`, we call
     * the configured LLM to produce a context-aware summary. If the LLM call
     * fails, or comes back with no text, we fall back to naive truncation so
     * the request can still proceed with some of the file's content.
     *
     * Files are processed one at a time, in plan order.
     *
     * @param {Array<object>} files Records produced by `prepareFilesForSummarization()`.
     * @param {string} userPrompt The user's question, forwarded to the prompt builder.
     * @param {object} budgetParams Passed to `planFileSummarization()`; its
     *   `provider` field is used for token estimation of the results.
     * @returns {Promise<Array<object>>} One result per plan entry. Results
     *   have `wasSummarized: false` for files kept verbatim and `true` for
     *   files that were summarized or truncated.
     */
    async summarizeFiles(files, userPrompt, budgetParams) {
        const plan = planFileSummarization(files, budgetParams);
        const results = [];
        for (const entry of plan) {
            if (entry.action === "keep") {
                results.push({
                    fileName: entry.file.fileName,
                    fileType: entry.file.fileType,
                    summary: entry.file.content,
                    originalTokens: entry.file.estimatedTokens,
                    summaryTokens: entry.file.estimatedTokens,
                    wasSummarized: false,
                });
                continue;
            }
            // Action is "summarize"
            const targetTokens = entry.targetTokens ?? 2000;
            try {
                // Dynamic import to avoid circular dependency
                const { NeuroLink } = await import("../neurolink.js");
                const summarizer = new NeuroLink();
                const prompt = buildFileSummarizationPrompt({
                    fileName: entry.file.fileName,
                    fileType: entry.file.fileType,
                    fileContent: entry.file.content,
                    userPrompt,
                    targetTokens,
                });
                const result = await summarizer.generate({
                    input: { text: prompt },
                    provider: this.provider,
                    model: this.model,
                });
                const summaryText = typeof result === "string" ? result : (result?.content ?? "");
                // An empty reply would silently erase the file from the
                // context; treat it as a failure so the truncation fallback runs.
                if (typeof summaryText !== "string" || summaryText.trim() === "") {
                    throw new Error(`Empty summary returned for ${entry.file.fileName}`);
                }
                const summaryTokens = estimateTokens(summaryText, budgetParams.provider);
                results.push({
                    fileName: entry.file.fileName,
                    fileType: entry.file.fileType,
                    summary: summaryText,
                    originalTokens: entry.file.estimatedTokens,
                    summaryTokens,
                    wasSummarized: true,
                });
            }
            catch {
                // Fallback: naive truncation (deliberately best-effort)
                const { truncateToTokenBudget } = await import("../utils/tokenEstimation.js");
                const { text: truncated } = truncateToTokenBudget(entry.file.content, targetTokens, budgetParams.provider);
                const summaryTokens = estimateTokens(truncated, budgetParams.provider);
                results.push({
                    fileName: entry.file.fileName,
                    fileType: entry.file.fileType,
                    summary: truncated,
                    originalTokens: entry.file.estimatedTokens,
                    summaryTokens,
                    wasSummarized: true,
                });
            }
        }
        return results;
    }
}
@@ -0,0 +1,56 @@
1
+ /**
2
+ * File Content Summarization Pipeline
3
+ *
4
+ * Provides utilities to detect when attached file content exceeds the
5
+ * model's available context budget and to plan / build prompts for
6
+ * LLM-driven summarization of the largest files.
7
+ *
8
+ * Design rationale:
9
+ * - Files are the #1 cause of context overflow when users attach
10
+ * multiple large documents (PDFs, spreadsheets, source code).
11
+ * - Rather than blindly truncating, we ask an LLM to produce a
12
+ * *context-aware* summary that retains the information most
13
+ * relevant to the user's actual question.
14
+ * - The caller (FileSummarizationService) is responsible for the
15
+ * actual LLM calls; this module is pure computation + types.
16
+ */
17
+ import type { FileForSummarization, FileSummarizationCheckParams, FileSummarizationCheckResult, FileSummarizationPromptParams, FileSummarizationPlanEntry } from "../types/contextTypes.js";
18
+ export type { FileForSummarization, FileSummarizationCheckParams, FileSummarizationCheckResult, FileSummarizationPromptParams, SummarizedFile, FileSummarizationPlanEntry, } from "../types/contextTypes.js";
19
+ /** Fraction of the context window reserved for non-file content overhead */
20
+ export declare const NON_FILE_RESERVE = 0.15;
21
+ /** Minimum tokens a single file can be allocated in the plan */
22
+ export declare const MIN_PER_FILE_TOKENS = 500;
23
+ /** Maximum tokens a single file can be allocated in the plan */
24
+ export declare const MAX_PER_FILE_TOKENS = 4000;
25
+ /**
26
+ * Files with fewer estimated tokens than this threshold are never
27
+ * summarized — they're already small enough to include verbatim.
28
+ */
29
+ export declare const FILE_SUMMARIZATION_THRESHOLD = 1000;
30
+ /**
31
+ * Check whether the attached files push the total input token count
32
+ * to (or past) the usage threshold of the model's available input budget.
33
+ *
34
+ * When summarization is needed, we calculate how many tokens are
35
+ * available for files (after accounting for system prompt, conversation
36
+ * history, current prompt, tool definitions, and a safety reserve), divide
37
+ * that equally across all files, and clamp it to derive a per-file budget.
38
+ */
39
+ export declare function shouldSummarizeFiles(params: FileSummarizationCheckParams): FileSummarizationCheckResult;
40
+ /**
41
+ * Build the LLM prompt used to summarize a single file's content.
42
+ *
43
+ * The prompt is *context-aware*: it includes the user's original question
44
+ * so the LLM can prioritise the most relevant parts of the file.
45
+ */
46
+ export declare function buildFileSummarizationPrompt(params: FileSummarizationPromptParams): string;
47
+ /**
48
+ * Decide which files need summarization and how much budget each gets.
49
+ *
50
+ * Strategy:
51
+ * 1. Sort files largest-first.
52
+ * 2. Walk through the list, marking the largest files for summarization
53
+ * until the cumulative saved tokens bring us under budget.
54
+ * 3. Files below `FILE_SUMMARIZATION_THRESHOLD` are never summarized.
55
+ */
56
+ export declare function planFileSummarization(files: FileForSummarization[], params: FileSummarizationCheckParams): FileSummarizationPlanEntry[];
@@ -0,0 +1,145 @@
1
+ /**
2
+ * File Content Summarization Pipeline
3
+ *
4
+ * Provides utilities to detect when attached file content exceeds the
5
+ * model's available context budget and to plan / build prompts for
6
+ * LLM-driven summarization of the largest files.
7
+ *
8
+ * Design rationale:
9
+ * - Files are the #1 cause of context overflow when users attach
10
+ * multiple large documents (PDFs, spreadsheets, source code).
11
+ * - Rather than blindly truncating, we ask an LLM to produce a
12
+ * *context-aware* summary that retains the information most
13
+ * relevant to the user's actual question.
14
+ * - The caller (FileSummarizationService) is responsible for the
15
+ * actual LLM calls; this module is pure computation + types.
16
+ */
17
+ import { getAvailableInputTokens } from "../constants/contextWindows.js";
18
+ // ---------------------------------------------------------------------------
19
+ // Constants
20
+ // ---------------------------------------------------------------------------
21
+ /** Fraction of the available input tokens held back as a safety reserve, on top of the actual non-file tokens */
22
+ export const NON_FILE_RESERVE = 0.15;
23
+ /** Default lower clamp for the per-file token budget computed by shouldSummarizeFiles */
24
+ export const MIN_PER_FILE_TOKENS = 500;
25
+ /** Default upper clamp for the per-file token budget; also the fallback target when no per-file budget was computed */
26
+ export const MAX_PER_FILE_TOKENS = 4000;
27
+ /**
28
+ * Files with strictly fewer estimated tokens than this threshold are never
29
+ * summarized — they're already small enough to include verbatim.
30
+ */
31
+ export const FILE_SUMMARIZATION_THRESHOLD = 1000;
32
+ // ---------------------------------------------------------------------------
33
+ // Functions
34
+ // ---------------------------------------------------------------------------
35
/**
 * Decide whether attached files need to be summarized to fit the model's
 * input budget, and if so how many tokens each file may keep.
 *
 * Summarization is flagged when either:
 *  - total estimated tokens reach `threshold` (default 0.8) of the available
 *    input tokens, or
 *  - the file tokens alone exceed what is left after subtracting non-file
 *    content and a `NON_FILE_RESERVE` share of the available input tokens.
 *
 * @param params Token counts for each prompt component plus provider/model
 *   info; `fileCount` defaults to 1, the clamp bounds default to
 *   `MIN_PER_FILE_TOKENS` / `MAX_PER_FILE_TOKENS`.
 * @returns The decision, the computed totals, and `perFileBudget`, which is
 *   only set when summarization is needed and `fileCount > 0`.
 */
export function shouldSummarizeFiles(params) {
    const {
        provider,
        model,
        systemPromptTokens,
        conversationHistoryTokens,
        currentPromptTokens,
        toolDefinitionTokens,
        fileTokens,
        fileCount = 1,
        maxTokens,
        threshold = 0.8,
        minTokensPerFile = MIN_PER_FILE_TOKENS,
        maxTokensPerFile = MAX_PER_FILE_TOKENS,
    } = params;
    const availableInputTokens = getAvailableInputTokens(provider, model, maxTokens);
    // Everything in the request that is not file content.
    const overheadTokens = systemPromptTokens +
        conversationHistoryTokens +
        currentPromptTokens +
        toolDefinitionTokens;
    const totalEstimatedTokens = overheadTokens + fileTokens;
    // What files may use: input capacity minus overhead minus the safety reserve.
    const heldBack = Math.ceil(availableInputTokens * NON_FILE_RESERVE);
    const availableBudgetForFiles = Math.max(0, availableInputTokens - overheadTokens - heldBack);
    // With no input capacity at all, treat the window as fully used.
    let usageRatio = 1;
    if (availableInputTokens > 0) {
        usageRatio = totalEstimatedTokens / availableInputTokens;
    }
    const overThreshold = usageRatio >= threshold;
    const filesOverBudget = fileTokens > availableBudgetForFiles;
    const needsSummarization = overThreshold || filesOverBudget;
    // Even split across files, clamped into [minTokensPerFile, maxTokensPerFile].
    const perFileBudget = needsSummarization && fileCount > 0
        ? Math.max(minTokensPerFile, Math.min(maxTokensPerFile, Math.floor(availableBudgetForFiles / fileCount)))
        : undefined;
    return {
        needsSummarization,
        totalEstimatedTokens,
        availableInputTokens,
        availableBudgetForFiles,
        perFileBudget,
    };
}
70
/**
 * Assemble the prompt that asks an LLM to summarize one file.
 *
 * The user's original question is embedded ahead of the file content so the
 * summary can favour the parts of the file that matter for that question.
 *
 * @param params `fileName`, `fileType`, `fileContent`, `userPrompt` and the
 *   approximate `targetTokens` length requested for the summary.
 * @returns The prompt text, with sections separated by blank lines.
 */
export function buildFileSummarizationPrompt(params) {
    const { fileName, fileType, fileContent, userPrompt, targetTokens } = params;
    const intro = `You are a document summarization assistant. Your task is to summarize the following ${fileType} file in a way that preserves the most important information relevant to the user's question.`;
    const questionSection = `## User's Question` + "\n" + `${userPrompt}`;
    const fileHeading = `## File: ${fileName} (${fileType})`;
    const fileBody = `${fileContent}`;
    const instructionSection = [
        `## Instructions`,
        `1. Produce a concise summary of the file content above.`,
        `2. Focus on information that is most relevant to the user's question.`,
        `3. Preserve key data points, names, numbers, and relationships.`,
        `4. Target approximately ${targetTokens} tokens in your summary.`,
        `5. If the file contains structured data (tables, lists), preserve the structure in a compact form.`,
        `6. Start your summary directly — do not include preamble like "Here is a summary".`,
    ].join("\n");
    // Each section is separated from the next by exactly one blank line.
    return [intro, questionSection, fileHeading, fileBody, instructionSection].join("\n\n");
}
97
/**
 * Build a per-file plan saying which files stay verbatim and which get
 * summarized, and to what token target.
 *
 * How it works:
 *  - Runs `shouldSummarizeFiles` with `fileCount` set to `files.length`; if
 *    no summarization is needed every file is kept, in the original order.
 *  - Otherwise files are visited largest-first. A file is summarized only
 *    while more savings are still required, it is at least
 *    `FILE_SUMMARIZATION_THRESHOLD` tokens, and it is larger than the
 *    per-file budget. Everything else is kept.
 *
 * @param files Files with an `estimatedTokens` figure each.
 * @param params Budget-check parameters forwarded to `shouldSummarizeFiles`.
 * @returns One plan entry per file (largest-first when summarization applies).
 */
export function planFileSummarization(files, params) {
    const keepEntry = (file) => ({ file, action: "keep" });
    const check = shouldSummarizeFiles({ ...params, fileCount: files.length });
    if (!check.needsSummarization) {
        return files.map((file) => keepEntry(file));
    }
    const largestFirst = Array.from(files).sort((left, right) => right.estimatedTokens - left.estimatedTokens);
    const targetPerFile = check.perFileBudget ?? MAX_PER_FILE_TOKENS;
    // Tokens that must be shed for the files to fit their budget.
    const combinedFileTokens = files.reduce((total, item) => total + item.estimatedTokens, 0);
    const requiredSavings = Math.max(0, combinedFileTokens - check.availableBudgetForFiles);
    const entries = [];
    let achievedSavings = 0;
    for (const file of largestFirst) {
        const isTiny = file.estimatedTokens < FILE_SUMMARIZATION_THRESHOLD;
        const goalReached = achievedSavings >= requiredSavings;
        if (isTiny || goalReached) {
            entries.push(keepEntry(file));
            continue;
        }
        const gain = file.estimatedTokens - targetPerFile;
        // Summarizing only pays off when the file is larger than its target.
        if (!(gain > 0)) {
            entries.push(keepEntry(file));
            continue;
        }
        entries.push({
            file,
            action: "summarize",
            targetTokens: targetPerFile,
        });
        achievedSavings += gain;
    }
    return entries;
}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * File Token Budget System
3
+ *
4
+ * Calculates how much of the remaining context window budget
5
+ * can be used for file reads. Implements fast-path for small files
6
+ * and preview mode for very large files.
7
+ */
8
+ import type { BudgetFileInput } from "../types/contextTypes.js";
9
+ export type { BudgetFileInput } from "../types/contextTypes.js";
10
+ /** Percentage of remaining context to allocate for file reads */
11
+ export declare const FILE_READ_BUDGET_PERCENT = 0.6;
12
+ /** Files below this size skip budget validation (100KB) */
13
+ export declare const FILE_FAST_PATH_SIZE: number;
14
+ /** Files above this size get preview-only mode (5MB) */
15
+ export declare const FILE_PREVIEW_MODE_SIZE: number;
16
+ /** Default preview size in characters */
17
+ export declare const FILE_PREVIEW_CHARS = 2000;
18
+ /**
19
+ * Calculate available token budget for file reads.
20
+ *
21
+ * @param contextWindow - Total context window for the model
22
+ * @param currentTokens - Tokens already used (conversation + system prompt)
23
+ * @param maxOutputTokens - Reserved output tokens
24
+ * @returns Available tokens for file content
25
+ */
26
+ export declare function calculateFileTokenBudget(contextWindow: number, currentTokens: number, maxOutputTokens: number): number;
27
+ /**
28
+ * Determine how a file should be handled based on its size and the budget.
29
+ */
30
+ export declare function shouldTruncateFile(fileSize: number, budget: number): {
31
+ shouldTruncate: boolean;
32
+ maxChars?: number;
33
+ previewMode?: boolean;
34
+ };
35
+ /**
36
+ * Estimate post-processing token count based on file type.
37
+ *
38
+ * Different file types produce vastly different amounts of text after
39
+ * processing. A 50 MB video file yields ~200-500 tokens of metadata,
40
+ * while a 50 MB text file yields ~12.5 M tokens. Using the raw byte
41
+ * size for all types causes media files to be wrongly excluded by the
42
+ * aggregate budget check.
43
+ *
44
+ * @param sizeBytes Raw file size in bytes
45
+ * @param fileType Detected file type (e.g. "video", "audio", "image")
46
+ * @returns Estimated token count after processing
47
+ */
48
+ export declare function estimatePostProcessingTokens(sizeBytes: number, fileType?: string): number;
49
+ export declare function enforceAggregateFileBudget(files: BudgetFileInput[], availableTokens: number): {
50
+ included: BudgetFileInput[];
51
+ excluded: BudgetFileInput[];
52
+ notices: string[];
53
+ };
@@ -0,0 +1,127 @@
1
+ /**
2
+ * File Token Budget System
3
+ *
4
+ * Calculates how much of the remaining context window budget
5
+ * can be used for file reads. Implements fast-path for small files
6
+ * and preview mode for very large files.
7
+ */
8
+ /** Fraction (0-1) of the remaining context tokens that file reads may consume */
9
+ export const FILE_READ_BUDGET_PERCENT = 0.6;
10
+ /** Files strictly below this size (100 * 1024, i.e. 100KB) skip budget validation */
11
+ export const FILE_FAST_PATH_SIZE = 100 * 1024;
12
+ /** Files strictly above this size (5 * 1024 * 1024, i.e. 5MB) get preview-only mode */
13
+ export const FILE_PREVIEW_MODE_SIZE = 5 * 1024 * 1024;
14
+ /** Preview size in characters; also the minimum maxChars returned when truncating to fit the budget */
15
+ export const FILE_PREVIEW_CHARS = 2000;
16
/**
 * Work out how many tokens file reads may consume.
 *
 * The budget is `FILE_READ_BUDGET_PERCENT` of whatever remains of the context
 * window after current usage and the reserved output tokens, rounded down.
 *
 * @param contextWindow - Total context window for the model
 * @param currentTokens - Tokens already used
 * @param maxOutputTokens - Tokens reserved for the model's output
 * @returns Token budget for file content; 0 when nothing remains
 */
export function calculateFileTokenBudget(contextWindow, currentTokens, maxOutputTokens) {
    const headroom = contextWindow - currentTokens - maxOutputTokens;
    return headroom <= 0 ? 0 : Math.floor(headroom * FILE_READ_BUDGET_PERCENT);
}
31
/**
 * Classify a file by size against the token budget.
 *
 * - Above `FILE_PREVIEW_MODE_SIZE`: preview mode, capped at `FILE_PREVIEW_CHARS`.
 * - Below `FILE_FAST_PATH_SIZE`, or estimated to fit the budget: no truncation.
 * - Otherwise: truncate to the budget, never below `FILE_PREVIEW_CHARS`.
 *
 * @param fileSize Size of the file (token estimate uses 4 units per token)
 * @param budget Token budget available for this file
 * @returns Whether to truncate, and when truncating the character cap and mode
 */
export function shouldTruncateFile(fileSize, budget) {
    if (fileSize > FILE_PREVIEW_MODE_SIZE) {
        return { shouldTruncate: true, maxChars: FILE_PREVIEW_CHARS, previewMode: true };
    }
    // Rough estimate: four characters per token.
    const roughTokens = Math.ceil(fileSize / 4);
    const fitsAsIs = fileSize < FILE_FAST_PATH_SIZE || roughTokens <= budget;
    if (fitsAsIs) {
        return { shouldTruncate: false };
    }
    // Convert the token budget back to characters, keeping at least a preview's worth.
    const budgetChars = budget * 4;
    return {
        shouldTruncate: true,
        maxChars: Math.max(FILE_PREVIEW_CHARS, budgetChars),
        previewMode: false,
    };
}
60
/**
 * Estimate how many tokens a file contributes once it has been processed.
 *
 * Raw byte size is a poor proxy: media and binary files collapse to a small,
 * roughly fixed amount of text, while text-like files scale with their size.
 *
 * @param sizeBytes Raw file size in bytes
 * @param fileType Detected file type (e.g. "video", "pdf", "csv"); any
 *   unlisted or missing type uses the text formula
 * @returns Estimated token count after processing
 */
export function estimatePostProcessingTokens(sizeBytes, fileType) {
    // Types whose estimate does not depend on file size.
    const fixedEstimates = new Map([
        ["video", 500],
        ["audio", 300],
        ["image", 1500],
        ["archive", 1000],
        ["unknown", 500],
    ]);
    const fixed = fixedEstimates.get(fileType);
    if (fixed !== undefined) {
        return fixed;
    }
    if (fileType === "pdf") {
        // ~1 token per 100 bytes, capped.
        return Math.min(Math.ceil(sizeBytes / 100), 50_000);
    }
    if (fileType === "xlsx" || fileType === "docx" || fileType === "pptx") {
        // Extracted text is taken as roughly 15% of the raw size.
        return Math.ceil((sizeBytes * 0.15) / 4);
    }
    if (fileType === "svg") {
        return Math.ceil(sizeBytes / 4);
    }
    // "csv", "text" and everything else: base64-inflate (x1.33), then 4 bytes per token.
    return Math.ceil((sizeBytes * 1.33) / 4);
}
108
/**
 * Split files into those that fit an aggregate token budget and those that
 * do not.
 *
 * The budget is `FILE_READ_BUDGET_PERCENT` of `availableTokens`, rounded down.
 * Files are considered smallest-first by `sizeBytes`; each one is included if
 * its estimated post-processing tokens still fit, otherwise it is excluded
 * and a notice is recorded. A file that does not fit does not stop later
 * files from being considered.
 *
 * @param files Files exposing `name`, `sizeBytes` and `fileType`
 * @param availableTokens Tokens available before the file-read share is applied
 * @returns `included` and `excluded` files plus one notice per excluded file
 */
export function enforceAggregateFileBudget(files, availableTokens) {
    const tokenCeiling = Math.floor(availableTokens * FILE_READ_BUDGET_PERCENT);
    const included = [];
    const excluded = [];
    const notices = [];
    let spentTokens = 0;
    const smallestFirst = Array.from(files).sort((left, right) => left.sizeBytes - right.sizeBytes);
    smallestFirst.forEach((file) => {
        const cost = estimatePostProcessingTokens(file.sizeBytes, file.fileType);
        if (spentTokens + cost <= tokenCeiling) {
            included.push(file);
            spentTokens += cost;
            return;
        }
        excluded.push(file);
        const sizeKb = (file.sizeBytes / 1024).toFixed(0);
        notices.push(`Skipped "${file.name}" (${sizeKb} KB) — exceeds context budget`);
    });
    return { included, excluded, notices };
}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Summarization Prompt Builder
3
+ *
4
+ * Builds prompts for summarizing conversation context into a 9-section structure.
5
+ * Supports both initial summarization and incremental merging with existing summaries.
6
+ */
7
+ import type { SummarizationPromptOptions } from "../../types/contextTypes.js";
8
+ export type { SummarizationPromptOptions } from "../../types/contextTypes.js";
9
+ declare const NINE_SECTIONS: string[];
10
+ /**
11
+ * Builds a summarization prompt based on the provided options.
12
+ *
13
+ * @param options - Configuration for the prompt builder
14
+ * @returns The constructed prompt string
15
+ */
16
+ export declare function buildSummarizationPrompt(options: SummarizationPromptOptions): string;
17
+ export { NINE_SECTIONS };