@juspay/neurolink 9.5.2 → 9.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +29 -25
  3. package/dist/agent/directTools.d.ts +5 -5
  4. package/dist/cli/commands/config.d.ts +9 -9
  5. package/dist/cli/commands/serve.d.ts +37 -0
  6. package/dist/cli/commands/serve.js +302 -229
  7. package/dist/cli/commands/setup-anthropic.d.ts +2 -2
  8. package/dist/cli/commands/setup-azure.d.ts +2 -2
  9. package/dist/cli/commands/setup-bedrock.d.ts +2 -2
  10. package/dist/cli/commands/setup-gcp.d.ts +2 -2
  11. package/dist/cli/commands/setup-google-ai.d.ts +2 -2
  12. package/dist/cli/commands/setup-huggingface.d.ts +2 -2
  13. package/dist/cli/commands/setup-mistral.d.ts +2 -2
  14. package/dist/cli/commands/setup-openai.d.ts +2 -2
  15. package/dist/cli/commands/setup.d.ts +2 -2
  16. package/dist/cli/factories/commandFactory.js +16 -2
  17. package/dist/cli/loop/optionsSchema.d.ts +2 -2
  18. package/dist/cli/loop/session.d.ts +4 -0
  19. package/dist/cli/loop/session.js +49 -4
  20. package/dist/cli/utils/interactiveSetup.d.ts +4 -4
  21. package/dist/config/conversationMemory.d.ts +2 -0
  22. package/dist/config/conversationMemory.js +5 -5
  23. package/dist/constants/contextWindows.d.ts +46 -0
  24. package/dist/constants/contextWindows.js +156 -0
  25. package/dist/context/budgetChecker.d.ts +18 -0
  26. package/dist/context/budgetChecker.js +71 -0
  27. package/dist/context/contextCompactor.d.ts +22 -0
  28. package/dist/context/contextCompactor.js +106 -0
  29. package/dist/context/effectiveHistory.d.ts +52 -0
  30. package/dist/context/effectiveHistory.js +105 -0
  31. package/dist/context/errorDetection.d.ts +14 -0
  32. package/dist/context/errorDetection.js +124 -0
  33. package/dist/context/fileSummarizationService.d.ts +54 -0
  34. package/dist/context/fileSummarizationService.js +255 -0
  35. package/dist/context/fileSummarizer.d.ts +56 -0
  36. package/dist/context/fileSummarizer.js +145 -0
  37. package/dist/context/fileTokenBudget.d.ts +53 -0
  38. package/dist/context/fileTokenBudget.js +127 -0
  39. package/dist/context/prompts/summarizationPrompt.d.ts +17 -0
  40. package/dist/context/prompts/summarizationPrompt.js +110 -0
  41. package/dist/context/stages/fileReadDeduplicator.d.ts +10 -0
  42. package/dist/context/stages/fileReadDeduplicator.js +66 -0
  43. package/dist/context/stages/slidingWindowTruncator.d.ts +11 -0
  44. package/dist/context/stages/slidingWindowTruncator.js +42 -0
  45. package/dist/context/stages/structuredSummarizer.d.ts +10 -0
  46. package/dist/context/stages/structuredSummarizer.js +49 -0
  47. package/dist/context/stages/toolOutputPruner.d.ts +10 -0
  48. package/dist/context/stages/toolOutputPruner.js +52 -0
  49. package/dist/context/summarizationEngine.d.ts +45 -0
  50. package/dist/context/summarizationEngine.js +110 -0
  51. package/dist/context/toolOutputLimits.d.ts +17 -0
  52. package/dist/context/toolOutputLimits.js +84 -0
  53. package/dist/context/toolPairRepair.d.ts +16 -0
  54. package/dist/context/toolPairRepair.js +66 -0
  55. package/dist/core/conversationMemoryManager.d.ts +5 -15
  56. package/dist/core/conversationMemoryManager.js +15 -75
  57. package/dist/core/modules/MessageBuilder.d.ts +1 -1
  58. package/dist/core/modules/MessageBuilder.js +2 -0
  59. package/dist/core/modules/TelemetryHandler.d.ts +2 -3
  60. package/dist/core/modules/TelemetryHandler.js +3 -3
  61. package/dist/core/modules/ToolsManager.d.ts +2 -2
  62. package/dist/core/redisConversationMemoryManager.d.ts +8 -14
  63. package/dist/core/redisConversationMemoryManager.js +69 -78
  64. package/dist/factories/providerFactory.d.ts +2 -2
  65. package/dist/files/fileReferenceRegistry.d.ts +276 -0
  66. package/dist/files/fileReferenceRegistry.js +1543 -0
  67. package/dist/files/fileTools.d.ts +423 -0
  68. package/dist/files/fileTools.js +449 -0
  69. package/dist/files/index.d.ts +14 -0
  70. package/dist/files/index.js +13 -0
  71. package/dist/files/streamingReader.d.ts +93 -0
  72. package/dist/files/streamingReader.js +321 -0
  73. package/dist/files/types.d.ts +23 -0
  74. package/dist/files/types.js +23 -0
  75. package/dist/image-gen/imageGenTools.d.ts +2 -2
  76. package/dist/image-gen/types.d.ts +12 -12
  77. package/dist/lib/agent/directTools.d.ts +7 -7
  78. package/dist/lib/config/conversationMemory.d.ts +2 -0
  79. package/dist/lib/config/conversationMemory.js +5 -5
  80. package/dist/lib/constants/contextWindows.d.ts +46 -0
  81. package/dist/lib/constants/contextWindows.js +157 -0
  82. package/dist/lib/context/budgetChecker.d.ts +18 -0
  83. package/dist/lib/context/budgetChecker.js +72 -0
  84. package/dist/lib/context/contextCompactor.d.ts +22 -0
  85. package/dist/lib/context/contextCompactor.js +107 -0
  86. package/dist/lib/context/effectiveHistory.d.ts +52 -0
  87. package/dist/lib/context/effectiveHistory.js +106 -0
  88. package/dist/lib/context/errorDetection.d.ts +14 -0
  89. package/dist/lib/context/errorDetection.js +125 -0
  90. package/dist/lib/context/fileSummarizationService.d.ts +54 -0
  91. package/dist/lib/context/fileSummarizationService.js +256 -0
  92. package/dist/lib/context/fileSummarizer.d.ts +56 -0
  93. package/dist/lib/context/fileSummarizer.js +146 -0
  94. package/dist/lib/context/fileTokenBudget.d.ts +53 -0
  95. package/dist/lib/context/fileTokenBudget.js +128 -0
  96. package/dist/lib/context/prompts/summarizationPrompt.d.ts +17 -0
  97. package/dist/lib/context/prompts/summarizationPrompt.js +111 -0
  98. package/dist/lib/context/stages/fileReadDeduplicator.d.ts +10 -0
  99. package/dist/lib/context/stages/fileReadDeduplicator.js +67 -0
  100. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +11 -0
  101. package/dist/lib/context/stages/slidingWindowTruncator.js +43 -0
  102. package/dist/lib/context/stages/structuredSummarizer.d.ts +10 -0
  103. package/dist/lib/context/stages/structuredSummarizer.js +50 -0
  104. package/dist/lib/context/stages/toolOutputPruner.d.ts +10 -0
  105. package/dist/lib/context/stages/toolOutputPruner.js +53 -0
  106. package/dist/lib/context/summarizationEngine.d.ts +45 -0
  107. package/dist/lib/context/summarizationEngine.js +111 -0
  108. package/dist/lib/context/toolOutputLimits.d.ts +17 -0
  109. package/dist/lib/context/toolOutputLimits.js +85 -0
  110. package/dist/lib/context/toolPairRepair.d.ts +16 -0
  111. package/dist/lib/context/toolPairRepair.js +67 -0
  112. package/dist/lib/core/conversationMemoryManager.d.ts +5 -15
  113. package/dist/lib/core/conversationMemoryManager.js +15 -75
  114. package/dist/lib/core/modules/MessageBuilder.d.ts +1 -1
  115. package/dist/lib/core/modules/MessageBuilder.js +2 -0
  116. package/dist/lib/core/modules/TelemetryHandler.d.ts +2 -3
  117. package/dist/lib/core/modules/TelemetryHandler.js +3 -3
  118. package/dist/lib/core/modules/ToolsManager.d.ts +2 -2
  119. package/dist/lib/core/redisConversationMemoryManager.d.ts +8 -14
  120. package/dist/lib/core/redisConversationMemoryManager.js +69 -78
  121. package/dist/lib/factories/providerFactory.d.ts +2 -2
  122. package/dist/lib/files/fileReferenceRegistry.d.ts +276 -0
  123. package/dist/lib/files/fileReferenceRegistry.js +1544 -0
  124. package/dist/lib/files/fileTools.d.ts +423 -0
  125. package/dist/lib/files/fileTools.js +450 -0
  126. package/dist/lib/files/index.d.ts +14 -0
  127. package/dist/lib/files/index.js +14 -0
  128. package/dist/lib/files/streamingReader.d.ts +93 -0
  129. package/dist/lib/files/streamingReader.js +322 -0
  130. package/dist/lib/files/types.d.ts +23 -0
  131. package/dist/lib/files/types.js +24 -0
  132. package/dist/lib/image-gen/imageGenTools.d.ts +2 -2
  133. package/dist/lib/image-gen/types.d.ts +12 -12
  134. package/dist/lib/memory/mem0Initializer.d.ts +2 -2
  135. package/dist/lib/neurolink.d.ts +61 -2
  136. package/dist/lib/neurolink.js +619 -307
  137. package/dist/lib/processors/archive/ArchiveProcessor.d.ts +327 -0
  138. package/dist/lib/processors/archive/ArchiveProcessor.js +1309 -0
  139. package/dist/lib/processors/archive/index.d.ts +33 -0
  140. package/dist/lib/processors/archive/index.js +43 -0
  141. package/dist/lib/processors/base/types.d.ts +70 -64
  142. package/dist/lib/processors/base/types.js +6 -0
  143. package/dist/lib/processors/cli/fileProcessorCli.d.ts +8 -8
  144. package/dist/lib/processors/cli/fileProcessorCli.js +5 -5
  145. package/dist/lib/processors/config/mimeTypes.js +25 -0
  146. package/dist/lib/processors/config/sizeLimits.d.ts +52 -40
  147. package/dist/lib/processors/config/sizeLimits.js +56 -44
  148. package/dist/lib/processors/document/ExcelProcessor.d.ts +14 -0
  149. package/dist/lib/processors/document/ExcelProcessor.js +72 -1
  150. package/dist/lib/processors/document/PptxProcessor.d.ts +63 -0
  151. package/dist/lib/processors/document/PptxProcessor.js +158 -0
  152. package/dist/lib/processors/document/index.d.ts +1 -0
  153. package/dist/lib/processors/document/index.js +6 -0
  154. package/dist/lib/processors/errors/FileErrorCode.d.ts +2 -2
  155. package/dist/lib/processors/errors/errorHelpers.d.ts +2 -2
  156. package/dist/lib/processors/errors/errorSerializer.d.ts +4 -4
  157. package/dist/lib/processors/index.d.ts +8 -2
  158. package/dist/lib/processors/index.js +5 -2
  159. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +8 -8
  160. package/dist/lib/processors/integration/FileProcessorIntegration.js +7 -7
  161. package/dist/lib/processors/media/AudioProcessor.d.ts +328 -0
  162. package/dist/lib/processors/media/AudioProcessor.js +708 -0
  163. package/dist/lib/processors/media/VideoProcessor.d.ts +350 -0
  164. package/dist/lib/processors/media/VideoProcessor.js +992 -0
  165. package/dist/lib/processors/media/index.d.ts +27 -0
  166. package/dist/lib/processors/media/index.js +37 -0
  167. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +19 -5
  168. package/dist/lib/processors/registry/ProcessorRegistry.js +103 -8
  169. package/dist/lib/processors/registry/index.d.ts +1 -1
  170. package/dist/lib/processors/registry/index.js +1 -1
  171. package/dist/lib/processors/registry/types.d.ts +2 -2
  172. package/dist/lib/providers/googleAiStudio.d.ts +34 -0
  173. package/dist/lib/providers/googleAiStudio.js +267 -397
  174. package/dist/lib/providers/googleVertex.d.ts +55 -1
  175. package/dist/lib/providers/googleVertex.js +452 -719
  176. package/dist/lib/providers/sagemaker/detection.d.ts +6 -6
  177. package/dist/lib/providers/sagemaker/diagnostics.d.ts +4 -4
  178. package/dist/lib/providers/sagemaker/parsers.d.ts +4 -4
  179. package/dist/lib/rag/chunkers/RecursiveChunker.js +2 -2
  180. package/dist/lib/rag/document/loaders.d.ts +6 -71
  181. package/dist/lib/rag/document/loaders.js +5 -5
  182. package/dist/lib/rag/graphRag/graphRAG.js +26 -9
  183. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  184. package/dist/lib/rag/metadata/metadataExtractor.js +6 -3
  185. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +8 -126
  186. package/dist/lib/rag/pipeline/RAGPipeline.js +11 -11
  187. package/dist/lib/rag/pipeline/contextAssembly.d.ts +3 -42
  188. package/dist/lib/rag/pipeline/contextAssembly.js +6 -3
  189. package/dist/lib/rag/reranker/RerankerFactory.d.ts +5 -60
  190. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +3 -33
  191. package/dist/lib/rag/resilience/RetryHandler.d.ts +2 -21
  192. package/dist/lib/rag/retrieval/hybridSearch.d.ts +3 -41
  193. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +2 -13
  194. package/dist/lib/rag/retrieval/vectorQueryTool.js +4 -3
  195. package/dist/lib/rag/types.d.ts +3 -3
  196. package/dist/lib/sdk/toolRegistration.d.ts +2 -2
  197. package/dist/lib/server/middleware/cache.d.ts +2 -2
  198. package/dist/lib/server/middleware/rateLimit.d.ts +2 -2
  199. package/dist/lib/server/routes/mcpRoutes.js +277 -249
  200. package/dist/lib/server/routes/memoryRoutes.js +287 -281
  201. package/dist/lib/server/utils/validation.d.ts +10 -10
  202. package/dist/lib/session/globalSessionState.d.ts +2 -2
  203. package/dist/lib/telemetry/telemetryService.d.ts +2 -2
  204. package/dist/lib/types/common.d.ts +39 -0
  205. package/dist/lib/types/contextTypes.d.ts +255 -0
  206. package/dist/lib/types/contextTypes.js +0 -2
  207. package/dist/lib/types/conversation.d.ts +62 -0
  208. package/dist/lib/types/conversationMemoryInterface.d.ts +27 -0
  209. package/dist/lib/types/conversationMemoryInterface.js +7 -0
  210. package/dist/lib/types/fileReferenceTypes.d.ts +222 -0
  211. package/dist/lib/types/fileReferenceTypes.js +9 -0
  212. package/dist/lib/types/fileTypes.d.ts +26 -3
  213. package/dist/lib/types/generateTypes.d.ts +22 -1
  214. package/dist/lib/types/index.d.ts +4 -5
  215. package/dist/lib/types/index.js +8 -10
  216. package/dist/lib/types/modelTypes.d.ts +2 -2
  217. package/dist/lib/types/processorTypes.d.ts +597 -0
  218. package/dist/lib/types/processorTypes.js +91 -0
  219. package/dist/lib/types/ragTypes.d.ts +481 -0
  220. package/dist/lib/types/ragTypes.js +8 -0
  221. package/dist/lib/types/sdkTypes.d.ts +17 -18
  222. package/dist/lib/types/streamTypes.d.ts +11 -1
  223. package/dist/lib/utils/async/retry.d.ts +2 -2
  224. package/dist/lib/utils/async/withTimeout.js +3 -1
  225. package/dist/lib/utils/conversationMemory.d.ts +12 -6
  226. package/dist/lib/utils/conversationMemory.js +76 -36
  227. package/dist/lib/utils/fileDetector.d.ts +62 -0
  228. package/dist/lib/utils/fileDetector.js +1014 -14
  229. package/dist/lib/utils/json/safeParse.d.ts +2 -2
  230. package/dist/lib/utils/messageBuilder.js +806 -153
  231. package/dist/lib/utils/modelChoices.d.ts +2 -2
  232. package/dist/lib/utils/multimodalOptionsBuilder.d.ts +2 -1
  233. package/dist/lib/utils/multimodalOptionsBuilder.js +1 -0
  234. package/dist/lib/utils/rateLimiter.d.ts +2 -2
  235. package/dist/lib/utils/sanitizers/filename.d.ts +4 -4
  236. package/dist/lib/utils/sanitizers/svg.d.ts +2 -2
  237. package/dist/lib/utils/thinkingConfig.d.ts +6 -6
  238. package/dist/lib/utils/tokenEstimation.d.ts +68 -0
  239. package/dist/lib/utils/tokenEstimation.js +113 -0
  240. package/dist/lib/utils/tokenUtils.d.ts +4 -4
  241. package/dist/lib/utils/ttsProcessor.d.ts +2 -2
  242. package/dist/lib/workflow/config.d.ts +150 -150
  243. package/dist/memory/mem0Initializer.d.ts +2 -2
  244. package/dist/neurolink.d.ts +61 -2
  245. package/dist/neurolink.js +619 -307
  246. package/dist/processors/archive/ArchiveProcessor.d.ts +327 -0
  247. package/dist/processors/archive/ArchiveProcessor.js +1308 -0
  248. package/dist/processors/archive/index.d.ts +33 -0
  249. package/dist/processors/archive/index.js +42 -0
  250. package/dist/processors/base/types.d.ts +70 -64
  251. package/dist/processors/base/types.js +6 -0
  252. package/dist/processors/cli/fileProcessorCli.d.ts +8 -8
  253. package/dist/processors/cli/fileProcessorCli.js +5 -5
  254. package/dist/processors/config/mimeTypes.js +25 -0
  255. package/dist/processors/config/sizeLimits.d.ts +52 -40
  256. package/dist/processors/config/sizeLimits.js +56 -44
  257. package/dist/processors/document/ExcelProcessor.d.ts +14 -0
  258. package/dist/processors/document/ExcelProcessor.js +72 -1
  259. package/dist/processors/document/PptxProcessor.d.ts +63 -0
  260. package/dist/processors/document/PptxProcessor.js +157 -0
  261. package/dist/processors/document/index.d.ts +1 -0
  262. package/dist/processors/document/index.js +6 -0
  263. package/dist/processors/errors/FileErrorCode.d.ts +2 -2
  264. package/dist/processors/errors/errorHelpers.d.ts +2 -2
  265. package/dist/processors/errors/errorSerializer.d.ts +4 -4
  266. package/dist/processors/index.d.ts +8 -2
  267. package/dist/processors/index.js +5 -2
  268. package/dist/processors/integration/FileProcessorIntegration.d.ts +8 -8
  269. package/dist/processors/integration/FileProcessorIntegration.js +7 -7
  270. package/dist/processors/media/AudioProcessor.d.ts +328 -0
  271. package/dist/processors/media/AudioProcessor.js +707 -0
  272. package/dist/processors/media/VideoProcessor.d.ts +350 -0
  273. package/dist/processors/media/VideoProcessor.js +991 -0
  274. package/dist/processors/media/ffprobe-static.d.ts +4 -0
  275. package/dist/processors/media/index.d.ts +27 -0
  276. package/dist/processors/media/index.js +36 -0
  277. package/dist/processors/registry/ProcessorRegistry.d.ts +19 -5
  278. package/dist/processors/registry/ProcessorRegistry.js +103 -8
  279. package/dist/processors/registry/index.d.ts +1 -1
  280. package/dist/processors/registry/index.js +1 -1
  281. package/dist/processors/registry/types.d.ts +2 -2
  282. package/dist/providers/googleAiStudio.d.ts +34 -0
  283. package/dist/providers/googleAiStudio.js +267 -397
  284. package/dist/providers/googleVertex.d.ts +55 -1
  285. package/dist/providers/googleVertex.js +452 -719
  286. package/dist/providers/sagemaker/detection.d.ts +6 -6
  287. package/dist/providers/sagemaker/diagnostics.d.ts +4 -4
  288. package/dist/providers/sagemaker/parsers.d.ts +4 -4
  289. package/dist/rag/chunkers/RecursiveChunker.js +2 -2
  290. package/dist/rag/document/loaders.d.ts +6 -71
  291. package/dist/rag/document/loaders.js +5 -5
  292. package/dist/rag/graphRag/graphRAG.js +26 -9
  293. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  294. package/dist/rag/metadata/metadataExtractor.js +6 -3
  295. package/dist/rag/pipeline/RAGPipeline.d.ts +8 -126
  296. package/dist/rag/pipeline/RAGPipeline.js +11 -11
  297. package/dist/rag/pipeline/contextAssembly.d.ts +3 -42
  298. package/dist/rag/pipeline/contextAssembly.js +6 -3
  299. package/dist/rag/reranker/RerankerFactory.d.ts +5 -60
  300. package/dist/rag/resilience/CircuitBreaker.d.ts +3 -33
  301. package/dist/rag/resilience/RetryHandler.d.ts +2 -21
  302. package/dist/rag/retrieval/hybridSearch.d.ts +3 -41
  303. package/dist/rag/retrieval/vectorQueryTool.d.ts +2 -13
  304. package/dist/rag/retrieval/vectorQueryTool.js +4 -3
  305. package/dist/rag/types.d.ts +3 -3
  306. package/dist/sdk/toolRegistration.d.ts +2 -2
  307. package/dist/server/middleware/cache.d.ts +2 -2
  308. package/dist/server/middleware/rateLimit.d.ts +2 -2
  309. package/dist/server/routes/mcpRoutes.js +277 -249
  310. package/dist/server/routes/memoryRoutes.js +287 -281
  311. package/dist/server/utils/validation.d.ts +4 -4
  312. package/dist/session/globalSessionState.d.ts +2 -2
  313. package/dist/telemetry/telemetryService.d.ts +2 -2
  314. package/dist/types/common.d.ts +39 -0
  315. package/dist/types/contextTypes.d.ts +255 -0
  316. package/dist/types/contextTypes.js +0 -2
  317. package/dist/types/conversation.d.ts +62 -0
  318. package/dist/types/conversationMemoryInterface.d.ts +27 -0
  319. package/dist/types/conversationMemoryInterface.js +6 -0
  320. package/dist/types/fileReferenceTypes.d.ts +222 -0
  321. package/dist/types/fileReferenceTypes.js +8 -0
  322. package/dist/types/fileTypes.d.ts +26 -3
  323. package/dist/types/generateTypes.d.ts +22 -1
  324. package/dist/types/index.d.ts +4 -5
  325. package/dist/types/index.js +8 -10
  326. package/dist/types/processorTypes.d.ts +597 -0
  327. package/dist/types/processorTypes.js +90 -0
  328. package/dist/types/ragTypes.d.ts +481 -0
  329. package/dist/types/ragTypes.js +7 -0
  330. package/dist/types/sdkTypes.d.ts +17 -18
  331. package/dist/types/streamTypes.d.ts +11 -1
  332. package/dist/utils/async/retry.d.ts +2 -2
  333. package/dist/utils/async/withTimeout.js +3 -1
  334. package/dist/utils/conversationMemory.d.ts +12 -6
  335. package/dist/utils/conversationMemory.js +76 -36
  336. package/dist/utils/fileDetector.d.ts +62 -0
  337. package/dist/utils/fileDetector.js +1014 -14
  338. package/dist/utils/json/safeParse.d.ts +2 -2
  339. package/dist/utils/messageBuilder.js +806 -153
  340. package/dist/utils/modelChoices.d.ts +2 -2
  341. package/dist/utils/multimodalOptionsBuilder.d.ts +2 -1
  342. package/dist/utils/multimodalOptionsBuilder.js +1 -0
  343. package/dist/utils/rateLimiter.d.ts +2 -2
  344. package/dist/utils/sanitizers/filename.d.ts +4 -4
  345. package/dist/utils/sanitizers/svg.d.ts +2 -2
  346. package/dist/utils/thinkingConfig.d.ts +6 -6
  347. package/dist/utils/tokenEstimation.d.ts +68 -0
  348. package/dist/utils/tokenEstimation.js +112 -0
  349. package/dist/utils/tokenUtils.d.ts +4 -4
  350. package/dist/utils/ttsProcessor.d.ts +2 -2
  351. package/dist/workflow/config.d.ts +104 -104
  352. package/package.json +18 -6
  353. package/dist/lib/utils/conversationMemoryUtils.d.ts +0 -25
  354. package/dist/lib/utils/conversationMemoryUtils.js +0 -138
  355. package/dist/utils/conversationMemoryUtils.d.ts +0 -25
  356. package/dist/utils/conversationMemoryUtils.js +0 -137
@@ -5,6 +5,9 @@
5
5
  */
6
6
  import { readFile, stat } from "fs/promises";
7
7
  import { getGlobalDispatcher, interceptors, request } from "undici";
8
+ import { archiveProcessor } from "../processors/archive/ArchiveProcessor.js";
9
+ import { audioProcessor } from "../processors/media/AudioProcessor.js";
10
+ import { videoProcessor } from "../processors/media/VideoProcessor.js";
8
11
  import { CSVProcessor } from "./csvProcessor.js";
9
12
  import { ImageProcessor } from "./imageProcessor.js";
10
13
  import { logger } from "./logger.js";
@@ -211,8 +214,13 @@ export class FileDetector {
211
214
  logger.debug(`[FileDetector] ${allowedType} fallback failed: ${errorMsg}`);
212
215
  }
213
216
  }
214
- // All fallbacks failed
215
- throw new Error(`File type detection failed and all fallback parsing attempts failed. Original detection: ${detection.type}. Attempted types: ${options.allowedTypes.join(", ")}. Errors: ${errors.join("; ")}`);
217
+ // All fallbacks failed — fall through to processFile() which handles
218
+ // "unknown" types gracefully by extracting binary metadata and printable
219
+ // strings instead of throwing.
220
+ logger.warn(`[FileDetector] All fallback parsing failed for type "${detection.type}". ` +
221
+ `Attempted: ${options.allowedTypes.join(", ")}. Falling through to universal handler.`);
222
+ const csvOptions = options?.csvOptions;
223
+ return await FileDetector.processFile(content, detection, csvOptions, options?.provider);
216
224
  }
217
225
  const content = await FileDetector.loadContent(input, detection, options);
218
226
  // Extract CSV-specific options from FileDetectorOptions
@@ -262,6 +270,40 @@ export class FileDetector {
262
270
  // Audio requires magic bytes - can't fallback without detection
263
271
  throw new Error("Audio type requires binary detection, cannot fallback parse");
264
272
  }
273
+ case "video": {
274
+ // Video requires magic bytes - can't fallback without detection
275
+ throw new Error("Video type requires binary detection, cannot fallback parse");
276
+ }
277
+ case "archive": {
278
+ // Archive requires magic bytes - can't fallback without detection
279
+ throw new Error("Archive type requires binary detection, cannot fallback parse");
280
+ }
281
+ case "xlsx": {
282
+ // Document formats require binary detection
283
+ throw new Error("Excel type requires binary detection, cannot fallback parse");
284
+ }
285
+ case "docx": {
286
+ throw new Error("Word type requires binary detection, cannot fallback parse");
287
+ }
288
+ case "pptx": {
289
+ throw new Error("PowerPoint type requires binary detection, cannot fallback parse");
290
+ }
291
+ case "svg": {
292
+ // SVG can be detected from text content
293
+ const svgContent = content.toString("utf-8");
294
+ if (svgContent.includes("<svg") && svgContent.includes("</svg>")) {
295
+ return {
296
+ type: "svg",
297
+ content: svgContent,
298
+ mimeType: "image/svg+xml",
299
+ metadata: {
300
+ confidence: 70,
301
+ size: content.length,
302
+ },
303
+ };
304
+ }
305
+ throw new Error("Content does not appear to be valid SVG");
306
+ }
265
307
  default:
266
308
  return null;
267
309
  }
@@ -438,6 +480,251 @@ export class FileDetector {
438
480
  throw new Error(`Unknown source: ${source}`);
439
481
  }
440
482
  }
483
+ /**
484
+ * SDK-8: Format an informative placeholder when a file processor fails.
485
+ * Instead of bare "[Video file: name]" strings, include size, format, and
486
+ * the reason for failure so the LLM can acknowledge the attachment.
487
+ */
488
+ static formatInformativePlaceholder(typeName, filename, content, detection, error) {
489
+ const sizeStr = content.length < 1024
490
+ ? `${content.length} bytes`
491
+ : content.length < 1024 * 1024
492
+ ? `${(content.length / 1024).toFixed(1)} KB`
493
+ : `${(content.length / (1024 * 1024)).toFixed(1)} MB`;
494
+ const errorMsg = error instanceof Error
495
+ ? error.message
496
+ : error
497
+ ? String(error)
498
+ : "Processing returned no usable content";
499
+ return (`[${typeName} File: "${filename}"]\n` +
500
+ `Size: ${sizeStr}\n` +
501
+ `Format: ${detection.mimeType || "unknown"}\n` +
502
+ `Error: Could not extract content (${errorMsg}).\n` +
503
+ `The file was attached but could not be fully analyzed.`);
504
+ }
505
+ /**
506
+ * Extract metadata and printable strings from an unrecognized binary file.
507
+ * This is the "extract what you can" path for unknown file types.
508
+ *
509
+ * Extracts:
510
+ * - File size (human-readable)
511
+ * - MIME type / detected format
512
+ * - First N bytes as hex dump (for identification)
513
+ * - Printable ASCII/UTF-8 strings found in the binary (like `strings` command)
514
+ * - Known file signatures that we don't have full processors for
515
+ *
516
+ * @param content Raw file buffer
517
+ * @param detection Detection result (may be "unknown")
518
+ * @param filename Original filename (if known)
519
+ * @returns Formatted text summary suitable for LLM consumption
520
+ */
521
+ static extractBinaryMetadata(content, detection, filename) {
522
+ const parts = [];
523
+ // Header
524
+ const ext = detection.extension
525
+ ? `.${detection.extension}`
526
+ : filename.includes(".")
527
+ ? filename.slice(filename.lastIndexOf("."))
528
+ : "";
529
+ const typeLabel = ext
530
+ ? `${ext.toUpperCase().slice(1)} file`
531
+ : "Binary file";
532
+ parts.push(`[${typeLabel}: "${filename}"]`);
533
+ // Basic metadata
534
+ const sizeStr = formatFileSize(content.length);
535
+ parts.push(`Size: ${sizeStr}`);
536
+ if (detection.mimeType &&
537
+ detection.mimeType !== "application/octet-stream") {
538
+ parts.push(`Format: ${detection.mimeType}`);
539
+ }
540
+ // Known binary signature identification (broader than our processing capabilities)
541
+ const sigLabel = FileDetector.identifyBinarySignature(content);
542
+ if (sigLabel) {
543
+ parts.push(`Identified as: ${sigLabel}`);
544
+ }
545
+ // Hex dump of first 32 bytes for identification
546
+ const hexPreview = content
547
+ .subarray(0, Math.min(32, content.length))
548
+ .toString("hex")
549
+ .match(/.{1,2}/g)
550
+ ?.join(" ");
551
+ if (hexPreview) {
552
+ parts.push(`Header bytes: ${hexPreview}`);
553
+ }
554
+ // Extract printable strings (similar to Unix `strings` command)
555
+ const strings = FileDetector.extractPrintableStrings(content, 4, 50);
556
+ if (strings.length > 0) {
557
+ parts.push(`\nEmbedded text found (${strings.length} string${strings.length > 1 ? "s" : ""}):`);
558
+ for (const s of strings) {
559
+ parts.push(` "${s}"`);
560
+ }
561
+ }
562
+ parts.push(`\nThis file was attached but its format is not fully supported for content extraction.`);
563
+ parts.push(`The above metadata and any embedded text have been extracted for context.`);
564
+ return parts.join("\n");
565
+ }
566
+ /**
567
+ * Identify known binary file signatures beyond what we can process.
568
+ * Returns a human-readable description, or null if unrecognized.
569
+ */
570
+ static identifyBinarySignature(buf) {
571
+ if (buf.length < 4) {
572
+ return null;
573
+ }
574
+ // SQLite: "SQLite format 3\0"
575
+ if (buf.length >= 16 &&
576
+ buf.subarray(0, 15).toString("ascii") === "SQLite format 3") {
577
+ return "SQLite database";
578
+ }
579
+ // WOFF: "wOFF"
580
+ if (buf[0] === 0x77 &&
581
+ buf[1] === 0x4f &&
582
+ buf[2] === 0x46 &&
583
+ buf[3] === 0x46) {
584
+ return "WOFF font";
585
+ }
586
+ // WOFF2: "wOF2"
587
+ if (buf[0] === 0x77 &&
588
+ buf[1] === 0x4f &&
589
+ buf[2] === 0x46 &&
590
+ buf[3] === 0x32) {
591
+ return "WOFF2 font";
592
+ }
593
+ // TrueType/OpenType: starts with 0x00010000 or "OTTO"
594
+ if ((buf[0] === 0x00 &&
595
+ buf[1] === 0x01 &&
596
+ buf[2] === 0x00 &&
597
+ buf[3] === 0x00) ||
598
+ (buf[0] === 0x4f && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4f)) {
599
+ return "TrueType/OpenType font";
600
+ }
601
+ // ELF executable: \x7fELF
602
+ if (buf[0] === 0x7f &&
603
+ buf[1] === 0x45 &&
604
+ buf[2] === 0x4c &&
605
+ buf[3] === 0x46) {
606
+ return "ELF executable/library";
607
+ }
608
+ // Mach-O: 0xFEEDFACE or 0xFEEDFACF (64-bit) or 0xCAFEBABE (universal)
609
+ if ((buf[0] === 0xfe &&
610
+ buf[1] === 0xed &&
611
+ buf[2] === 0xfa &&
612
+ buf[3] === 0xce) ||
613
+ (buf[0] === 0xfe &&
614
+ buf[1] === 0xed &&
615
+ buf[2] === 0xfa &&
616
+ buf[3] === 0xcf) ||
617
+ (buf[0] === 0xca && buf[1] === 0xfe && buf[2] === 0xba && buf[3] === 0xbe)) {
618
+ return "Mach-O executable/library";
619
+ }
620
+ // PE/Windows executable: "MZ"
621
+ if (buf[0] === 0x4d && buf[1] === 0x5a) {
622
+ return "Windows PE executable/DLL";
623
+ }
624
+ // WebAssembly: "\0asm"
625
+ if (buf[0] === 0x00 &&
626
+ buf[1] === 0x61 &&
627
+ buf[2] === 0x73 &&
628
+ buf[3] === 0x6d) {
629
+ return "WebAssembly binary";
630
+ }
631
+ // DWG (AutoCAD): starts with "AC10"
632
+ if (buf[0] === 0x41 &&
633
+ buf[1] === 0x43 &&
634
+ buf[2] === 0x31 &&
635
+ buf[3] === 0x30) {
636
+ return "AutoCAD DWG drawing";
637
+ }
638
+ // BZ2: "BZ" + 'h'
639
+ if (buf[0] === 0x42 && buf[1] === 0x5a && buf[2] === 0x68) {
640
+ return "BZip2 compressed archive";
641
+ }
642
+ // XZ: 0xFD + "7zXZ"
643
+ if (buf.length >= 6 &&
644
+ buf[0] === 0xfd &&
645
+ buf[1] === 0x37 &&
646
+ buf[2] === 0x7a &&
647
+ buf[3] === 0x58 &&
648
+ buf[4] === 0x5a &&
649
+ buf[5] === 0x00) {
650
+ return "XZ compressed archive";
651
+ }
652
+ // 7z: "7z" + BC AF 27 1C
653
+ if (buf.length >= 6 &&
654
+ buf[0] === 0x37 &&
655
+ buf[1] === 0x7a &&
656
+ buf[2] === 0xbc &&
657
+ buf[3] === 0xaf &&
658
+ buf[4] === 0x27 &&
659
+ buf[5] === 0x1c) {
660
+ return "7-Zip archive";
661
+ }
662
+ // ISO 9660: "CD001" at offset 32769
663
+ if (buf.length > 32773 &&
664
+ buf.subarray(32769, 32774).toString("ascii") === "CD001") {
665
+ return "ISO 9660 disc image";
666
+ }
667
+ // Apache Parquet: "PAR1"
668
+ if (buf[0] === 0x50 &&
669
+ buf[1] === 0x41 &&
670
+ buf[2] === 0x52 &&
671
+ buf[3] === 0x31) {
672
+ return "Apache Parquet data file";
673
+ }
674
+ // Protocol Buffers compiled: (no fixed magic, skip)
675
+ // TIFF (already handled as image, but including for completeness)
676
+ if ((buf[0] === 0x49 &&
677
+ buf[1] === 0x49 &&
678
+ buf[2] === 0x2a &&
679
+ buf[3] === 0x00) ||
680
+ (buf[0] === 0x4d && buf[1] === 0x4d && buf[2] === 0x00 && buf[3] === 0x2a)) {
681
+ return "TIFF image";
682
+ }
683
+ // ICO: 00 00 01 00
684
+ if (buf[0] === 0x00 &&
685
+ buf[1] === 0x00 &&
686
+ buf[2] === 0x01 &&
687
+ buf[3] === 0x00) {
688
+ return "ICO icon image";
689
+ }
690
+ return null;
691
+ }
692
+ /**
693
+ * Extract printable ASCII strings from a binary buffer.
694
+ * Similar to the Unix `strings` utility.
695
+ *
696
+ * @param buf Buffer to scan
697
+ * @param minLength Minimum string length to include (default 4)
698
+ * @param maxStrings Maximum number of strings to return (default 50)
699
+ * @returns Array of printable strings found in the binary
700
+ */
701
+ static extractPrintableStrings(buf, minLength = 4, maxStrings = 50) {
702
+ const strings = [];
703
+ let current = "";
704
+ // Only scan first 64KB to avoid huge processing time
705
+ const scanLimit = Math.min(buf.length, 64 * 1024);
706
+ for (let i = 0; i < scanLimit; i++) {
707
+ const byte = buf[i];
708
+ // Printable ASCII range (space through tilde) plus tab
709
+ if ((byte >= 0x20 && byte <= 0x7e) || byte === 0x09) {
710
+ current += String.fromCharCode(byte);
711
+ }
712
+ else {
713
+ if (current.length >= minLength) {
714
+ strings.push(current);
715
+ if (strings.length >= maxStrings) {
716
+ break;
717
+ }
718
+ }
719
+ current = "";
720
+ }
721
+ }
722
+ // Flush last string
723
+ if (current.length >= minLength && strings.length < maxStrings) {
724
+ strings.push(current);
725
+ }
726
+ return strings;
727
+ }
441
728
  /**
442
729
  * Route to appropriate processor
443
730
  */
@@ -458,6 +745,18 @@ export class FileDetector {
458
745
  // SVG is processed as text content (sanitized XML markup)
459
746
  // AI providers don't support SVG as image format, so we extract text content
460
747
  return await FileDetector.processSvgAsText(content, detection);
748
+ case "video":
749
+ return await FileDetector.processVideoFile(content, detection);
750
+ case "audio":
751
+ return await FileDetector.processAudioFile(content, detection);
752
+ case "archive":
753
+ return await FileDetector.processArchiveFile(content, detection);
754
+ case "xlsx":
755
+ return await FileDetector.processXlsxFile(content, detection);
756
+ case "docx":
757
+ return await FileDetector.processDocxFile(content, detection);
758
+ case "pptx":
759
+ return await FileDetector.processPptxFile(content, detection);
461
760
  case "text":
462
761
  return {
463
762
  type: "text",
@@ -465,9 +764,365 @@ export class FileDetector {
465
764
  mimeType: detection.mimeType || "text/plain",
466
765
  metadata: detection.metadata,
467
766
  };
468
- default:
469
- throw new Error(`Unsupported file type: ${detection.type}`);
767
+ default: {
768
+ // Graceful degradation: try to treat unknown types as text if content is valid UTF-8
769
+ const unknownContent = content.toString("utf-8");
770
+ if (FileDetector.isValidText(unknownContent)) {
771
+ logger.warn(`[FileDetector] Unknown type "${detection.type}", treating as text`);
772
+ return {
773
+ type: "text",
774
+ content: unknownContent,
775
+ mimeType: detection.mimeType || "text/plain",
776
+ metadata: detection.metadata,
777
+ };
778
+ }
779
+ // Binary file that we can't fully process — extract what we can
780
+ // (metadata, printable strings, signature identification)
781
+ const filename = detection.metadata.filename || "file";
782
+ logger.warn(`[FileDetector] Unknown binary type "${detection.type}", extracting metadata for "${filename}"`);
783
+ return {
784
+ type: "unknown",
785
+ content: FileDetector.extractBinaryMetadata(content, detection, filename),
786
+ mimeType: detection.mimeType || "application/octet-stream",
787
+ metadata: detection.metadata,
788
+ };
789
+ }
790
+ }
791
+ }
792
+ /**
793
+ * Process video file: extract metadata, keyframes, and subtitles via VideoProcessor
794
+ */
795
+ static async processVideoFile(content, detection) {
796
+ const videoFilename = detection.metadata.filename || "video";
797
+ try {
798
+ const videoResult = await videoProcessor.processFile({
799
+ id: videoFilename,
800
+ name: videoFilename,
801
+ mimetype: detection.mimeType || "video/mp4",
802
+ size: content.length,
803
+ buffer: content,
804
+ });
805
+ if (videoResult.success && videoResult.data) {
806
+ return {
807
+ type: "video",
808
+ content: videoResult.data.textContent ||
809
+ FileDetector.formatInformativePlaceholder("Video", videoFilename, content, detection),
810
+ mimeType: detection.mimeType,
811
+ images: videoResult.data.keyframes && videoResult.data.keyframes.length > 0
812
+ ? videoResult.data.keyframes
813
+ : undefined,
814
+ metadata: {
815
+ ...detection.metadata,
816
+ frameCount: videoResult.data.frameCount,
817
+ hasKeyframes: videoResult.data.hasKeyframes,
818
+ },
819
+ };
820
+ }
821
+ }
822
+ catch (videoError) {
823
+ logger.warn(`[FileDetector] VideoProcessor failed for ${videoFilename}, using fallback`, videoError instanceof Error ? videoError.message : String(videoError));
824
+ return {
825
+ type: "video",
826
+ content: FileDetector.formatInformativePlaceholder("Video", videoFilename, content, detection, videoError),
827
+ mimeType: detection.mimeType,
828
+ metadata: detection.metadata,
829
+ };
830
+ }
831
+ // Fallback if processor returned no data
832
+ return {
833
+ type: "video",
834
+ content: FileDetector.formatInformativePlaceholder("Video", videoFilename, content, detection),
835
+ mimeType: detection.mimeType,
836
+ metadata: detection.metadata,
837
+ };
838
+ }
839
+ /**
840
+ * Process audio file: extract metadata, tags, and cover art via AudioProcessor
841
+ */
842
+ static async processAudioFile(content, detection) {
843
+ const audioFilename = detection.metadata.filename || "audio";
844
+ try {
845
+ const audioResult = await audioProcessor.processFile({
846
+ id: audioFilename,
847
+ name: audioFilename,
848
+ mimetype: detection.mimeType || "audio/mpeg",
849
+ size: content.length,
850
+ buffer: content,
851
+ });
852
+ if (audioResult.success && audioResult.data) {
853
+ return {
854
+ type: "audio",
855
+ content: audioResult.data.textContent ||
856
+ FileDetector.formatInformativePlaceholder("Audio", audioFilename, content, detection),
857
+ mimeType: detection.mimeType,
858
+ // Surface embedded cover art as an image content block
859
+ images: audioResult.data.coverArt
860
+ ? [audioResult.data.coverArt]
861
+ : undefined,
862
+ metadata: detection.metadata,
863
+ };
864
+ }
865
+ }
866
+ catch (audioError) {
867
+ logger.warn(`[FileDetector] AudioProcessor failed for ${audioFilename}, using fallback`, audioError instanceof Error ? audioError.message : String(audioError));
868
+ return {
869
+ type: "audio",
870
+ content: FileDetector.formatInformativePlaceholder("Audio", audioFilename, content, detection, audioError),
871
+ mimeType: detection.mimeType,
872
+ metadata: detection.metadata,
873
+ };
470
874
  }
875
+ // Fallback if processor returned no data
876
+ return {
877
+ type: "audio",
878
+ content: FileDetector.formatInformativePlaceholder("Audio", audioFilename, content, detection),
879
+ mimeType: detection.mimeType,
880
+ metadata: detection.metadata,
881
+ };
882
+ }
883
+ /**
884
+ * Process archive file: list contents and extract metadata via ArchiveProcessor
885
+ */
886
+ static async processArchiveFile(content, detection) {
887
+ const archiveFilename = detection.metadata.filename || "archive";
888
+ try {
889
+ const archiveResult = await archiveProcessor.processFile({
890
+ id: archiveFilename,
891
+ name: archiveFilename,
892
+ mimetype: detection.mimeType || "application/zip",
893
+ size: content.length,
894
+ buffer: content,
895
+ });
896
+ if (archiveResult.success && archiveResult.data) {
897
+ return {
898
+ type: "archive",
899
+ content: archiveResult.data.textContent ||
900
+ FileDetector.formatInformativePlaceholder("Archive", archiveFilename, content, detection),
901
+ mimeType: detection.mimeType,
902
+ metadata: detection.metadata,
903
+ };
904
+ }
905
+ }
906
+ catch (archiveError) {
907
+ logger.warn(`[FileDetector] ArchiveProcessor failed for ${archiveFilename}, using fallback`, archiveError instanceof Error
908
+ ? archiveError.message
909
+ : String(archiveError));
910
+ return {
911
+ type: "archive",
912
+ content: FileDetector.formatInformativePlaceholder("Archive", archiveFilename, content, detection, archiveError),
913
+ mimeType: detection.mimeType,
914
+ metadata: detection.metadata,
915
+ };
916
+ }
917
+ // Fallback if processor returned no data
918
+ return {
919
+ type: "archive",
920
+ content: FileDetector.formatInformativePlaceholder("Archive", archiveFilename, content, detection),
921
+ mimeType: detection.mimeType,
922
+ metadata: detection.metadata,
923
+ };
924
+ }
925
+ /**
926
+ * Process Excel/OpenDocument spreadsheet file via ExcelProcessor or OpenDocumentProcessor
927
+ */
928
+ static async processXlsxFile(content, detection) {
929
+ const xlsxFilename = detection.metadata.filename || "spreadsheet";
930
+ try {
931
+ const ext = detection.extension?.toLowerCase();
932
+ if (ext === "ods") {
933
+ const { openDocumentProcessor } = await import("../processors/document/OpenDocumentProcessor.js");
934
+ const odsResult = await openDocumentProcessor.processFile({
935
+ id: xlsxFilename,
936
+ name: xlsxFilename,
937
+ mimetype: detection.mimeType ||
938
+ "application/vnd.oasis.opendocument.spreadsheet",
939
+ size: content.length,
940
+ buffer: content,
941
+ });
942
+ if (odsResult.success && odsResult.data) {
943
+ return {
944
+ type: "xlsx",
945
+ content: odsResult.data.textContent ||
946
+ FileDetector.formatInformativePlaceholder("Spreadsheet", xlsxFilename, content, detection),
947
+ mimeType: detection.mimeType,
948
+ metadata: detection.metadata,
949
+ };
950
+ }
951
+ }
952
+ else {
953
+ const { excelProcessor } = await import("../processors/document/ExcelProcessor.js");
954
+ const xlsxResult = await excelProcessor.processFile({
955
+ id: xlsxFilename,
956
+ name: xlsxFilename,
957
+ mimetype: detection.mimeType ||
958
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
959
+ size: content.length,
960
+ buffer: content,
961
+ });
962
+ if (xlsxResult.success && xlsxResult.data) {
963
+ // Build text content from worksheets
964
+ const sheets = xlsxResult.data.worksheets || [];
965
+ let textContent = `Spreadsheet: ${sheets.length} sheet(s), ${xlsxResult.data.totalRows} total rows\n`;
966
+ for (const sheet of sheets) {
967
+ textContent += `\n### Sheet: ${sheet.name}\n`;
968
+ textContent += `Columns (${sheet.columnCount}): ${sheet.headers.join(", ")}\n`;
969
+ textContent += `Rows: ${sheet.rowCount}\n`;
970
+ // Include first rows as sample data
971
+ const sampleRows = sheet.rows.slice(0, 20);
972
+ const rowText = sampleRows
973
+ .map((row) => row.map((c) => String(c ?? "")).join("\t"))
974
+ .join("\n");
975
+ if (!rowText) {
976
+ continue;
977
+ }
978
+ textContent += `\nData:\n${sheet.headers.join("\t")}\n${rowText}\n`;
979
+ const remaining = sheet.rowCount - 20;
980
+ if (remaining > 0) {
981
+ textContent += `... (${remaining} more rows)\n`;
982
+ }
983
+ }
984
+ return {
985
+ type: "xlsx",
986
+ content: textContent,
987
+ mimeType: detection.mimeType,
988
+ metadata: detection.metadata,
989
+ };
990
+ }
991
+ }
992
+ }
993
+ catch (xlsxError) {
994
+ logger.warn(`[FileDetector] ExcelProcessor failed for ${xlsxFilename}, using fallback`, xlsxError instanceof Error ? xlsxError.message : String(xlsxError));
995
+ return {
996
+ type: "xlsx",
997
+ content: FileDetector.formatInformativePlaceholder("Spreadsheet", xlsxFilename, content, detection, xlsxError),
998
+ mimeType: detection.mimeType,
999
+ metadata: detection.metadata,
1000
+ };
1001
+ }
1002
+ // Fallback if processor returned no data
1003
+ return {
1004
+ type: "xlsx",
1005
+ content: FileDetector.formatInformativePlaceholder("Spreadsheet", xlsxFilename, content, detection),
1006
+ mimeType: detection.mimeType,
1007
+ metadata: detection.metadata,
1008
+ };
1009
+ }
1010
+ /**
1011
+ * Process Word/OpenDocument/RTF document via WordProcessor, OpenDocumentProcessor, or RtfProcessor
1012
+ */
1013
+ static async processDocxFile(content, detection) {
1014
+ const docxFilename = detection.metadata.filename || "document";
1015
+ const ext = detection.extension?.toLowerCase();
1016
+ try {
1017
+ if (ext === "odt") {
1018
+ const { openDocumentProcessor } = await import("../processors/document/OpenDocumentProcessor.js");
1019
+ const odtResult = await openDocumentProcessor.processFile({
1020
+ id: docxFilename,
1021
+ name: docxFilename,
1022
+ mimetype: detection.mimeType || "application/vnd.oasis.opendocument.text",
1023
+ size: content.length,
1024
+ buffer: content,
1025
+ });
1026
+ if (odtResult.success && odtResult.data) {
1027
+ return {
1028
+ type: "docx",
1029
+ content: odtResult.data.textContent ||
1030
+ FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
1031
+ mimeType: detection.mimeType,
1032
+ metadata: detection.metadata,
1033
+ };
1034
+ }
1035
+ }
1036
+ else if (ext === "rtf") {
1037
+ const { rtfProcessor } = await import("../processors/document/RtfProcessor.js");
1038
+ const rtfResult = await rtfProcessor.processFile({
1039
+ id: docxFilename,
1040
+ name: docxFilename,
1041
+ mimetype: detection.mimeType || "application/rtf",
1042
+ size: content.length,
1043
+ buffer: content,
1044
+ });
1045
+ if (rtfResult.success && rtfResult.data) {
1046
+ return {
1047
+ type: "docx",
1048
+ content: rtfResult.data.textContent ||
1049
+ FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
1050
+ mimeType: detection.mimeType,
1051
+ metadata: detection.metadata,
1052
+ };
1053
+ }
1054
+ }
1055
+ else {
1056
+ const { wordProcessor } = await import("../processors/document/WordProcessor.js");
1057
+ const docxResult = await wordProcessor.processFile({
1058
+ id: docxFilename,
1059
+ name: docxFilename,
1060
+ mimetype: detection.mimeType ||
1061
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1062
+ size: content.length,
1063
+ buffer: content,
1064
+ });
1065
+ if (docxResult.success && docxResult.data) {
1066
+ return {
1067
+ type: "docx",
1068
+ content: docxResult.data.textContent ||
1069
+ FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
1070
+ mimeType: detection.mimeType,
1071
+ metadata: detection.metadata,
1072
+ };
1073
+ }
1074
+ }
1075
+ }
1076
+ catch (docxError) {
1077
+ logger.warn(`[FileDetector] Document processor failed for ${docxFilename}, using fallback`, docxError instanceof Error ? docxError.message : String(docxError));
1078
+ return {
1079
+ type: "docx",
1080
+ content: FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection, docxError),
1081
+ mimeType: detection.mimeType,
1082
+ metadata: detection.metadata,
1083
+ };
1084
+ }
1085
+ // Fallback if processor returned no data
1086
+ return {
1087
+ type: "docx",
1088
+ content: FileDetector.formatInformativePlaceholder("Document", docxFilename, content, detection),
1089
+ mimeType: detection.mimeType,
1090
+ metadata: detection.metadata,
1091
+ };
1092
+ }
1093
+ /**
1094
+ * Process PowerPoint/OpenDocument presentation via PptxProcessor
1095
+ */
1096
+ static async processPptxFile(content, detection) {
1097
+ const pptxFilename = detection.metadata.filename || "presentation";
1098
+ try {
1099
+ const { PptxProcessor } = await import("../processors/document/PptxProcessor.js");
1100
+ const pptxResult = await PptxProcessor.extractText(content);
1101
+ if (pptxResult) {
1102
+ return {
1103
+ type: "pptx",
1104
+ content: pptxResult,
1105
+ mimeType: detection.mimeType,
1106
+ metadata: detection.metadata,
1107
+ };
1108
+ }
1109
+ }
1110
+ catch (pptxError) {
1111
+ logger.warn(`[FileDetector] PptxProcessor failed for ${pptxFilename}, using fallback`, pptxError instanceof Error ? pptxError.message : String(pptxError));
1112
+ return {
1113
+ type: "pptx",
1114
+ content: FileDetector.formatInformativePlaceholder("Presentation", pptxFilename, content, detection, pptxError),
1115
+ mimeType: detection.mimeType,
1116
+ metadata: detection.metadata,
1117
+ };
1118
+ }
1119
+ // Fallback if processor returned no content
1120
+ return {
1121
+ type: "pptx",
1122
+ content: FileDetector.formatInformativePlaceholder("Presentation", pptxFilename, content, detection),
1123
+ mimeType: detection.mimeType,
1124
+ metadata: detection.metadata,
1125
+ };
471
1126
  }
472
1127
  /**
473
1128
  * Process SVG file as text content
@@ -535,7 +1190,7 @@ export class FileDetector {
535
1190
  * Load file from URL with automatic retry on transient network errors
536
1191
  */
537
1192
  static async loadFromURL(url, options) {
538
- const maxSize = options?.maxSize || 10 * 1024 * 1024;
1193
+ const maxSize = options?.maxSize || 200 * 1024 * 1024; // 200MB default (matches Curator memory-safety cap)
539
1194
  const timeout = options?.timeout || FileDetector.DEFAULT_NETWORK_TIMEOUT;
540
1195
  const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
541
1196
  const retryDelay = options?.retryDelay ?? DEFAULT_RETRY_DELAY;
@@ -565,7 +1220,7 @@ export class FileDetector {
565
1220
  * Load file from filesystem path
566
1221
  */
567
1222
  static async loadFromPath(path, options) {
568
- const maxSize = options?.maxSize || 10 * 1024 * 1024;
1223
+ const maxSize = options?.maxSize || 200 * 1024 * 1024; // 200MB default (matches Curator memory-safety cap)
569
1224
  const statInfo = await stat(path);
570
1225
  if (!statInfo.isFile()) {
571
1226
  throw new Error("Not a file");
@@ -610,6 +1265,98 @@ class MagicBytesStrategy {
610
1265
  if (this.isPDF(input)) {
611
1266
  return this.result("pdf", "application/pdf", 95);
612
1267
  }
1268
+ // MP4/MOV: "ftyp" at offset 4
1269
+ if (input.length >= 8 &&
1270
+ input[4] === 0x66 &&
1271
+ input[5] === 0x74 &&
1272
+ input[6] === 0x79 &&
1273
+ input[7] === 0x70) {
1274
+ return this.result("video", "video/mp4", 95);
1275
+ }
1276
+ // MKV/WebM: EBML header
1277
+ if (input.length >= 4 &&
1278
+ input[0] === 0x1a &&
1279
+ input[1] === 0x45 &&
1280
+ input[2] === 0xdf &&
1281
+ input[3] === 0xa3) {
1282
+ return this.result("video", "video/x-matroska", 90);
1283
+ }
1284
+ // AVI: "RIFF" + "AVI "
1285
+ if (input.length >= 12 &&
1286
+ input[0] === 0x52 &&
1287
+ input[1] === 0x49 &&
1288
+ input[2] === 0x46 &&
1289
+ input[3] === 0x46 &&
1290
+ input[8] === 0x41 &&
1291
+ input[9] === 0x56 &&
1292
+ input[10] === 0x49 &&
1293
+ input[11] === 0x20) {
1294
+ return this.result("video", "video/x-msvideo", 95);
1295
+ }
1296
+ // WAV: "RIFF" + "WAVE"
1297
+ if (input.length >= 12 &&
1298
+ input[0] === 0x52 &&
1299
+ input[1] === 0x49 &&
1300
+ input[2] === 0x46 &&
1301
+ input[3] === 0x46 &&
1302
+ input[8] === 0x57 &&
1303
+ input[9] === 0x41 &&
1304
+ input[10] === 0x56 &&
1305
+ input[11] === 0x45) {
1306
+ return this.result("audio", "audio/wav", 95);
1307
+ }
1308
+ // MP3: ID3 tag
1309
+ if (input.length >= 3 &&
1310
+ input[0] === 0x49 &&
1311
+ input[1] === 0x44 &&
1312
+ input[2] === 0x33) {
1313
+ return this.result("audio", "audio/mpeg", 95);
1314
+ }
1315
+ // MP3: sync word
1316
+ if (input.length >= 2 && input[0] === 0xff && (input[1] & 0xe0) === 0xe0) {
1317
+ return this.result("audio", "audio/mpeg", 80);
1318
+ }
1319
+ // FLAC: "fLaC"
1320
+ if (input.length >= 4 &&
1321
+ input[0] === 0x66 &&
1322
+ input[1] === 0x4c &&
1323
+ input[2] === 0x61 &&
1324
+ input[3] === 0x43) {
1325
+ return this.result("audio", "audio/flac", 95);
1326
+ }
1327
+ // OGG: "OggS"
1328
+ if (input.length >= 4 &&
1329
+ input[0] === 0x4f &&
1330
+ input[1] === 0x67 &&
1331
+ input[2] === 0x67 &&
1332
+ input[3] === 0x53) {
1333
+ return this.result("audio", "audio/ogg", 90);
1334
+ }
1335
+ // ZIP: "PK\x03\x04"
1336
+ // NOTE: Many document formats (OOXML: .xlsx, .docx, .pptx; ODF: .odt, .ods)
1337
+ // are internally ZIP archives and share these magic bytes. We return a lower
1338
+ // confidence (70%) so the ExtensionStrategy (85%) can override with the correct
1339
+ // document type when a file path with extension is available. For raw buffers
1340
+ // without path info, this falls through to archive as a safe default.
1341
+ if (input.length >= 4 &&
1342
+ input[0] === 0x50 &&
1343
+ input[1] === 0x4b &&
1344
+ input[2] === 0x03 &&
1345
+ input[3] === 0x04) {
1346
+ return this.result("archive", "application/zip", 70);
1347
+ }
1348
+ // GZIP: 1F 8B
1349
+ if (input.length >= 2 && input[0] === 0x1f && input[1] === 0x8b) {
1350
+ return this.result("archive", "application/gzip", 90);
1351
+ }
1352
+ // RAR: "Rar!"
1353
+ if (input.length >= 4 &&
1354
+ input[0] === 0x52 &&
1355
+ input[1] === 0x61 &&
1356
+ input[2] === 0x72 &&
1357
+ input[3] === 0x21) {
1358
+ return this.result("archive", "application/x-rar-compressed", 95);
1359
+ }
613
1360
  return this.unknown();
614
1361
  }
615
1362
  isPNG(buf) {
@@ -687,24 +1434,95 @@ class MimeTypeStrategy {
687
1434
  }
688
1435
  }
689
1436
  mimeToFileType(mime) {
690
- if (mime.includes("text/csv")) {
691
- return "csv";
692
- }
693
- if (mime.includes("text/tab-separated-values")) {
1437
+ const lower = mime.toLowerCase().split(";")[0].trim();
1438
+ // CSV
1439
+ if (lower === "text/csv" || lower === "text/tab-separated-values") {
694
1440
  return "csv";
695
1441
  }
696
1442
  // SVG is processed as text/markup, NOT as image
697
1443
  // Must check before generic image/ check
698
- if (mime.includes("image/svg+xml")) {
1444
+ if (lower === "image/svg+xml") {
699
1445
  return "svg";
700
1446
  }
701
- if (mime.includes("image/")) {
1447
+ // Images
1448
+ if (lower.startsWith("image/")) {
702
1449
  return "image";
703
1450
  }
704
- if (mime.includes("application/pdf")) {
1451
+ // PDF
1452
+ if (lower === "application/pdf") {
705
1453
  return "pdf";
706
1454
  }
707
- if (mime.includes("text/plain")) {
1455
+ // Video
1456
+ if (lower.startsWith("video/")) {
1457
+ return "video";
1458
+ }
1459
+ // Audio
1460
+ if (lower.startsWith("audio/")) {
1461
+ return "audio";
1462
+ }
1463
+ // Office documents — OOXML
1464
+ if (lower ===
1465
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
1466
+ lower === "application/msword") {
1467
+ return "docx";
1468
+ }
1469
+ if (lower ===
1470
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ||
1471
+ lower === "application/vnd.ms-excel") {
1472
+ return "xlsx";
1473
+ }
1474
+ if (lower ===
1475
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation" ||
1476
+ lower === "application/vnd.ms-powerpoint") {
1477
+ return "pptx";
1478
+ }
1479
+ // OpenDocument formats
1480
+ if (lower === "application/vnd.oasis.opendocument.text") {
1481
+ return "docx";
1482
+ }
1483
+ if (lower === "application/vnd.oasis.opendocument.spreadsheet") {
1484
+ return "xlsx";
1485
+ }
1486
+ if (lower === "application/vnd.oasis.opendocument.presentation") {
1487
+ return "pptx";
1488
+ }
1489
+ // RTF
1490
+ if (lower === "application/rtf" || lower === "text/rtf") {
1491
+ return "docx";
1492
+ }
1493
+ // Archive formats
1494
+ if (lower === "application/zip" ||
1495
+ lower === "application/x-zip-compressed" ||
1496
+ lower === "application/gzip" ||
1497
+ lower === "application/x-gzip" ||
1498
+ lower === "application/x-tar" ||
1499
+ lower === "application/x-compressed-tar" ||
1500
+ lower === "application/java-archive" ||
1501
+ lower === "application/x-rar-compressed" ||
1502
+ lower === "application/vnd.rar" ||
1503
+ lower === "application/x-7z-compressed") {
1504
+ return "archive";
1505
+ }
1506
+ // Text/markup/source code — broad matching
1507
+ if (lower === "text/plain" ||
1508
+ lower === "text/markdown" ||
1509
+ lower === "text/html" ||
1510
+ lower === "text/css" ||
1511
+ lower === "text/javascript" ||
1512
+ lower === "text/typescript" ||
1513
+ lower === "application/json" ||
1514
+ lower === "application/xml" ||
1515
+ lower === "text/xml" ||
1516
+ lower === "application/yaml" ||
1517
+ lower === "application/x-yaml") {
1518
+ return "text";
1519
+ }
1520
+ // Source code MIME types (text/x-*)
1521
+ if (lower.startsWith("text/x-")) {
1522
+ return "text";
1523
+ }
1524
+ // Generic text types we may not have listed explicitly
1525
+ if (lower.startsWith("text/")) {
708
1526
  return "text";
709
1527
  }
710
1528
  return "unknown";
@@ -751,18 +1569,109 @@ class ExtensionStrategy {
751
1569
  svg: "svg",
752
1570
  avif: "image",
753
1571
  pdf: "pdf",
1572
+ // Video formats
1573
+ mp4: "video",
1574
+ mkv: "video",
1575
+ mov: "video",
1576
+ avi: "video",
1577
+ webm: "video",
1578
+ wmv: "video",
1579
+ flv: "video",
1580
+ // Audio formats
1581
+ mp3: "audio",
1582
+ wav: "audio",
1583
+ ogg: "audio",
1584
+ flac: "audio",
1585
+ m4a: "audio",
1586
+ aac: "audio",
1587
+ wma: "audio",
1588
+ opus: "audio",
1589
+ // Archive formats
1590
+ zip: "archive",
1591
+ tar: "archive",
1592
+ gz: "archive",
1593
+ tgz: "archive",
1594
+ rar: "archive",
1595
+ "7z": "archive",
1596
+ jar: "archive",
1597
+ // Document formats (ZIP-based internally)
1598
+ xlsx: "xlsx",
1599
+ xls: "xlsx",
1600
+ docx: "docx",
1601
+ doc: "docx",
1602
+ pptx: "pptx",
1603
+ ppt: "pptx",
1604
+ odt: "docx", // OpenDocument text → processed like docx
1605
+ ods: "xlsx", // OpenDocument spreadsheet → processed like xlsx
1606
+ odp: "pptx", // OpenDocument presentation → processed like pptx
1607
+ rtf: "docx", // RTF → processed like docx (text extraction)
1608
+ // Text/markup formats
754
1609
  txt: "text",
755
1610
  md: "text",
1611
+ markdown: "text",
756
1612
  json: "text",
757
1613
  xml: "text",
758
1614
  yaml: "text",
759
1615
  yml: "text",
760
1616
  html: "text",
761
1617
  htm: "text",
1618
+ css: "text",
762
1619
  log: "text",
763
1620
  conf: "text",
764
1621
  cfg: "text",
765
1622
  ini: "text",
1623
+ env: "text",
1624
+ toml: "text",
1625
+ properties: "text",
1626
+ gitignore: "text",
1627
+ dockerignore: "text",
1628
+ editorconfig: "text",
1629
+ prettierrc: "text",
1630
+ eslintrc: "text",
1631
+ babelrc: "text",
1632
+ // Source code formats
1633
+ js: "text",
1634
+ mjs: "text",
1635
+ cjs: "text",
1636
+ jsx: "text",
1637
+ ts: "text",
1638
+ tsx: "text",
1639
+ py: "text",
1640
+ java: "text",
1641
+ go: "text",
1642
+ rs: "text",
1643
+ rb: "text",
1644
+ php: "text",
1645
+ c: "text",
1646
+ cpp: "text",
1647
+ cc: "text",
1648
+ h: "text",
1649
+ hpp: "text",
1650
+ cs: "text",
1651
+ swift: "text",
1652
+ kt: "text",
1653
+ kts: "text",
1654
+ scala: "text",
1655
+ sh: "text",
1656
+ bash: "text",
1657
+ zsh: "text",
1658
+ ps1: "text",
1659
+ sql: "text",
1660
+ r: "text",
1661
+ lua: "text",
1662
+ pl: "text",
1663
+ perl: "text",
1664
+ dart: "text",
1665
+ ex: "text",
1666
+ exs: "text",
1667
+ erl: "text",
1668
+ hs: "text",
1669
+ clj: "text",
1670
+ lisp: "text",
1671
+ vim: "text",
1672
+ // Additional video/image
1673
+ m4v: "video",
1674
+ ico: "image",
766
1675
  };
767
1676
  const type = typeMap[ext.toLowerCase()];
768
1677
  return {
@@ -809,18 +1718,109 @@ class ExtensionStrategy {
809
1718
  svg: "image/svg+xml",
810
1719
  avif: "image/avif",
811
1720
  pdf: "application/pdf",
1721
+ // Video MIME types
1722
+ mp4: "video/mp4",
1723
+ mkv: "video/x-matroska",
1724
+ mov: "video/quicktime",
1725
+ avi: "video/x-msvideo",
1726
+ webm: "video/webm",
1727
+ wmv: "video/x-ms-wmv",
1728
+ flv: "video/x-flv",
1729
+ // Audio MIME types
1730
+ mp3: "audio/mpeg",
1731
+ wav: "audio/wav",
1732
+ ogg: "audio/ogg",
1733
+ flac: "audio/flac",
1734
+ m4a: "audio/mp4",
1735
+ aac: "audio/aac",
1736
+ wma: "audio/x-ms-wma",
1737
+ opus: "audio/opus",
1738
+ // Archive MIME types
1739
+ zip: "application/zip",
1740
+ tar: "application/x-tar",
1741
+ gz: "application/gzip",
1742
+ tgz: "application/gzip",
1743
+ rar: "application/x-rar-compressed",
1744
+ "7z": "application/x-7z-compressed",
1745
+ jar: "application/java-archive",
1746
+ // Document MIME types
1747
+ xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1748
+ xls: "application/vnd.ms-excel",
1749
+ docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1750
+ doc: "application/msword",
1751
+ pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1752
+ ppt: "application/vnd.ms-powerpoint",
1753
+ odt: "application/vnd.oasis.opendocument.text",
1754
+ ods: "application/vnd.oasis.opendocument.spreadsheet",
1755
+ odp: "application/vnd.oasis.opendocument.presentation",
1756
+ rtf: "application/rtf",
1757
+ // Text/markup MIME types
812
1758
  txt: "text/plain",
813
1759
  md: "text/markdown",
1760
+ markdown: "text/markdown",
814
1761
  json: "application/json",
815
1762
  xml: "application/xml",
816
1763
  yaml: "application/yaml",
817
1764
  yml: "application/yaml",
818
1765
  html: "text/html",
819
1766
  htm: "text/html",
1767
+ css: "text/css",
820
1768
  log: "text/plain",
821
1769
  conf: "text/plain",
822
1770
  cfg: "text/plain",
823
1771
  ini: "text/plain",
1772
+ env: "text/plain",
1773
+ toml: "text/plain",
1774
+ properties: "text/plain",
1775
+ gitignore: "text/plain",
1776
+ dockerignore: "text/plain",
1777
+ editorconfig: "text/plain",
1778
+ prettierrc: "application/json",
1779
+ eslintrc: "application/json",
1780
+ babelrc: "application/json",
1781
+ // Source code MIME types
1782
+ js: "text/javascript",
1783
+ mjs: "text/javascript",
1784
+ cjs: "text/javascript",
1785
+ jsx: "text/javascript",
1786
+ ts: "text/typescript",
1787
+ tsx: "text/typescript",
1788
+ py: "text/x-python",
1789
+ java: "text/x-java-source",
1790
+ go: "text/x-go",
1791
+ rs: "text/x-rustsrc",
1792
+ rb: "text/x-ruby",
1793
+ php: "text/x-php",
1794
+ c: "text/x-c",
1795
+ cpp: "text/x-c++",
1796
+ cc: "text/x-c++",
1797
+ h: "text/x-c",
1798
+ hpp: "text/x-c++",
1799
+ cs: "text/x-csharp",
1800
+ swift: "text/x-swift",
1801
+ kt: "text/x-kotlin",
1802
+ kts: "text/x-kotlin",
1803
+ scala: "text/x-scala",
1804
+ sh: "text/x-shellscript",
1805
+ bash: "text/x-shellscript",
1806
+ zsh: "text/x-shellscript",
1807
+ ps1: "text/x-powershell",
1808
+ sql: "text/x-sql",
1809
+ r: "text/x-r",
1810
+ lua: "text/x-lua",
1811
+ pl: "text/x-perl",
1812
+ perl: "text/x-perl",
1813
+ dart: "text/x-dart",
1814
+ ex: "text/x-elixir",
1815
+ exs: "text/x-elixir",
1816
+ erl: "text/x-erlang",
1817
+ hs: "text/x-haskell",
1818
+ clj: "text/x-clojure",
1819
+ lisp: "text/x-lisp",
1820
+ vim: "text/plain",
1821
+ // Additional video/image
1822
+ m4v: "video/mp4",
1823
+ ico: "image/x-icon",
824
1824
  };
825
1825
  return mimeMap[ext.toLowerCase()] || "application/octet-stream";
826
1826
  }