@juspay/neurolink 9.32.0 → 9.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (467)
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/anthropicOAuth.js +1 -1
  3. package/dist/cli/commands/proxy.js +18 -5
  4. package/dist/client/aiSdkAdapter.js +1 -1
  5. package/dist/client/index.js +137 -501
  6. package/dist/core/factory.js +0 -1
  7. package/dist/core/redisConversationMemoryManager.js +1 -1
  8. package/dist/features/ppt/slideGenerator.js +0 -1
  9. package/dist/features/ppt/utils.js +0 -1
  10. package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
  11. package/dist/mcp/elicitationProtocol.js +1 -1
  12. package/dist/mcp/servers/agent/directToolsServer.js +0 -1
  13. package/dist/providers/azureOpenai.js +1 -1
  14. package/dist/providers/huggingFace.js +0 -1
  15. package/dist/providers/openaiCompatible.js +0 -1
  16. package/dist/sdk/toolRegistration.js +0 -1
  17. package/dist/server/openapi/generator.js +1 -1
  18. package/dist/server/routes/claudeProxyRoutes.js +45 -9
  19. package/dist/types/configTypes.js +0 -5
  20. package/dist/types/modelTypes.js +0 -1
  21. package/dist/types/tools.js +0 -1
  22. package/dist/types/typeAliases.js +0 -1
  23. package/dist/types/utilities.js +1 -1
  24. package/dist/types/workflowTypes.js +0 -1
  25. package/dist/utils/providerRetry.js +0 -1
  26. package/dist/utils/providerUtils.js +0 -1
  27. package/package.json +2 -2
  28. package/dist/client/adapters/providerImageAdapter.js +0 -588
  29. package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
  30. package/dist/client/adapters/video/directorPipeline.js +0 -516
  31. package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
  32. package/dist/client/adapters/video/frameExtractor.js +0 -143
  33. package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
  34. package/dist/client/adapters/video/videoAnalyzer.js +0 -238
  35. package/dist/client/adapters/video/videoMerger.js +0 -171
  36. package/dist/client/agent/directTools.js +0 -840
  37. package/dist/client/auth/AuthProviderFactory.js +0 -111
  38. package/dist/client/auth/AuthProviderRegistry.js +0 -190
  39. package/dist/client/auth/RequestContext.js +0 -78
  40. package/dist/client/auth/accountPool.js +0 -178
  41. package/dist/client/auth/anthropicOAuth.js +0 -974
  42. package/dist/client/auth/authContext.js +0 -314
  43. package/dist/client/auth/errors.js +0 -39
  44. package/dist/client/auth/index.js +0 -61
  45. package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
  46. package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
  47. package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
  48. package/dist/client/auth/providers/CognitoProvider.js +0 -304
  49. package/dist/client/auth/providers/KeycloakProvider.js +0 -393
  50. package/dist/client/auth/providers/auth0.js +0 -274
  51. package/dist/client/auth/providers/betterAuth.js +0 -182
  52. package/dist/client/auth/providers/clerk.js +0 -317
  53. package/dist/client/auth/providers/custom.js +0 -112
  54. package/dist/client/auth/providers/firebase.js +0 -226
  55. package/dist/client/auth/providers/jwt.js +0 -212
  56. package/dist/client/auth/providers/oauth2.js +0 -303
  57. package/dist/client/auth/providers/supabase.js +0 -259
  58. package/dist/client/auth/providers/workos.js +0 -284
  59. package/dist/client/auth/serverBridge.js +0 -25
  60. package/dist/client/auth/sessionManager.js +0 -437
  61. package/dist/client/auth/tokenStore.js +0 -799
  62. package/dist/client/client/aiSdkAdapter.js +0 -487
  63. package/dist/client/client/auth.js +0 -473
  64. package/dist/client/client/errors.js +0 -552
  65. package/dist/client/client/httpClient.js +0 -837
  66. package/dist/client/client/index.js +0 -172
  67. package/dist/client/client/interceptors.js +0 -601
  68. package/dist/client/client/sseClient.js +0 -545
  69. package/dist/client/client/streamingClient.js +0 -917
  70. package/dist/client/client/wsClient.js +0 -369
  71. package/dist/client/config/configManager.js +0 -303
  72. package/dist/client/config/conversationMemory.js +0 -86
  73. package/dist/client/config/taskClassificationConfig.js +0 -148
  74. package/dist/client/constants/contextWindows.js +0 -295
  75. package/dist/client/constants/enums.js +0 -853
  76. package/dist/client/constants/index.js +0 -207
  77. package/dist/client/constants/performance.js +0 -389
  78. package/dist/client/constants/retry.js +0 -266
  79. package/dist/client/constants/timeouts.js +0 -182
  80. package/dist/client/constants/tokens.js +0 -380
  81. package/dist/client/constants/videoErrors.js +0 -46
  82. package/dist/client/context/budgetChecker.js +0 -98
  83. package/dist/client/context/contextCompactor.js +0 -205
  84. package/dist/client/context/emergencyTruncation.js +0 -88
  85. package/dist/client/context/errorDetection.js +0 -171
  86. package/dist/client/context/errors.js +0 -21
  87. package/dist/client/context/fileTokenBudget.js +0 -127
  88. package/dist/client/context/prompts/summarizationPrompt.js +0 -117
  89. package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
  90. package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
  91. package/dist/client/context/stages/structuredSummarizer.js +0 -99
  92. package/dist/client/context/stages/toolOutputPruner.js +0 -52
  93. package/dist/client/context/summarizationEngine.js +0 -136
  94. package/dist/client/context/toolOutputLimits.js +0 -78
  95. package/dist/client/context/toolPairRepair.js +0 -66
  96. package/dist/client/core/analytics.js +0 -88
  97. package/dist/client/core/baseProvider.js +0 -1385
  98. package/dist/client/core/constants.js +0 -140
  99. package/dist/client/core/conversationMemoryFactory.js +0 -141
  100. package/dist/client/core/conversationMemoryInitializer.js +0 -128
  101. package/dist/client/core/conversationMemoryManager.js +0 -344
  102. package/dist/client/core/dynamicModels.js +0 -358
  103. package/dist/client/core/evaluation.js +0 -309
  104. package/dist/client/core/evaluationProviders.js +0 -248
  105. package/dist/client/core/factory.js +0 -412
  106. package/dist/client/core/infrastructure/baseError.js +0 -22
  107. package/dist/client/core/infrastructure/baseFactory.js +0 -54
  108. package/dist/client/core/infrastructure/baseRegistry.js +0 -53
  109. package/dist/client/core/infrastructure/index.js +0 -5
  110. package/dist/client/core/infrastructure/retry.js +0 -20
  111. package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
  112. package/dist/client/core/modelConfiguration.js +0 -851
  113. package/dist/client/core/modules/GenerationHandler.js +0 -588
  114. package/dist/client/core/modules/MessageBuilder.js +0 -273
  115. package/dist/client/core/modules/StreamHandler.js +0 -185
  116. package/dist/client/core/modules/TelemetryHandler.js +0 -203
  117. package/dist/client/core/modules/ToolsManager.js +0 -499
  118. package/dist/client/core/modules/Utilities.js +0 -331
  119. package/dist/client/core/redisConversationMemoryManager.js +0 -1435
  120. package/dist/client/core/streamAnalytics.js +0 -131
  121. package/dist/client/evaluation/contextBuilder.js +0 -134
  122. package/dist/client/evaluation/index.js +0 -61
  123. package/dist/client/evaluation/prompts.js +0 -73
  124. package/dist/client/evaluation/ragasEvaluator.js +0 -110
  125. package/dist/client/evaluation/retryManager.js +0 -78
  126. package/dist/client/evaluation/scoring.js +0 -61
  127. package/dist/client/factories/providerFactory.js +0 -166
  128. package/dist/client/factories/providerRegistry.js +0 -166
  129. package/dist/client/features/ppt/constants.js +0 -896
  130. package/dist/client/features/ppt/contentPlanner.js +0 -529
  131. package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
  132. package/dist/client/features/ppt/slideGenerator.js +0 -532
  133. package/dist/client/features/ppt/slideRenderers.js +0 -2383
  134. package/dist/client/features/ppt/slideTypeInference.js +0 -405
  135. package/dist/client/features/ppt/types.js +0 -13
  136. package/dist/client/features/ppt/utils.js +0 -443
  137. package/dist/client/files/fileReferenceRegistry.js +0 -1543
  138. package/dist/client/files/fileTools.js +0 -450
  139. package/dist/client/files/streamingReader.js +0 -321
  140. package/dist/client/files/types.js +0 -23
  141. package/dist/client/hitl/hitlErrors.js +0 -54
  142. package/dist/client/hitl/hitlManager.js +0 -460
  143. package/dist/client/mcp/agentExposure.js +0 -356
  144. package/dist/client/mcp/auth/index.js +0 -11
  145. package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
  146. package/dist/client/mcp/auth/tokenStorage.js +0 -134
  147. package/dist/client/mcp/batching/index.js +0 -10
  148. package/dist/client/mcp/batching/requestBatcher.js +0 -441
  149. package/dist/client/mcp/caching/index.js +0 -10
  150. package/dist/client/mcp/caching/toolCache.js +0 -433
  151. package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
  152. package/dist/client/mcp/elicitation/index.js +0 -11
  153. package/dist/client/mcp/elicitation/types.js +0 -10
  154. package/dist/client/mcp/elicitationProtocol.js +0 -375
  155. package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
  156. package/dist/client/mcp/externalServerManager.js +0 -1478
  157. package/dist/client/mcp/factory.js +0 -161
  158. package/dist/client/mcp/flexibleToolValidator.js +0 -161
  159. package/dist/client/mcp/httpRateLimiter.js +0 -391
  160. package/dist/client/mcp/httpRetryHandler.js +0 -178
  161. package/dist/client/mcp/index.js +0 -74
  162. package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
  163. package/dist/client/mcp/mcpClientFactory.js +0 -708
  164. package/dist/client/mcp/mcpRegistryClient.js +0 -488
  165. package/dist/client/mcp/mcpServerBase.js +0 -373
  166. package/dist/client/mcp/multiServerManager.js +0 -579
  167. package/dist/client/mcp/registry.js +0 -158
  168. package/dist/client/mcp/routing/index.js +0 -10
  169. package/dist/client/mcp/routing/toolRouter.js +0 -416
  170. package/dist/client/mcp/serverCapabilities.js +0 -502
  171. package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
  172. package/dist/client/mcp/toolAnnotations.js +0 -239
  173. package/dist/client/mcp/toolConverter.js +0 -258
  174. package/dist/client/mcp/toolDiscoveryService.js +0 -798
  175. package/dist/client/mcp/toolIntegration.js +0 -334
  176. package/dist/client/mcp/toolRegistry.js +0 -729
  177. package/dist/client/memory/hippocampusInitializer.js +0 -19
  178. package/dist/client/memory/memoryRetrievalTools.js +0 -166
  179. package/dist/client/middleware/builtin/analytics.js +0 -132
  180. package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
  181. package/dist/client/middleware/builtin/guardrails.js +0 -109
  182. package/dist/client/middleware/builtin/lifecycle.js +0 -168
  183. package/dist/client/middleware/factory.js +0 -327
  184. package/dist/client/middleware/registry.js +0 -295
  185. package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
  186. package/dist/client/models/anthropicModels.js +0 -527
  187. package/dist/client/neurolink.js +0 -8233
  188. package/dist/client/observability/exporterRegistry.js +0 -413
  189. package/dist/client/observability/exporters/arizeExporter.js +0 -138
  190. package/dist/client/observability/exporters/baseExporter.js +0 -190
  191. package/dist/client/observability/exporters/braintrustExporter.js +0 -154
  192. package/dist/client/observability/exporters/datadogExporter.js +0 -196
  193. package/dist/client/observability/exporters/laminarExporter.js +0 -302
  194. package/dist/client/observability/exporters/langfuseExporter.js +0 -209
  195. package/dist/client/observability/exporters/langsmithExporter.js +0 -143
  196. package/dist/client/observability/exporters/otelExporter.js +0 -164
  197. package/dist/client/observability/exporters/posthogExporter.js +0 -287
  198. package/dist/client/observability/exporters/sentryExporter.js +0 -165
  199. package/dist/client/observability/index.js +0 -31
  200. package/dist/client/observability/metricsAggregator.js +0 -556
  201. package/dist/client/observability/otelBridge.js +0 -131
  202. package/dist/client/observability/retryPolicy.js +0 -383
  203. package/dist/client/observability/sampling/samplers.js +0 -216
  204. package/dist/client/observability/spanProcessor.js +0 -303
  205. package/dist/client/observability/tokenTracker.js +0 -413
  206. package/dist/client/observability/types/exporterTypes.js +0 -5
  207. package/dist/client/observability/types/index.js +0 -4
  208. package/dist/client/observability/types/spanTypes.js +0 -92
  209. package/dist/client/observability/utils/safeMetadata.js +0 -25
  210. package/dist/client/observability/utils/spanSerializer.js +0 -292
  211. package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
  212. package/dist/client/processors/base/BaseFileProcessor.js +0 -614
  213. package/dist/client/processors/base/types.js +0 -82
  214. package/dist/client/processors/config/fileTypes.js +0 -520
  215. package/dist/client/processors/config/index.js +0 -92
  216. package/dist/client/processors/config/languageMap.js +0 -410
  217. package/dist/client/processors/config/mimeTypes.js +0 -363
  218. package/dist/client/processors/config/sizeLimits.js +0 -258
  219. package/dist/client/processors/document/ExcelProcessor.js +0 -590
  220. package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
  221. package/dist/client/processors/document/PptxProcessor.js +0 -157
  222. package/dist/client/processors/document/RtfProcessor.js +0 -361
  223. package/dist/client/processors/document/WordProcessor.js +0 -353
  224. package/dist/client/processors/errors/FileErrorCode.js +0 -255
  225. package/dist/client/processors/errors/errorHelpers.js +0 -386
  226. package/dist/client/processors/errors/errorSerializer.js +0 -507
  227. package/dist/client/processors/errors/index.js +0 -49
  228. package/dist/client/processors/markup/SvgProcessor.js +0 -240
  229. package/dist/client/processors/media/AudioProcessor.js +0 -707
  230. package/dist/client/processors/media/VideoProcessor.js +0 -1045
  231. package/dist/client/providers/amazonBedrock.js +0 -1512
  232. package/dist/client/providers/amazonSagemaker.js +0 -162
  233. package/dist/client/providers/anthropic.js +0 -831
  234. package/dist/client/providers/azureOpenai.js +0 -143
  235. package/dist/client/providers/googleAiStudio.js +0 -1200
  236. package/dist/client/providers/googleNativeGemini3.js +0 -543
  237. package/dist/client/providers/googleVertex.js +0 -2936
  238. package/dist/client/providers/huggingFace.js +0 -315
  239. package/dist/client/providers/litellm.js +0 -488
  240. package/dist/client/providers/mistral.js +0 -157
  241. package/dist/client/providers/ollama.js +0 -1579
  242. package/dist/client/providers/openAI.js +0 -627
  243. package/dist/client/providers/openRouter.js +0 -543
  244. package/dist/client/providers/openaiCompatible.js +0 -290
  245. package/dist/client/providers/providerTypeUtils.js +0 -46
  246. package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
  247. package/dist/client/providers/sagemaker/client.js +0 -472
  248. package/dist/client/providers/sagemaker/config.js +0 -317
  249. package/dist/client/providers/sagemaker/detection.js +0 -606
  250. package/dist/client/providers/sagemaker/error-constants.js +0 -227
  251. package/dist/client/providers/sagemaker/errors.js +0 -299
  252. package/dist/client/providers/sagemaker/language-model.js +0 -775
  253. package/dist/client/providers/sagemaker/parsers.js +0 -634
  254. package/dist/client/providers/sagemaker/streaming.js +0 -331
  255. package/dist/client/providers/sagemaker/structured-parser.js +0 -625
  256. package/dist/client/proxy/accountQuota.js +0 -162
  257. package/dist/client/proxy/claudeFormat.js +0 -595
  258. package/dist/client/proxy/modelRouter.js +0 -29
  259. package/dist/client/proxy/oauthFetch.js +0 -367
  260. package/dist/client/proxy/proxyFetch.js +0 -586
  261. package/dist/client/proxy/requestLogger.js +0 -207
  262. package/dist/client/proxy/tokenRefresh.js +0 -124
  263. package/dist/client/proxy/usageStats.js +0 -74
  264. package/dist/client/proxy/utils/noProxyUtils.js +0 -149
  265. package/dist/client/rag/ChunkerFactory.js +0 -320
  266. package/dist/client/rag/ChunkerRegistry.js +0 -421
  267. package/dist/client/rag/chunkers/BaseChunker.js +0 -143
  268. package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
  269. package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
  270. package/dist/client/rag/chunkers/JSONChunker.js +0 -68
  271. package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
  272. package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
  273. package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
  274. package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
  275. package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
  276. package/dist/client/rag/chunkers/TokenChunker.js +0 -61
  277. package/dist/client/rag/chunkers/index.js +0 -15
  278. package/dist/client/rag/chunking/characterChunker.js +0 -142
  279. package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
  280. package/dist/client/rag/chunking/htmlChunker.js +0 -247
  281. package/dist/client/rag/chunking/index.js +0 -17
  282. package/dist/client/rag/chunking/jsonChunker.js +0 -281
  283. package/dist/client/rag/chunking/latexChunker.js +0 -251
  284. package/dist/client/rag/chunking/markdownChunker.js +0 -373
  285. package/dist/client/rag/chunking/recursiveChunker.js +0 -148
  286. package/dist/client/rag/chunking/semanticChunker.js +0 -306
  287. package/dist/client/rag/chunking/sentenceChunker.js +0 -230
  288. package/dist/client/rag/chunking/tokenChunker.js +0 -183
  289. package/dist/client/rag/document/MDocument.js +0 -392
  290. package/dist/client/rag/document/index.js +0 -5
  291. package/dist/client/rag/document/loaders.js +0 -500
  292. package/dist/client/rag/errors/RAGError.js +0 -274
  293. package/dist/client/rag/errors/index.js +0 -6
  294. package/dist/client/rag/graphRag/graphRAG.js +0 -401
  295. package/dist/client/rag/graphRag/index.js +0 -4
  296. package/dist/client/rag/index.js +0 -141
  297. package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
  298. package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
  299. package/dist/client/rag/metadata/index.js +0 -9
  300. package/dist/client/rag/metadata/metadataExtractor.js +0 -280
  301. package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
  302. package/dist/client/rag/pipeline/contextAssembly.js +0 -341
  303. package/dist/client/rag/pipeline/index.js +0 -5
  304. package/dist/client/rag/ragIntegration.js +0 -321
  305. package/dist/client/rag/reranker/RerankerFactory.js +0 -430
  306. package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
  307. package/dist/client/rag/reranker/index.js +0 -9
  308. package/dist/client/rag/reranker/reranker.js +0 -277
  309. package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
  310. package/dist/client/rag/resilience/RetryHandler.js +0 -304
  311. package/dist/client/rag/resilience/index.js +0 -7
  312. package/dist/client/rag/retrieval/hybridSearch.js +0 -335
  313. package/dist/client/rag/retrieval/index.js +0 -5
  314. package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
  315. package/dist/client/rag/types.js +0 -8
  316. package/dist/client/sdk/toolRegistration.js +0 -377
  317. package/dist/client/server/abstract/baseServerAdapter.js +0 -575
  318. package/dist/client/server/adapters/expressAdapter.js +0 -486
  319. package/dist/client/server/adapters/fastifyAdapter.js +0 -472
  320. package/dist/client/server/adapters/honoAdapter.js +0 -632
  321. package/dist/client/server/adapters/koaAdapter.js +0 -510
  322. package/dist/client/server/errors.js +0 -486
  323. package/dist/client/server/factory/serverAdapterFactory.js +0 -160
  324. package/dist/client/server/index.js +0 -108
  325. package/dist/client/server/middleware/abortSignal.js +0 -111
  326. package/dist/client/server/middleware/auth.js +0 -388
  327. package/dist/client/server/middleware/cache.js +0 -359
  328. package/dist/client/server/middleware/common.js +0 -281
  329. package/dist/client/server/middleware/deprecation.js +0 -190
  330. package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
  331. package/dist/client/server/middleware/rateLimit.js +0 -227
  332. package/dist/client/server/middleware/validation.js +0 -388
  333. package/dist/client/server/openapi/generator.js +0 -398
  334. package/dist/client/server/openapi/index.js +0 -36
  335. package/dist/client/server/openapi/schemas.js +0 -695
  336. package/dist/client/server/openapi/templates.js +0 -374
  337. package/dist/client/server/routes/agentRoutes.js +0 -189
  338. package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
  339. package/dist/client/server/routes/healthRoutes.js +0 -187
  340. package/dist/client/server/routes/index.js +0 -57
  341. package/dist/client/server/routes/mcpRoutes.js +0 -342
  342. package/dist/client/server/routes/memoryRoutes.js +0 -350
  343. package/dist/client/server/routes/openApiRoutes.js +0 -126
  344. package/dist/client/server/routes/toolRoutes.js +0 -199
  345. package/dist/client/server/streaming/dataStream.js +0 -486
  346. package/dist/client/server/streaming/index.js +0 -11
  347. package/dist/client/server/types.js +0 -67
  348. package/dist/client/server/utils/redaction.js +0 -334
  349. package/dist/client/server/utils/validation.js +0 -243
  350. package/dist/client/server/websocket/WebSocketHandler.js +0 -383
  351. package/dist/client/server/websocket/index.js +0 -4
  352. package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
  353. package/dist/client/telemetry/attributes.js +0 -100
  354. package/dist/client/telemetry/index.js +0 -26
  355. package/dist/client/telemetry/telemetryService.js +0 -308
  356. package/dist/client/telemetry/tracers.js +0 -17
  357. package/dist/client/telemetry/withSpan.js +0 -34
  358. package/dist/client/types/actionTypes.js +0 -6
  359. package/dist/client/types/analytics.js +0 -5
  360. package/dist/client/types/authTypes.js +0 -9
  361. package/dist/client/types/circuitBreakerErrors.js +0 -34
  362. package/dist/client/types/cli.js +0 -21
  363. package/dist/client/types/clientTypes.js +0 -10
  364. package/dist/client/types/common.js +0 -51
  365. package/dist/client/types/configTypes.js +0 -49
  366. package/dist/client/types/content.js +0 -19
  367. package/dist/client/types/contextTypes.js +0 -400
  368. package/dist/client/types/conversation.js +0 -47
  369. package/dist/client/types/conversationMemoryInterface.js +0 -6
  370. package/dist/client/types/domainTypes.js +0 -5
  371. package/dist/client/types/errors.js +0 -167
  372. package/dist/client/types/evaluation.js +0 -5
  373. package/dist/client/types/evaluationProviders.js +0 -5
  374. package/dist/client/types/evaluationTypes.js +0 -1
  375. package/dist/client/types/externalMcp.js +0 -6
  376. package/dist/client/types/fileReferenceTypes.js +0 -8
  377. package/dist/client/types/fileTypes.js +0 -4
  378. package/dist/client/types/generateTypes.js +0 -1
  379. package/dist/client/types/guardrails.js +0 -1
  380. package/dist/client/types/hitlTypes.js +0 -8
  381. package/dist/client/types/index.js +0 -57
  382. package/dist/client/types/mcpTypes.js +0 -5
  383. package/dist/client/types/middlewareTypes.js +0 -1
  384. package/dist/client/types/modelTypes.js +0 -30
  385. package/dist/client/types/multimodal.js +0 -135
  386. package/dist/client/types/observability.js +0 -6
  387. package/dist/client/types/pptTypes.js +0 -82
  388. package/dist/client/types/providers.js +0 -111
  389. package/dist/client/types/proxyTypes.js +0 -16
  390. package/dist/client/types/ragTypes.js +0 -7
  391. package/dist/client/types/sdkTypes.js +0 -8
  392. package/dist/client/types/serviceTypes.js +0 -5
  393. package/dist/client/types/streamTypes.js +0 -1
  394. package/dist/client/types/subscriptionTypes.js +0 -9
  395. package/dist/client/types/taskClassificationTypes.js +0 -5
  396. package/dist/client/types/tools.js +0 -24
  397. package/dist/client/types/ttsTypes.js +0 -57
  398. package/dist/client/types/typeAliases.js +0 -48
  399. package/dist/client/types/utilities.js +0 -4
  400. package/dist/client/types/workflowTypes.js +0 -30
  401. package/dist/client/utils/async/withTimeout.js +0 -98
  402. package/dist/client/utils/asyncMutex.js +0 -60
  403. package/dist/client/utils/conversationMemory.js +0 -431
  404. package/dist/client/utils/csvProcessor.js +0 -846
  405. package/dist/client/utils/errorHandling.js +0 -936
  406. package/dist/client/utils/evaluationUtils.js +0 -131
  407. package/dist/client/utils/factoryProcessing.js +0 -589
  408. package/dist/client/utils/fileDetector.js +0 -2161
  409. package/dist/client/utils/imageCache.js +0 -376
  410. package/dist/client/utils/imageProcessor.js +0 -704
  411. package/dist/client/utils/logger.js +0 -491
  412. package/dist/client/utils/mcpDefaults.js +0 -134
  413. package/dist/client/utils/messageBuilder.js +0 -1653
  414. package/dist/client/utils/modelAliasResolver.js +0 -54
  415. package/dist/client/utils/modelDetection.js +0 -80
  416. package/dist/client/utils/modelRouter.js +0 -292
  417. package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
  418. package/dist/client/utils/observabilityHelpers.js +0 -47
  419. package/dist/client/utils/parameterValidation.js +0 -966
  420. package/dist/client/utils/pdfProcessor.js +0 -410
  421. package/dist/client/utils/performance.js +0 -222
  422. package/dist/client/utils/pricing.js +0 -340
  423. package/dist/client/utils/promptRedaction.js +0 -62
  424. package/dist/client/utils/providerConfig.js +0 -1009
  425. package/dist/client/utils/providerHealth.js +0 -1237
  426. package/dist/client/utils/providerRetry.js +0 -112
  427. package/dist/client/utils/providerUtils.js +0 -434
  428. package/dist/client/utils/rateLimiter.js +0 -200
  429. package/dist/client/utils/redis.js +0 -368
  430. package/dist/client/utils/retryHandler.js +0 -269
  431. package/dist/client/utils/retryability.js +0 -22
  432. package/dist/client/utils/sanitizers/svg.js +0 -481
  433. package/dist/client/utils/schemaConversion.js +0 -255
  434. package/dist/client/utils/taskClassificationUtils.js +0 -149
  435. package/dist/client/utils/taskClassifier.js +0 -94
  436. package/dist/client/utils/thinkingConfig.js +0 -104
  437. package/dist/client/utils/timeout.js +0 -359
  438. package/dist/client/utils/tokenEstimation.js +0 -142
  439. package/dist/client/utils/tokenLimits.js +0 -125
  440. package/dist/client/utils/tokenUtils.js +0 -239
  441. package/dist/client/utils/toolUtils.js +0 -75
  442. package/dist/client/utils/transformationUtils.js +0 -554
  443. package/dist/client/utils/ttsProcessor.js +0 -286
  444. package/dist/client/utils/typeUtils.js +0 -97
  445. package/dist/client/utils/videoAnalysisProcessor.js +0 -67
  446. package/dist/client/workflow/config.js +0 -398
  447. package/dist/client/workflow/core/ensembleExecutor.js +0 -407
  448. package/dist/client/workflow/core/judgeScorer.js +0 -544
  449. package/dist/client/workflow/core/responseConditioner.js +0 -225
  450. package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
  451. package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
  452. package/dist/client/workflow/core/types/index.js +0 -7
  453. package/dist/client/workflow/core/types/judgeTypes.js +0 -7
  454. package/dist/client/workflow/core/types/layerTypes.js +0 -7
  455. package/dist/client/workflow/core/types/registryTypes.js +0 -7
  456. package/dist/client/workflow/core/workflowRegistry.js +0 -304
  457. package/dist/client/workflow/core/workflowRunner.js +0 -586
  458. package/dist/client/workflow/index.js +0 -50
  459. package/dist/client/workflow/types.js +0 -9
  460. package/dist/client/workflow/utils/types/index.js +0 -7
  461. package/dist/client/workflow/utils/workflowMetrics.js +0 -311
  462. package/dist/client/workflow/utils/workflowValidation.js +0 -420
  463. package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
  464. package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
  465. package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
  466. package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
  467. /package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
@@ -1,1543 +0,0 @@
1
- /**
2
- * File Reference Registry
3
- *
4
- * Central registry for managing file references in on-demand processing mode.
5
- * Files are registered with lightweight metadata and previews. Full content
6
- * is processed on-demand when the LLM requests it via tools.
7
- *
8
- * This module is the core of the file reference architecture, replacing
9
- * the previous "load everything upfront" pattern for files that exceed
10
- * the tiny/small size tiers.
11
- *
12
- * @module files/fileReferenceRegistry
13
- */
14
- import { randomUUID } from "node:crypto";
15
- import { mkdir, readFile, stat, unlink, writeFile } from "node:fs/promises";
16
- import { tmpdir } from "node:os";
17
- import { basename, extname, join } from "node:path";
18
- import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
19
- import { logger } from "../utils/logger.js";
20
- import { StreamingReader } from "./streamingReader.js";
21
- import { SIZE_TIER_THRESHOLDS } from "./types.js";
22
- /** Default maximum files in registry before LRU eviction */
23
- const DEFAULT_MAX_FILES = 100;
24
- /** Default maximum temp bytes (1 GB) */
25
- const DEFAULT_MAX_TEMP_BYTES = 1024 * 1024 * 1024;
26
- /** Default preview length in characters */
27
- const DEFAULT_PREVIEW_CHARS = 2000;
28
- /** Maximum file size we'll accept (2 GB) */
29
- const MAX_ACCEPTED_SIZE = 2 * 1024 * 1024 * 1024;
30
- /**
31
- * Registry for managing file references with on-demand processing.
32
- *
33
- * Design decisions:
34
- * - One instance per NeuroLink SDK instance (not global singleton)
35
- * - File buffers persisted to temp dir for later streaming access
36
- * - LRU eviction when maxFiles exceeded
37
- * - Thread-safe via sequential async operations (Node.js single-threaded)
38
- *
39
- * @example
40
- * ```typescript
41
- * const registry = new FileReferenceRegistry();
42
- * const ref = await registry.register(buffer, {
43
- * filename: 'report.xlsx',
44
- * });
45
- * console.log(ref.sizeTier); // 'medium'
46
- * console.log(ref.preview); // First 2000 chars of processed content
47
- * console.log(ref.estimatedTokens); // Type-aware estimate
48
- *
49
- * // Later, LLM requests specific section
50
- * const section = await registry.readSection(ref.id, 1, 50, 5000);
51
- * ```
52
- */
53
- export class FileReferenceRegistry {
54
- files = new Map();
55
- tempDir;
56
- maxFiles;
57
- maxTempBytes;
58
- defaultPreviewChars;
59
- currentTempBytes = 0;
60
- tempDirCreated = false;
61
- constructor(options = {}) {
62
- this.tempDir =
63
- options.tempDir || join(tmpdir(), "neurolink-files", randomUUID());
64
- this.maxFiles = options.maxFiles ?? DEFAULT_MAX_FILES;
65
- this.maxTempBytes = options.maxTempBytes ?? DEFAULT_MAX_TEMP_BYTES;
66
- this.defaultPreviewChars =
67
- options.defaultPreviewChars ?? DEFAULT_PREVIEW_CHARS;
68
- }
69
- /**
70
- * Register a file from a Buffer.
71
- *
72
- * This is the primary registration method. It performs lightweight analysis:
73
- * 1. Detect file type from magic bytes (first 1KB)
74
- * 2. Determine size tier
75
- * 3. Extract preview (first N chars of text, or metadata for binary)
76
- * 4. Persist buffer to temp directory for later streaming access
77
- *
78
- * Total time: ~1-5ms for most files (no full processing).
79
- *
80
- * @param buffer - File content as Buffer
81
- * @param source - How the file was provided ('buffer', 'url', 'path', 'datauri')
82
- * @param options - Registration options
83
- * @returns FileReference with metadata and preview
84
- */
85
- async register(buffer, source = "buffer", options = {}) {
86
- const sizeBytes = buffer.length;
87
- // Reject oversized files
88
- if (sizeBytes > MAX_ACCEPTED_SIZE) {
89
- const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
90
- throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
91
- }
92
- // Detect file type from magic bytes and extension.
93
- // If the provided filename has no extension, append one guessed from magic bytes
94
- // so downstream processors (e.g., VideoProcessor) can validate by extension.
95
- let filename = options.filename || `file-${Date.now()}${this.guessExtension(buffer)}`;
96
- if (!extname(filename)) {
97
- const guessedExt = this.guessExtension(buffer);
98
- if (guessedExt) {
99
- filename = `${filename}${guessedExt}`;
100
- }
101
- }
102
- const ext = extname(filename).toLowerCase().replace(".", "");
103
- const detectedType = options.fileType || this.detectType(buffer, ext);
104
- const mimeType = this.guessMimeType(detectedType, ext);
105
- const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
106
- // Generate preview (fast — only reads first N chars)
107
- const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);
108
- // Estimate post-processing tokens (type-aware)
109
- const estimatedTokens = estimatePostProcessingTokens(sizeBytes, detectedType);
110
- // Create reference
111
- const ref = {
112
- id: randomUUID(),
113
- source,
114
- filename,
115
- sizeBytes,
116
- detectedType,
117
- mimeType,
118
- sizeTier,
119
- estimatedTokens,
120
- preview,
121
- status: "registered",
122
- registeredAt: Date.now(),
123
- lastAccessedAt: Date.now(),
124
- extension: ext || undefined,
125
- };
126
- // Persist buffer to temp directory (unless skipped or tiny)
127
- if (!options.skipTempPersist && sizeTier !== "tiny") {
128
- try {
129
- const tempPath = await this.persistToTemp(ref.id, buffer, ext);
130
- ref.tempPath = tempPath;
131
- }
132
- catch (err) {
133
- logger.warn(`[FileReferenceRegistry] Failed to persist ${filename} to temp: ${err instanceof Error ? err.message : String(err)}`);
134
- // Continue without temp persistence — buffer-based access still works
135
- }
136
- }
137
- // For tiny files, store the processed content inline
138
- if (sizeTier === "tiny") {
139
- ref.processedContent = this.isTextType(detectedType, buffer)
140
- ? buffer.toString("utf-8")
141
- : preview;
142
- ref.status = "processed";
143
- }
144
- else {
145
- ref.status = "previewed";
146
- }
147
- // Evict LRU entries if at capacity
148
- if (this.files.size >= this.maxFiles) {
149
- this.evictLRU();
150
- }
151
- this.files.set(ref.id, ref);
152
- logger.info(`[FileReferenceRegistry] Registered "${filename}" (${this.formatSize(sizeBytes)}, ` +
153
- `tier=${sizeTier}, type=${detectedType}, ~${estimatedTokens} tokens)`);
154
- return ref;
155
- }
156
- /**
157
- * Register a file from a file path on disk.
158
- *
159
- * Does NOT read the entire file — only reads the first 1KB for type detection
160
- * and preview. The file path is stored for later streaming access.
161
- *
162
- * @param filePath - Absolute path to the file
163
- * @param options - Registration options
164
- * @returns FileReference with metadata and preview
165
- */
166
- async registerFromPath(filePath, options = {}) {
167
- const fileStat = await stat(filePath);
168
- const sizeBytes = fileStat.size;
169
- if (sizeBytes > MAX_ACCEPTED_SIZE) {
170
- const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
171
- throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
172
- }
173
- const filename = options.filename || basename(filePath);
174
- const ext = extname(filename).toLowerCase().replace(".", "");
175
- const detectedType = options.fileType || this.detectTypeFromExtension(ext);
176
- const mimeType = this.guessMimeType(detectedType, ext);
177
- const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
178
- const estimatedTokens = estimatePostProcessingTokens(sizeBytes, detectedType);
179
- // Read preview from file (streaming — only first N bytes)
180
- let preview;
181
- try {
182
- preview = await StreamingReader.readPreview(filePath, options.maxPreviewChars ?? this.defaultPreviewChars);
183
- }
184
- catch {
185
- preview = `[File: ${filename}, ${this.formatSize(sizeBytes)}, type: ${detectedType}]`;
186
- }
187
- const ref = {
188
- id: randomUUID(),
189
- source: "path",
190
- originalPath: filePath,
191
- filename,
192
- sizeBytes,
193
- detectedType,
194
- mimeType,
195
- sizeTier,
196
- estimatedTokens,
197
- preview,
198
- status: "previewed",
199
- registeredAt: Date.now(),
200
- lastAccessedAt: Date.now(),
201
- extension: ext || undefined,
202
- };
203
- // For path-based files, no need to persist — we already have the path
204
- // Store the original path as the access point
205
- ref.tempPath = filePath;
206
- if (this.files.size >= this.maxFiles) {
207
- this.evictLRU();
208
- }
209
- this.files.set(ref.id, ref);
210
- logger.info(`[FileReferenceRegistry] Registered from path "${filename}" ` +
211
- `(${this.formatSize(sizeBytes)}, tier=${sizeTier}, type=${detectedType})`);
212
- return ref;
213
- }
214
- /**
215
- * Get a file reference by ID.
216
- * Updates lastAccessedAt for LRU tracking.
217
- */
218
- get(id) {
219
- const ref = this.files.get(id);
220
- if (ref) {
221
- ref.lastAccessedAt = Date.now();
222
- }
223
- return ref;
224
- }
225
- /**
226
- * Get a file reference by ID or filename.
227
- * Tries ID lookup first, then falls back to filename match.
228
- * This handles the common case where an LLM uses the filename
229
- * instead of the UUID when calling file tools.
230
- *
231
- * @param idOrName - UUID or filename to search for
232
- * @returns File reference if found, undefined otherwise
233
- */
234
- getByIdOrFilename(idOrName) {
235
- // Try direct ID lookup first (most common, O(1))
236
- const byId = this.get(idOrName);
237
- if (byId) {
238
- return byId;
239
- }
240
- // Fallback: search by filename (case-insensitive)
241
- const lowerName = idOrName.toLowerCase();
242
- for (const ref of this.files.values()) {
243
- if (ref.filename.toLowerCase() === lowerName) {
244
- ref.lastAccessedAt = Date.now();
245
- return ref;
246
- }
247
- }
248
- // Fallback: search by basename (without path)
249
- for (const ref of this.files.values()) {
250
- const refBasename = ref.filename.split("/").pop()?.toLowerCase() ?? "";
251
- if (refBasename === lowerName) {
252
- ref.lastAccessedAt = Date.now();
253
- return ref;
254
- }
255
- }
256
- return undefined;
257
- }
258
- /**
259
- * Ensure a file has been processed (binary content extracted to text).
260
- *
261
- * For text files this is a no-op. For binary files (PDF, XLSX, video, etc.)
262
- * this triggers on-demand processing if it hasn't happened yet. After this
263
- * call, ref.processedContent and ref.preview contain extracted text.
264
- *
265
- * Used by file tools (get_file_preview) to ensure the preview contains
266
- * real content instead of placeholder metadata strings.
267
- */
268
- async ensureProcessed(fileId) {
269
- const ref = this.get(fileId);
270
- if (!ref) {
271
- return;
272
- }
273
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
274
- await this.processFileOnDemand(ref);
275
- }
276
- }
277
- /**
278
- * Extract targeted content from a registered file.
279
- *
280
- * This is the core dispatch method for the `extract_file_content` tool.
281
- * Routes extraction to the appropriate processor based on file type and
282
- * the parameters provided.
283
- *
284
- * @param params - Extraction parameters (file_id + type-specific options)
285
- * @returns Extraction result with text and/or images
286
- */
287
- async extractContent(params) {
288
- const ref = this.getByIdOrFilename(params.file_id);
289
- if (!ref) {
290
- return {
291
- success: false,
292
- error: `File not found: "${params.file_id}". Use list_attached_files to see available files.`,
293
- };
294
- }
295
- try {
296
- // Text-like types don't need raw buffer — they use readSection
297
- // which works from processedContent (tiny files) or tempPath (larger files)
298
- if (this.isTextType(ref.detectedType) ||
299
- ref.detectedType === "csv" ||
300
- ref.detectedType === "svg" ||
301
- ref.detectedType === "unknown") {
302
- return await this.extractTextTargeted(ref, params);
303
- }
304
- // Binary types need the raw buffer for processor-specific extraction
305
- const buffer = ref.tempPath ? await readFile(ref.tempPath) : null;
306
- if (!buffer) {
307
- return {
308
- success: false,
309
- error: `No file data available for "${ref.filename}". The file may have been evicted from cache.`,
310
- };
311
- }
312
- switch (ref.detectedType) {
313
- case "video":
314
- return await this.extractVideoTargeted(buffer, ref, params);
315
- case "pdf":
316
- return await this.extractPdfTargeted(buffer, ref, params);
317
- case "xlsx":
318
- return await this.extractExcelTargeted(buffer, ref, params);
319
- case "pptx":
320
- return await this.extractPptxTargeted(buffer, ref, params);
321
- case "archive":
322
- return await this.extractArchiveTargeted(buffer, ref, params);
323
- case "audio":
324
- return await this.extractAudioTargeted(buffer, ref, params);
325
- default:
326
- // Fallback for any unrecognized binary type
327
- return await this.extractTextTargeted(ref, params);
328
- }
329
- }
330
- catch (err) {
331
- return {
332
- success: false,
333
- error: `Extraction failed for "${ref.filename}": ${err instanceof Error ? err.message : String(err)}`,
334
- };
335
- }
336
- }
337
- // ─── Targeted Extraction Dispatchers ──────────────────────────────
338
- async extractVideoTargeted(buffer, ref, params) {
339
- const { videoProcessor } = await import("../processors/media/VideoProcessor.js");
340
- // If time range specified, extract frames from that range
341
- if (params.start_time !== undefined && params.end_time !== undefined) {
342
- const frames = await videoProcessor.extractFrameRange(buffer, ref.filename, params.start_time, params.end_time, params.frame_count ?? 5);
343
- return {
344
- success: true,
345
- text: `Extracted ${frames.length} frames from ${ref.filename} (${params.start_time}s - ${params.end_time}s)`,
346
- images: frames,
347
- metadata: {
348
- startTime: params.start_time,
349
- endTime: params.end_time,
350
- frameCount: frames.length,
351
- },
352
- };
353
- }
354
- // No time range: return full metadata + initial keyframes
355
- if (!ref.processedContent) {
356
- await this.processFileOnDemand(ref);
357
- }
358
- return {
359
- success: true,
360
- text: ref.processedContent || `[Video: ${ref.filename}]`,
361
- images: ref.extractedImages ?? undefined,
362
- };
363
- }
364
- async extractPdfTargeted(buffer, ref, params) {
365
- // If specific pages requested, extract those pages
366
- const pages = params.pages ??
367
- (params.page_range
368
- ? Array.from({ length: params.page_range.end - params.page_range.start + 1 }, (_, i) => (params.page_range ?? { start: 0 }).start + i)
369
- : undefined);
370
- if (pages && pages.length > 0) {
371
- try {
372
- const { PDFParse } = await import("pdf-parse");
373
- const pdf = new PDFParse({ data: new Uint8Array(buffer) });
374
- try {
375
- const firstPage = Math.min(...pages);
376
- const lastPage = Math.max(...pages);
377
- const textResult = await pdf.getText({
378
- first: firstPage,
379
- last: lastPage,
380
- });
381
- const totalPages = textResult.total || 0;
382
- const text = textResult.text?.trim() || "(No text found on the requested pages)";
383
- // Note: pdf-parse extracts a contiguous range (first..last).
384
- // For non-contiguous page requests (e.g., [1, 5, 12]), the result
385
- // includes all pages in the range. This is a limitation of pdf-parse.
386
- const rangeNote = firstPage !== lastPage
387
- ? ` (extracted pages ${firstPage}-${lastPage})`
388
- : "";
389
- return {
390
- success: true,
391
- text: `## Pages ${pages.join(", ")} of ${ref.filename}${rangeNote}\n` +
392
- `Total pages in document: ${totalPages}\n\n${text}`,
393
- metadata: {
394
- requestedPages: pages,
395
- extractedRange: { first: firstPage, last: lastPage },
396
- totalPages,
397
- },
398
- };
399
- }
400
- finally {
401
- await pdf.destroy().catch(() => {
402
- /* cleanup - ignore destroy errors */
403
- });
404
- }
405
- }
406
- catch (err) {
407
- return {
408
- success: false,
409
- error: `PDF page extraction failed: ${err instanceof Error ? err.message : String(err)}`,
410
- };
411
- }
412
- }
413
- // No specific pages: return full content
414
- if (!ref.processedContent) {
415
- await this.processFileOnDemand(ref);
416
- }
417
- return {
418
- success: true,
419
- text: ref.processedContent || `[PDF: ${ref.filename}]`,
420
- };
421
- }
422
- async extractExcelTargeted(buffer, ref, params) {
423
- const { excelProcessor } = await import("../processors/document/ExcelProcessor.js");
424
- const text = await excelProcessor.extractSheetRange(buffer, params.sheet, params.row_range?.start ?? 1, params.row_range?.end, params.columns);
425
- return {
426
- success: true,
427
- text,
428
- metadata: {
429
- sheet: params.sheet,
430
- rowRange: params.row_range,
431
- columns: params.columns,
432
- },
433
- };
434
- }
435
- async extractPptxTargeted(buffer, ref, params) {
436
- const pages = params.pages ??
437
- (params.page_range
438
- ? Array.from({ length: params.page_range.end - params.page_range.start + 1 }, (_, i) => (params.page_range ?? { start: 0 }).start + i)
439
- : undefined);
440
- if (pages && pages.length > 0) {
441
- const { PptxProcessor } = await import("../processors/document/PptxProcessor.js");
442
- const text = await PptxProcessor.extractSlides(buffer, pages);
443
- return {
444
- success: true,
445
- text,
446
- metadata: { slides: pages },
447
- };
448
- }
449
- // Full extraction
450
- if (!ref.processedContent) {
451
- await this.processFileOnDemand(ref);
452
- }
453
- return {
454
- success: true,
455
- text: ref.processedContent || `[PPTX: ${ref.filename}]`,
456
- };
457
- }
458
- async extractArchiveTargeted(buffer, ref, params) {
459
- if (params.entry_path) {
460
- const { archiveProcessor } = await import("../processors/archive/ArchiveProcessor.js");
461
- const text = await archiveProcessor.extractEntry(buffer, params.entry_path);
462
- return {
463
- success: true,
464
- text,
465
- metadata: { entryPath: params.entry_path },
466
- };
467
- }
468
- // No specific entry: return full listing
469
- if (!ref.processedContent) {
470
- await this.processFileOnDemand(ref);
471
- }
472
- return {
473
- success: true,
474
- text: ref.processedContent || `[Archive: ${ref.filename}]`,
475
- };
476
- }
477
- async extractAudioTargeted(_buffer, ref, _params) {
478
- // Audio doesn't have sub-section extraction yet — return full metadata
479
- if (!ref.processedContent) {
480
- await this.processFileOnDemand(ref);
481
- }
482
- return {
483
- success: true,
484
- text: ref.processedContent || `[Audio: ${ref.filename}]`,
485
- };
486
- }
487
- async extractTextTargeted(ref, params) {
488
- // For text files, use line-range reading
489
- const startLine = params.page_range?.start ?? params.row_range?.start ?? 1;
490
- const endLine = params.page_range?.end ?? params.row_range?.end;
491
- const result = await this.readSection(ref.id, startLine, endLine, 50_000);
492
- return {
493
- success: true,
494
- text: result.content,
495
- metadata: {
496
- startLine: result.startLine,
497
- endLine: result.endLine,
498
- totalLines: result.totalLines,
499
- truncated: result.truncated,
500
- },
501
- };
502
- }
503
- /**
504
- * List all registered files.
505
- * Returns a lightweight summary suitable for the LLM.
506
- */
507
- list() {
508
- return Array.from(this.files.values());
509
- }
510
- /**
511
- * Generate a formatted table of all registered files for the LLM.
512
- */
513
- listFormatted() {
514
- const files = this.list();
515
- if (files.length === 0) {
516
- return "No files attached.";
517
- }
518
- const header = "| # | Filename | Type | Size | Tier | Est. Tokens | Status |\n" +
519
- "|---|----------|------|------|------|-------------|--------|\n";
520
- const rows = files.map((f, i) => `| ${i + 1} | ${f.filename} | ${f.detectedType} | ${this.formatSize(f.sizeBytes)} | ` +
521
- `${f.sizeTier} | ~${f.estimatedTokens.toLocaleString()} | ${f.status} |`);
522
- return header + rows.join("\n");
523
- }
524
- /**
525
- * Read a section of a registered file.
526
- *
527
- * Uses StreamingReader for memory-efficient access.
528
- *
529
- * @param fileId - File reference ID
530
- * @param startLine - Starting line (1-indexed)
531
- * @param endLine - Ending line (1-indexed)
532
- * @param tokenBudget - Maximum tokens to return
533
- * @param provider - Provider name for token estimation
534
- * @returns FileReadResult
535
- */
536
- async readSection(fileId, startLine = 1, endLine, tokenBudget = 50_000, provider) {
537
- const ref = this.get(fileId);
538
- if (!ref) {
539
- throw new Error(`File reference not found: ${fileId}`);
540
- }
541
- // Process binary files on first read — the lazy registration path
542
- // stores raw binary to temp but never runs processors. We must process
543
- // on-demand so the LLM gets extracted text, not garbled binary.
544
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
545
- await this.processFileOnDemand(ref);
546
- }
547
- // If content is already cached (or was just processed), use buffer reader
548
- if (ref.processedContent) {
549
- return StreamingReader.readFromBuffer(Buffer.from(ref.processedContent, "utf-8"), {
550
- startLine,
551
- endLine,
552
- tokenBudget,
553
- provider,
554
- });
555
- }
556
- // If we have a temp path or original path, use streaming reader
557
- // (text files that were not processed on-demand)
558
- const filePath = ref.tempPath || ref.originalPath;
559
- if (filePath) {
560
- const result = await StreamingReader.readLines(filePath, {
561
- startLine,
562
- endLine,
563
- tokenBudget,
564
- provider,
565
- });
566
- // Cache total lines for future reference
567
- if (!ref.totalLines) {
568
- ref.totalLines = result.totalLines;
569
- }
570
- return result;
571
- }
572
- throw new Error(`No accessible content for file "${ref.filename}" (id: ${fileId})`);
573
- }
574
- /**
575
- * Search within a registered file.
576
- *
577
- * @param fileId - File reference ID
578
- * @param pattern - Search pattern (string or regex)
579
- * @param maxMatches - Maximum matches to return
580
- * @returns FileSearchResult
581
- */
582
- async search(fileId, pattern, maxMatches = 50) {
583
- const ref = this.get(fileId);
584
- if (!ref) {
585
- throw new Error(`File reference not found: ${fileId}`);
586
- }
587
- // Process binary files on first search — same lazy processing as readSection().
588
- // Without this, search would scan raw PDF/XLSX binary bytes for text patterns.
589
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
590
- await this.processFileOnDemand(ref);
591
- }
592
- // Search in processedContent if available (binary files after on-demand processing, or tiny files)
593
- if (ref.processedContent) {
594
- return FileReferenceRegistry.searchInMemory(ref.processedContent, pattern, maxMatches);
595
- }
596
- // For text files: use streaming search on the raw temp file (content IS valid UTF-8)
597
- const filePath = ref.tempPath || ref.originalPath;
598
- if (filePath) {
599
- return StreamingReader.searchInFile(filePath, pattern, {
600
- maxMatches,
601
- });
602
- }
603
- throw new Error(`No searchable content for file "${ref.filename}" (id: ${fileId})`);
604
- }
605
- /**
606
- * Search within in-memory content (for tiny files without temp paths).
607
- */
608
- static searchInMemory(content, pattern, maxMatches) {
609
- const regex = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "i");
610
- const lines = content.split("\n");
611
- const matches = [];
612
- let totalMatches = 0;
613
- for (let i = 0; i < lines.length; i++) {
614
- if (regex.test(lines[i])) {
615
- totalMatches++;
616
- if (matches.length < maxMatches) {
617
- matches.push({
618
- lineNumber: i + 1,
619
- line: lines[i],
620
- contextBefore: lines.slice(Math.max(0, i - 3), i),
621
- contextAfter: lines.slice(i + 1, Math.min(lines.length, i + 4)),
622
- });
623
- }
624
- }
625
- }
626
- return {
627
- matches,
628
- totalMatches,
629
- truncated: totalMatches > maxMatches,
630
- };
631
- }
632
- /**
633
- * Store a summary for a file reference.
634
- */
635
- setSummary(fileId, summary) {
636
- const ref = this.files.get(fileId);
637
- if (ref) {
638
- ref.summary = summary;
639
- ref.status = "processed";
640
- ref.lastAccessedAt = Date.now();
641
- }
642
- }
643
- /**
644
- * Remove a file reference and clean up its temp file.
645
- */
646
- async remove(fileId) {
647
- const ref = this.files.get(fileId);
648
- if (!ref) {
649
- return false;
650
- }
651
- // Clean up temp file (only if we created it, not for original paths)
652
- if (ref.tempPath && ref.source !== "path") {
653
- try {
654
- await unlink(ref.tempPath);
655
- this.currentTempBytes -= ref.sizeBytes;
656
- }
657
- catch {
658
- // Temp file may already be cleaned up
659
- }
660
- }
661
- this.files.delete(fileId);
662
- return true;
663
- }
664
- /**
665
- * Clear all file references and clean up temp directory.
666
- */
667
- async clear() {
668
- const ids = Array.from(this.files.keys());
669
- for (const id of ids) {
670
- await this.remove(id);
671
- }
672
- this.files.clear();
673
- this.currentTempBytes = 0;
674
- }
675
- /**
676
- * Get the number of registered files.
677
- */
678
- get size() {
679
- return this.files.size;
680
- }
681
- /**
682
- * Generate the preview text for the initial prompt.
683
- *
684
- * Returns a compact summary of all registered files that uses ~50-100 tokens
685
- * per file instead of full content. The LLM can use file tools to access
686
- * more content as needed.
687
- *
688
- * @returns Formatted string for prompt injection
689
- */
690
- async generatePromptPreview() {
691
- const files = this.list();
692
- if (files.length === 0) {
693
- return "";
694
- }
695
- // Ensure binary files are processed so previews contain real content
696
- // (e.g., video metadata, audio tags) instead of placeholder strings.
697
- for (const ref of files) {
698
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
699
- await this.processFileOnDemand(ref);
700
- }
701
- }
702
- const sections = [];
703
- sections.push(`\n\n## Attached Files (${files.length})\n`);
704
- for (const ref of files) {
705
- const sizeStr = this.formatSize(ref.sizeBytes);
706
- sections.push(`### File: "${ref.filename}" (${sizeStr}, ${ref.detectedType})`);
707
- if (ref.sizeTier === "tiny" && ref.processedContent) {
708
- // Tiny files: include full content inline
709
- sections.push(ref.processedContent);
710
- }
711
- else {
712
- // Larger files: include preview + guidance
713
- sections.push(`**Preview** (first ${this.defaultPreviewChars} chars):`);
714
- sections.push(ref.preview);
715
- // Add type-specific extraction hints
716
- const hint = FileReferenceRegistry.getExtractionHint(ref.detectedType, sizeStr);
717
- if (hint) {
718
- sections.push(`\n> ${hint}`);
719
- }
720
- else if (ref.sizeTier !== "small") {
721
- sections.push(`\n> This file is ${sizeStr}. Use \`read_file_section\` to read specific ` +
722
- `sections, \`search_in_file\` to search, or \`summarize_file\` for a full summary.`);
723
- }
724
- }
725
- sections.push(""); // blank line between files
726
- }
727
- return sections.join("\n");
728
- }
729
- // ─── Private Methods ────────────────────────────────────────────
730
- /**
731
- * Get type-specific extraction hints for the LLM prompt.
732
- * Tells the LLM what parameters it can use with extract_file_content.
733
- */
734
- static getExtractionHint(type, sizeStr) {
735
- switch (type) {
736
- case "video":
737
- return (`This video is ${sizeStr}. Use \`extract_file_content\` with \`start_time\`/\`end_time\` ` +
738
- `to get frames from specific time ranges (e.g., start_time=5, end_time=10, frame_count=3). ` +
739
- `Initial keyframes are already provided above.`);
740
- case "pdf":
741
- return (`This PDF is ${sizeStr}. Use \`extract_file_content\` with \`pages\` (e.g., [1, 3, 5]) ` +
742
- `or \`page_range\` (e.g., {start: 1, end: 10}) to get specific pages. ` +
743
- `Use \`read_file_section\` for line-range access or \`search_in_file\` to search.`);
744
- case "xlsx":
745
- return (`This spreadsheet is ${sizeStr}. Use \`extract_file_content\` with \`sheet\` (name or index), ` +
746
- `\`row_range\` (e.g., {start: 1, end: 50}), and \`columns\` (e.g., ["A", "B", "D"]) ` +
747
- `for targeted data extraction.`);
748
- case "pptx":
749
- return (`This presentation is ${sizeStr}. Use \`extract_file_content\` with \`pages\` ` +
750
- `(e.g., [1, 3, 5]) to extract specific slides.`);
751
- case "archive":
752
- return (`This archive is ${sizeStr}. Use \`extract_file_content\` with \`entry_path\` ` +
753
- `(e.g., "src/index.ts") to extract a specific file from the archive.`);
754
- case "audio":
755
- return (`This audio file is ${sizeStr}. Metadata is shown above. ` +
756
- `Use \`read_file_section\` or \`search_in_file\` for text-based access.`);
757
- default:
758
- return null;
759
- }
760
- }
761
- /**
762
- * Classify a file into a size tier based on byte size.
763
- */
764
- static classifySizeTier(sizeBytes) {
765
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.TINY_MAX) {
766
- return "tiny";
767
- }
768
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.SMALL_MAX) {
769
- return "small";
770
- }
771
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.MEDIUM_MAX) {
772
- return "medium";
773
- }
774
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.LARGE_MAX) {
775
- return "large";
776
- }
777
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.HUGE_MAX) {
778
- return "huge";
779
- }
780
- return "oversized";
781
- }
782
- /**
783
- * Process a binary file on-demand, extracting text content via the
784
- * appropriate processor. This bridges the gap between the lazy registration
785
- * path (which stores raw binary) and the LLM read tools (which need text).
786
- *
787
- * Called lazily on first readSection() or search() for non-text files.
788
- * Results are cached in ref.processedContent for subsequent reads.
789
- */
790
- async processFileOnDemand(ref) {
791
- // Prevent concurrent processing of the same file
792
- if (ref.status === "processing") {
793
- return;
794
- }
795
- ref.status = "processing";
796
- try {
797
- const buffer = ref.tempPath ? await readFile(ref.tempPath) : null;
798
- if (!buffer) {
799
- ref.status = "error";
800
- logger.warn(`[FileReferenceRegistry] No buffer available for on-demand processing: "${ref.filename}"`);
801
- return;
802
- }
803
- let extractedText = null;
804
- switch (ref.detectedType) {
805
- case "pdf":
806
- extractedText = await this.extractPdfText(buffer);
807
- break;
808
- case "xlsx":
809
- extractedText = await this.extractExcelText(buffer, ref);
810
- break;
811
- case "docx":
812
- extractedText = await this.extractWordText(buffer, ref);
813
- break;
814
- case "pptx":
815
- extractedText = await this.extractPptxText(buffer);
816
- break;
817
- case "video":
818
- extractedText = await this.extractVideoContent(buffer, ref);
819
- break;
820
- case "audio":
821
- extractedText = await this.extractAudioContent(buffer, ref);
822
- break;
823
- case "archive":
824
- extractedText = await this.extractArchiveContent(buffer, ref);
825
- break;
826
- default:
827
- // For unknown binary types, provide a descriptive fallback
828
- extractedText =
829
- `[Binary file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}, type: ${ref.detectedType}]\n` +
830
- `This file could not be processed into text content.`;
831
- break;
832
- }
833
- if (extractedText) {
834
- ref.processedContent = extractedText;
835
- ref.status = "processed";
836
- // Update the preview with actual content instead of placeholder metadata
837
- const previewChars = this.defaultPreviewChars;
838
- if (extractedText.length <= previewChars) {
839
- ref.preview = extractedText;
840
- }
841
- else {
842
- const lastNewline = extractedText.lastIndexOf("\n", previewChars);
843
- ref.preview =
844
- lastNewline > previewChars * 0.8
845
- ? extractedText.substring(0, lastNewline)
846
- : extractedText.substring(0, previewChars) + "\n...[truncated]";
847
- }
848
- logger.info(`[FileReferenceRegistry] On-demand processed "${ref.filename}" ` +
849
- `(${ref.detectedType}, ${this.formatSize(ref.sizeBytes)}) → ${extractedText.length} chars`);
850
- }
851
- else {
852
- ref.processedContent =
853
- `[${ref.detectedType.toUpperCase()} file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}]\n` +
854
- `Content could not be extracted. The file may be corrupted or in an unsupported format.`;
855
- ref.preview = ref.processedContent;
856
- ref.status = "processed";
857
- }
858
- }
859
- catch (err) {
860
- const errorMsg = err instanceof Error ? err.message : String(err);
861
- logger.warn(`[FileReferenceRegistry] On-demand processing failed for "${ref.filename}": ${errorMsg}`);
862
- ref.processedContent =
863
- `[Processing error for ${ref.filename}]\n` +
864
- `Type: ${ref.detectedType}, Size: ${this.formatSize(ref.sizeBytes)}\n` +
865
- `Error: ${errorMsg}`;
866
- ref.preview = ref.processedContent;
867
- ref.status = "error";
868
- }
869
- }
870
- /**
871
- * Extract text from a PDF buffer using pdf-parse v2 (pdfjs-dist under the hood).
872
- *
873
- * Handles compressed streams (FlateDecode), CMap-encoded text, modern PDFs,
874
- * and most text-based PDF formats. For scanned/image-only PDFs where no text
875
- * can be extracted, falls back to a descriptive message.
876
- */
877
- async extractPdfText(buffer) {
878
- try {
879
- const { PDFParse } = await import("pdf-parse");
880
- const pdf = new PDFParse({
881
- data: new Uint8Array(buffer),
882
- });
883
- try {
884
- const textResult = await pdf.getText({
885
- // Limit to first 100 pages to avoid unbounded processing
886
- last: 100,
887
- });
888
- const text = textResult.text?.trim();
889
- if (!text || text.length === 0) {
890
- // No text found — likely a scanned/image-only PDF
891
- const pageCount = textResult.total || 0;
892
- return (`[PDF document: ${this.formatSize(buffer.length)}, ${pageCount} page(s)]\n` +
893
- `This PDF appears to contain scanned images or non-extractable content.\n` +
894
- `Text could not be extracted from the document. The content may consist of:\n` +
895
- `- Scanned pages (images of text, not searchable text)\n` +
896
- `- Forms or graphical content\n` +
897
- `- Protected/encrypted content`);
898
- }
899
- // Clean up excessive blank lines
900
- const cleaned = text.replace(/\n{3,}/g, "\n\n");
901
- return cleaned;
902
- }
903
- finally {
904
- // Always clean up the PDF instance to free pdfjs-dist resources
905
- await pdf.destroy().catch(() => {
906
- /* cleanup - ignore destroy errors */
907
- });
908
- }
909
- }
910
- catch (err) {
911
- logger.warn(`[FileReferenceRegistry] PDF text extraction failed: ${err instanceof Error ? err.message : String(err)}`);
912
- return null;
913
- }
914
- }
915
- /**
916
- * Extract text content from an Excel file using ExcelProcessor.
917
- */
918
- async extractExcelText(buffer, ref) {
919
- try {
920
- const { processExcel } = await import("../processors/document/ExcelProcessor.js");
921
- const result = await processExcel({
922
- id: ref.id,
923
- name: ref.filename,
924
- mimetype: ref.mimeType,
925
- size: ref.sizeBytes,
926
- buffer,
927
- });
928
- if (!result.success || !result.data) {
929
- return null;
930
- }
931
- // Format worksheets as TSV text for LLM consumption
932
- const worksheets = result.data.worksheets;
933
- if (worksheets && worksheets.length > 0) {
934
- const sections = [];
935
- for (const ws of worksheets) {
936
- sections.push(`## Sheet: ${ws.name}`);
937
- if (ws.headers.length > 0) {
938
- sections.push(ws.headers.join("\t"));
939
- }
940
- for (const row of ws.rows) {
941
- sections.push(row.map((cell) => (cell === null ? "" : String(cell))).join("\t"));
942
- }
943
- sections.push("");
944
- }
945
- return sections.join("\n");
946
- }
947
- return null;
948
- }
949
- catch (err) {
950
- logger.warn(`[FileReferenceRegistry] Excel extraction failed: ${err instanceof Error ? err.message : String(err)}`);
951
- return null;
952
- }
953
- }
954
- /**
955
- * Extract text content from a Word document using WordProcessor.
956
- */
957
- async extractWordText(buffer, ref) {
958
- try {
959
- const { processWord } = await import("../processors/document/WordProcessor.js");
960
- const result = await processWord({
961
- id: ref.id,
962
- name: ref.filename,
963
- mimetype: ref.mimeType,
964
- size: ref.sizeBytes,
965
- buffer,
966
- });
967
- if (!result.success || !result.data) {
968
- return null;
969
- }
970
- return result.data.textContent || null;
971
- }
972
- catch (err) {
973
- logger.warn(`[FileReferenceRegistry] Word extraction failed: ${err instanceof Error ? err.message : String(err)}`);
974
- return null;
975
- }
976
- }
977
- /**
978
- * Extract text from a PowerPoint file using PptxProcessor.
979
- */
980
- async extractPptxText(buffer) {
981
- try {
982
- const { PptxProcessor } = await import("../processors/document/PptxProcessor.js");
983
- return await PptxProcessor.extractText(buffer);
984
- }
985
- catch (err) {
986
- logger.warn(`[FileReferenceRegistry] PPTX extraction failed: ${err instanceof Error ? err.message : String(err)}`);
987
- return null;
988
- }
989
- }
990
- /**
991
- * Extract metadata and content from a video file using VideoProcessor.
992
- */
993
- async extractVideoContent(buffer, ref) {
994
- try {
995
- const { processVideo } = await import("../processors/media/VideoProcessor.js");
996
- const result = await processVideo({
997
- id: ref.id,
998
- name: ref.filename,
999
- mimetype: ref.mimeType,
1000
- size: ref.sizeBytes,
1001
- buffer,
1002
- });
1003
- if (!result.success || !result.data) {
1004
- return null;
1005
- }
1006
- // Store keyframe images on the reference for injection into the prompt
1007
- if (result.data.keyframes && result.data.keyframes.length > 0) {
1008
- ref.extractedImages = result.data.keyframes;
1009
- logger.info(`[FileReferenceRegistry] Extracted ${result.data.keyframes.length} keyframes from "${ref.filename}"`);
1010
- }
1011
- return result.data.textContent || null;
1012
- }
1013
- catch (err) {
1014
- logger.warn(`[FileReferenceRegistry] Video extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1015
- // Provide basic metadata even on failure
1016
- return (`[Video file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}]\n` +
1017
- `Video processing requires ffmpeg/ffprobe. Metadata could not be extracted.\n` +
1018
- `Error: ${err instanceof Error ? err.message : String(err)}`);
1019
- }
1020
- }
1021
- /**
1022
- * Extract metadata and content from an audio file using AudioProcessor.
1023
- */
1024
- async extractAudioContent(buffer, ref) {
1025
- try {
1026
- const { processAudio } = await import("../processors/media/AudioProcessor.js");
1027
- const result = await processAudio({
1028
- id: ref.id,
1029
- name: ref.filename,
1030
- mimetype: ref.mimeType,
1031
- size: ref.sizeBytes,
1032
- buffer,
1033
- });
1034
- if (!result.success || !result.data) {
1035
- return null;
1036
- }
1037
- return result.data.textContent || null;
1038
- }
1039
- catch (err) {
1040
- logger.warn(`[FileReferenceRegistry] Audio extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1041
- return (`[Audio file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}]\n` +
1042
- `Audio processing failed. Error: ${err instanceof Error ? err.message : String(err)}`);
1043
- }
1044
- }
1045
- /**
1046
- * Extract file listing from an archive using ArchiveProcessor.
1047
- */
1048
- async extractArchiveContent(buffer, ref) {
1049
- try {
1050
- const { processArchive } = await import("../processors/archive/ArchiveProcessor.js");
1051
- const result = await processArchive({
1052
- id: ref.id,
1053
- name: ref.filename,
1054
- mimetype: ref.mimeType,
1055
- size: ref.sizeBytes,
1056
- buffer,
1057
- });
1058
- if (!result.success || !result.data) {
1059
- return null;
1060
- }
1061
- return result.data.textContent || null;
1062
- }
1063
- catch (err) {
1064
- logger.warn(`[FileReferenceRegistry] Archive extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1065
- return null;
1066
- }
1067
- }
1068
- /**
1069
- * Extract a preview from a buffer.
1070
- * For text: first N characters.
1071
- * For binary: type-specific metadata.
1072
- */
1073
- extractPreview(buffer, type, maxChars) {
1074
- if (this.isTextType(type, buffer)) {
1075
- // Text-based: extract first N characters
1076
- const text = buffer.toString("utf-8", 0, Math.min(buffer.length, maxChars + 100));
1077
- if (text.length <= maxChars) {
1078
- return text;
1079
- }
1080
- // Break at line boundary
1081
- const lastNewline = text.lastIndexOf("\n", maxChars);
1082
- if (lastNewline > maxChars * 0.8) {
1083
- return text.substring(0, lastNewline);
1084
- }
1085
- return text.substring(0, maxChars) + "\n...[truncated]";
1086
- }
1087
- // Binary types: type-specific preview
1088
- const sizeMB = (buffer.length / (1024 * 1024)).toFixed(2);
1089
- switch (type) {
1090
- case "image":
1091
- return `[Image file: ${sizeMB} MB]`;
1092
- case "video":
1093
- return `[Video file: ${sizeMB} MB — use read tools for metadata/keyframes]`;
1094
- case "audio":
1095
- return `[Audio file: ${sizeMB} MB — use read tools for metadata/transcript]`;
1096
- case "archive":
1097
- return `[Archive file: ${sizeMB} MB — use read tools for file listing]`;
1098
- case "pdf":
1099
- return `[PDF document: ${sizeMB} MB — use read tools for page content]`;
1100
- default:
1101
- return `[Binary file: ${sizeMB} MB, type: ${type}]`;
1102
- }
1103
- }
1104
- /**
1105
- * Detect file type from buffer magic bytes and extension.
1106
- */
1107
- detectType(buffer, ext) {
1108
- // Check magic bytes first
1109
- if (buffer.length >= 4) {
1110
- const header = buffer.subarray(0, 8);
1111
- // PNG: 89 50 4E 47
1112
- if (header[0] === 0x89 &&
1113
- header[1] === 0x50 &&
1114
- header[2] === 0x4e &&
1115
- header[3] === 0x47) {
1116
- return "image";
1117
- }
1118
- // JPEG: FF D8 FF
1119
- if (header[0] === 0xff && header[1] === 0xd8 && header[2] === 0xff) {
1120
- return "image";
1121
- }
1122
- // GIF: 47 49 46
1123
- if (header[0] === 0x47 && header[1] === 0x49 && header[2] === 0x46) {
1124
- return "image";
1125
- }
1126
- // WebP: 52 49 46 46 ... 57 45 42 50
1127
- if (header[0] === 0x52 &&
1128
- header[1] === 0x49 &&
1129
- header[2] === 0x46 &&
1130
- header[3] === 0x46 &&
1131
- buffer.length >= 12 &&
1132
- buffer[8] === 0x57 &&
1133
- buffer[9] === 0x45 &&
1134
- buffer[10] === 0x42 &&
1135
- buffer[11] === 0x50) {
1136
- return "image";
1137
- }
1138
- // PDF: 25 50 44 46
1139
- if (header[0] === 0x25 &&
1140
- header[1] === 0x50 &&
1141
- header[2] === 0x44 &&
1142
- header[3] === 0x46) {
1143
- return "pdf";
1144
- }
1145
- // ZIP (and derivatives: xlsx, docx, pptx)
1146
- if (header[0] === 0x50 && header[1] === 0x4b) {
1147
- // Differentiate by extension
1148
- if (ext === "xlsx") {
1149
- return "xlsx";
1150
- }
1151
- if (ext === "docx") {
1152
- return "docx";
1153
- }
1154
- if (ext === "pptx") {
1155
- return "pptx";
1156
- }
1157
- return "archive";
1158
- }
1159
- // MP4/M4A: ftyp
1160
- if (buffer.length >= 8 &&
1161
- buffer[4] === 0x66 &&
1162
- buffer[5] === 0x74 &&
1163
- buffer[6] === 0x79 &&
1164
- buffer[7] === 0x70) {
1165
- if (["m4a", "aac"].includes(ext)) {
1166
- return "audio";
1167
- }
1168
- return "video";
1169
- }
1170
- // ID3 (MP3): 49 44 33
1171
- if (header[0] === 0x49 && header[1] === 0x44 && header[2] === 0x33) {
1172
- return "audio";
1173
- }
1174
- // OGG: 4F 67 67 53
1175
- if (header[0] === 0x4f &&
1176
- header[1] === 0x67 &&
1177
- header[2] === 0x67 &&
1178
- header[3] === 0x53) {
1179
- return "audio";
1180
- }
1181
- // FLAC: 66 4C 61 43
1182
- if (header[0] === 0x66 &&
1183
- header[1] === 0x4c &&
1184
- header[2] === 0x61 &&
1185
- header[3] === 0x43) {
1186
- return "audio";
1187
- }
1188
- // WAV: 52 49 46 46 ... 57 41 56 45
1189
- if (header[0] === 0x52 &&
1190
- header[1] === 0x49 &&
1191
- header[2] === 0x46 &&
1192
- header[3] === 0x46 &&
1193
- buffer.length >= 12 &&
1194
- buffer[8] === 0x57 &&
1195
- buffer[9] === 0x41 &&
1196
- buffer[10] === 0x56 &&
1197
- buffer[11] === 0x45) {
1198
- return "audio";
1199
- }
1200
- // MKV/WebM: 1A 45 DF A3
1201
- if (header[0] === 0x1a &&
1202
- header[1] === 0x45 &&
1203
- header[2] === 0xdf &&
1204
- header[3] === 0xa3) {
1205
- if (ext === "webm") {
1206
- return "video";
1207
- }
1208
- return "video";
1209
- }
1210
- // AVI: 52 49 46 46 ... 41 56 49 20
1211
- if (header[0] === 0x52 &&
1212
- header[1] === 0x49 &&
1213
- header[2] === 0x46 &&
1214
- header[3] === 0x46 &&
1215
- buffer.length >= 12 &&
1216
- buffer[8] === 0x41 &&
1217
- buffer[9] === 0x56 &&
1218
- buffer[10] === 0x49 &&
1219
- buffer[11] === 0x20) {
1220
- return "video";
1221
- }
1222
- }
1223
- // Fall back to extension
1224
- return this.detectTypeFromExtension(ext);
1225
- }
1226
- /**
1227
- * Detect file type from extension alone.
1228
- */
1229
- detectTypeFromExtension(ext) {
1230
- const extensionMap = {
1231
- // Images
1232
- png: "image",
1233
- jpg: "image",
1234
- jpeg: "image",
1235
- gif: "image",
1236
- webp: "image",
1237
- bmp: "image",
1238
- tiff: "image",
1239
- ico: "image",
1240
- // Video
1241
- mp4: "video",
1242
- mkv: "video",
1243
- webm: "video",
1244
- avi: "video",
1245
- mov: "video",
1246
- m4v: "video",
1247
- // Audio
1248
- mp3: "audio",
1249
- wav: "audio",
1250
- ogg: "audio",
1251
- flac: "audio",
1252
- aac: "audio",
1253
- m4a: "audio",
1254
- wma: "audio",
1255
- // Documents
1256
- pdf: "pdf",
1257
- docx: "docx",
1258
- pptx: "pptx",
1259
- xlsx: "xlsx",
1260
- // Data
1261
- csv: "csv",
1262
- tsv: "csv",
1263
- // Markup
1264
- svg: "svg",
1265
- // Archives
1266
- zip: "archive",
1267
- tar: "archive",
1268
- gz: "archive",
1269
- tgz: "archive",
1270
- "7z": "archive",
1271
- rar: "archive",
1272
- // Text & Code
1273
- txt: "text",
1274
- md: "text",
1275
- log: "text",
1276
- json: "text",
1277
- yaml: "text",
1278
- yml: "text",
1279
- xml: "text",
1280
- html: "text",
1281
- htm: "text",
1282
- css: "text",
1283
- js: "text",
1284
- ts: "text",
1285
- jsx: "text",
1286
- tsx: "text",
1287
- py: "text",
1288
- java: "text",
1289
- go: "text",
1290
- rs: "text",
1291
- rb: "text",
1292
- php: "text",
1293
- c: "text",
1294
- cpp: "text",
1295
- h: "text",
1296
- cs: "text",
1297
- swift: "text",
1298
- kt: "text",
1299
- scala: "text",
1300
- sql: "text",
1301
- sh: "text",
1302
- bash: "text",
1303
- zsh: "text",
1304
- toml: "text",
1305
- ini: "text",
1306
- cfg: "text",
1307
- env: "text",
1308
- dockerfile: "text",
1309
- makefile: "text",
1310
- };
1311
- return extensionMap[ext.toLowerCase()] || "unknown";
1312
- }
1313
- /**
1314
- * Whether a file type contains readable text content.
1315
- * For "unknown" types, optionally checks the buffer for valid UTF-8 text.
1316
- */
1317
- isTextType(type, buffer) {
1318
- if (["text", "csv", "svg"].includes(type)) {
1319
- return true;
1320
- }
1321
- // For unknown types, heuristically check if the buffer is likely text
1322
- if (type === "unknown" && buffer && buffer.length > 0) {
1323
- return FileReferenceRegistry.looksLikeText(buffer);
1324
- }
1325
- return false;
1326
- }
1327
- /**
1328
- * Heuristic check: does a buffer look like valid text content?
1329
- * Checks the first 512 bytes for mostly printable ASCII/UTF-8 characters.
1330
- * Returns true if >90% of bytes are printable (ASCII 0x20-0x7E, tab, newline, CR).
1331
- */
1332
- static looksLikeText(buffer) {
1333
- const sampleSize = Math.min(buffer.length, 512);
1334
- let printable = 0;
1335
- for (let i = 0; i < sampleSize; i++) {
1336
- const b = buffer[i];
1337
- // Printable ASCII, tab, newline, carriage return, or high bytes (UTF-8 multibyte)
1338
- if ((b >= 0x20 && b <= 0x7e) ||
1339
- b === 0x09 ||
1340
- b === 0x0a ||
1341
- b === 0x0d ||
1342
- b >= 0x80) {
1343
- printable++;
1344
- }
1345
- }
1346
- return printable / sampleSize > 0.9;
1347
- }
1348
- /**
1349
- * Guess MIME type from file type and extension.
1350
- */
1351
- guessMimeType(type, ext) {
1352
- const mimeMap = {
1353
- // By file type
1354
- csv: "text/csv",
1355
- svg: "image/svg+xml",
1356
- pdf: "application/pdf",
1357
- docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1358
- pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1359
- xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1360
- video: "video/mp4",
1361
- audio: "audio/mpeg",
1362
- archive: "application/zip",
1363
- image: "image/png",
1364
- };
1365
- if (mimeMap[type]) {
1366
- return mimeMap[type];
1367
- }
1368
- // By extension
1369
- const extMime = {
1370
- png: "image/png",
1371
- jpg: "image/jpeg",
1372
- jpeg: "image/jpeg",
1373
- gif: "image/gif",
1374
- webp: "image/webp",
1375
- mp4: "video/mp4",
1376
- mkv: "video/x-matroska",
1377
- webm: "video/webm",
1378
- avi: "video/x-msvideo",
1379
- mov: "video/quicktime",
1380
- mp3: "audio/mpeg",
1381
- wav: "audio/wav",
1382
- ogg: "audio/ogg",
1383
- flac: "audio/flac",
1384
- json: "application/json",
1385
- xml: "application/xml",
1386
- html: "text/html",
1387
- css: "text/css",
1388
- js: "text/javascript",
1389
- ts: "text/typescript",
1390
- py: "text/x-python",
1391
- zip: "application/zip",
1392
- tar: "application/x-tar",
1393
- gz: "application/gzip",
1394
- };
1395
- return extMime[ext.toLowerCase()] || "application/octet-stream";
1396
- }
1397
- /**
1398
- * Guess file extension from magic bytes.
1399
- */
1400
- guessExtension(buffer) {
1401
- if (buffer.length < 4) {
1402
- return "";
1403
- }
1404
- if (buffer[0] === 0x89 && buffer[1] === 0x50) {
1405
- return ".png";
1406
- }
1407
- if (buffer[0] === 0xff && buffer[1] === 0xd8) {
1408
- return ".jpg";
1409
- }
1410
- if (buffer[0] === 0x25 && buffer[1] === 0x50) {
1411
- return ".pdf";
1412
- }
1413
- if (buffer[0] === 0x50 && buffer[1] === 0x4b) {
1414
- return ".zip";
1415
- }
1416
- if (buffer[0] === 0x49 && buffer[1] === 0x44) {
1417
- return ".mp3";
1418
- }
1419
- // MP4/MOV/M4V — ftyp atom at offset 4
1420
- if (buffer.length >= 8 &&
1421
- buffer[4] === 0x66 &&
1422
- buffer[5] === 0x74 &&
1423
- buffer[6] === 0x79 &&
1424
- buffer[7] === 0x70) {
1425
- // Check the brand to distinguish MOV vs MP4
1426
- const brand = buffer.toString("ascii", 8, 12);
1427
- if (brand === "qt ") {
1428
- return ".mov";
1429
- }
1430
- return ".mp4";
1431
- }
1432
- // MKV/WebM — EBML header (0x1A 0x45 0xDF 0xA3)
1433
- if (buffer.length >= 4 &&
1434
- buffer[0] === 0x1a &&
1435
- buffer[1] === 0x45 &&
1436
- buffer[2] === 0xdf &&
1437
- buffer[3] === 0xa3) {
1438
- return ".mkv";
1439
- }
1440
- // AVI — RIFF....AVI
1441
- if (buffer.length >= 12 &&
1442
- buffer[0] === 0x52 &&
1443
- buffer[1] === 0x49 &&
1444
- buffer[2] === 0x46 &&
1445
- buffer[3] === 0x46 &&
1446
- buffer[8] === 0x41 &&
1447
- buffer[9] === 0x56 &&
1448
- buffer[10] === 0x49) {
1449
- return ".avi";
1450
- }
1451
- // WAV — RIFF....WAVE
1452
- if (buffer.length >= 12 &&
1453
- buffer[0] === 0x52 &&
1454
- buffer[1] === 0x49 &&
1455
- buffer[2] === 0x46 &&
1456
- buffer[3] === 0x46 &&
1457
- buffer[8] === 0x57 &&
1458
- buffer[9] === 0x41 &&
1459
- buffer[10] === 0x56 &&
1460
- buffer[11] === 0x45) {
1461
- return ".wav";
1462
- }
1463
- // FLAC
1464
- if (buffer.length >= 4 &&
1465
- buffer[0] === 0x66 &&
1466
- buffer[1] === 0x4c &&
1467
- buffer[2] === 0x61 &&
1468
- buffer[3] === 0x43) {
1469
- return ".flac";
1470
- }
1471
- // OGG
1472
- if (buffer.length >= 4 &&
1473
- buffer[0] === 0x4f &&
1474
- buffer[1] === 0x67 &&
1475
- buffer[2] === 0x67 &&
1476
- buffer[3] === 0x53) {
1477
- return ".ogg";
1478
- }
1479
- return "";
1480
- }
1481
- /**
1482
- * Persist a buffer to the temp directory.
1483
- */
1484
- async persistToTemp(id, buffer, ext) {
1485
- // Check temp space budget
1486
- if (this.currentTempBytes + buffer.length > this.maxTempBytes) {
1487
- // Try evicting oldest files
1488
- this.evictLRU();
1489
- if (this.currentTempBytes + buffer.length > this.maxTempBytes) {
1490
- throw new Error(`Temp directory budget exceeded (${this.formatSize(this.maxTempBytes)})`);
1491
- }
1492
- }
1493
- // Ensure temp directory exists
1494
- if (!this.tempDirCreated) {
1495
- await mkdir(this.tempDir, { recursive: true });
1496
- this.tempDirCreated = true;
1497
- }
1498
- const tempPath = join(this.tempDir, `${id}${ext ? `.${ext}` : ""}`);
1499
- await writeFile(tempPath, buffer);
1500
- this.currentTempBytes += buffer.length;
1501
- return tempPath;
1502
- }
1503
- /**
1504
- * Evict the least recently used file reference.
1505
- */
1506
- evictLRU() {
1507
- let oldest = null;
1508
- let oldestId = null;
1509
- for (const [id, ref] of this.files) {
1510
- if (!oldest || ref.lastAccessedAt < oldest.lastAccessedAt) {
1511
- oldest = ref;
1512
- oldestId = id;
1513
- }
1514
- }
1515
- if (oldestId && oldest) {
1516
- logger.info(`[FileReferenceRegistry] Evicting LRU: "${oldest.filename}" ` +
1517
- `(last accessed ${new Date(oldest.lastAccessedAt).toISOString()})`);
1518
- // Clean up temp file if we created it
1519
- if (oldest.tempPath && oldest.source !== "path") {
1520
- unlink(oldest.tempPath).catch(() => {
1521
- // Ignore cleanup errors
1522
- });
1523
- this.currentTempBytes -= oldest.sizeBytes;
1524
- }
1525
- this.files.delete(oldestId);
1526
- }
1527
- }
1528
- /**
1529
- * Format byte size as human-readable string.
1530
- */
1531
- formatSize(bytes) {
1532
- if (bytes < 1024) {
1533
- return `${bytes} B`;
1534
- }
1535
- if (bytes < 1024 * 1024) {
1536
- return `${(bytes / 1024).toFixed(1)} KB`;
1537
- }
1538
- if (bytes < 1024 * 1024 * 1024) {
1539
- return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
1540
- }
1541
- return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
1542
- }
1543
- }