@juspay/neurolink 9.32.0 → 9.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (475)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/auth/anthropicOAuth.js +1 -1
  3. package/dist/cli/commands/proxy.js +18 -5
  4. package/dist/client/aiSdkAdapter.js +1 -1
  5. package/dist/client/index.js +137 -501
  6. package/dist/core/factory.js +0 -1
  7. package/dist/core/redisConversationMemoryManager.js +1 -1
  8. package/dist/features/ppt/slideGenerator.js +0 -1
  9. package/dist/features/ppt/utils.js +0 -1
  10. package/dist/lib/neurolink.d.ts +10 -0
  11. package/dist/lib/neurolink.js +41 -7
  12. package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
  13. package/dist/lib/types/generateTypes.d.ts +16 -0
  14. package/dist/lib/types/streamTypes.d.ts +15 -0
  15. package/dist/mcp/elicitationProtocol.js +1 -1
  16. package/dist/mcp/servers/agent/directToolsServer.js +0 -1
  17. package/dist/neurolink.d.ts +10 -0
  18. package/dist/neurolink.js +41 -7
  19. package/dist/providers/azureOpenai.js +1 -1
  20. package/dist/providers/huggingFace.js +0 -1
  21. package/dist/providers/openaiCompatible.js +0 -1
  22. package/dist/sdk/toolRegistration.js +0 -1
  23. package/dist/server/openapi/generator.js +1 -1
  24. package/dist/server/routes/claudeProxyRoutes.js +45 -9
  25. package/dist/types/configTypes.js +0 -5
  26. package/dist/types/generateTypes.d.ts +16 -0
  27. package/dist/types/modelTypes.js +0 -1
  28. package/dist/types/streamTypes.d.ts +15 -0
  29. package/dist/types/tools.js +0 -1
  30. package/dist/types/typeAliases.js +0 -1
  31. package/dist/types/utilities.js +1 -1
  32. package/dist/types/workflowTypes.js +0 -1
  33. package/dist/utils/providerRetry.js +0 -1
  34. package/dist/utils/providerUtils.js +0 -1
  35. package/package.json +2 -2
  36. package/dist/client/adapters/providerImageAdapter.js +0 -588
  37. package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
  38. package/dist/client/adapters/video/directorPipeline.js +0 -516
  39. package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
  40. package/dist/client/adapters/video/frameExtractor.js +0 -143
  41. package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
  42. package/dist/client/adapters/video/videoAnalyzer.js +0 -238
  43. package/dist/client/adapters/video/videoMerger.js +0 -171
  44. package/dist/client/agent/directTools.js +0 -840
  45. package/dist/client/auth/AuthProviderFactory.js +0 -111
  46. package/dist/client/auth/AuthProviderRegistry.js +0 -190
  47. package/dist/client/auth/RequestContext.js +0 -78
  48. package/dist/client/auth/accountPool.js +0 -178
  49. package/dist/client/auth/anthropicOAuth.js +0 -974
  50. package/dist/client/auth/authContext.js +0 -314
  51. package/dist/client/auth/errors.js +0 -39
  52. package/dist/client/auth/index.js +0 -61
  53. package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
  54. package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
  55. package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
  56. package/dist/client/auth/providers/CognitoProvider.js +0 -304
  57. package/dist/client/auth/providers/KeycloakProvider.js +0 -393
  58. package/dist/client/auth/providers/auth0.js +0 -274
  59. package/dist/client/auth/providers/betterAuth.js +0 -182
  60. package/dist/client/auth/providers/clerk.js +0 -317
  61. package/dist/client/auth/providers/custom.js +0 -112
  62. package/dist/client/auth/providers/firebase.js +0 -226
  63. package/dist/client/auth/providers/jwt.js +0 -212
  64. package/dist/client/auth/providers/oauth2.js +0 -303
  65. package/dist/client/auth/providers/supabase.js +0 -259
  66. package/dist/client/auth/providers/workos.js +0 -284
  67. package/dist/client/auth/serverBridge.js +0 -25
  68. package/dist/client/auth/sessionManager.js +0 -437
  69. package/dist/client/auth/tokenStore.js +0 -799
  70. package/dist/client/client/aiSdkAdapter.js +0 -487
  71. package/dist/client/client/auth.js +0 -473
  72. package/dist/client/client/errors.js +0 -552
  73. package/dist/client/client/httpClient.js +0 -837
  74. package/dist/client/client/index.js +0 -172
  75. package/dist/client/client/interceptors.js +0 -601
  76. package/dist/client/client/sseClient.js +0 -545
  77. package/dist/client/client/streamingClient.js +0 -917
  78. package/dist/client/client/wsClient.js +0 -369
  79. package/dist/client/config/configManager.js +0 -303
  80. package/dist/client/config/conversationMemory.js +0 -86
  81. package/dist/client/config/taskClassificationConfig.js +0 -148
  82. package/dist/client/constants/contextWindows.js +0 -295
  83. package/dist/client/constants/enums.js +0 -853
  84. package/dist/client/constants/index.js +0 -207
  85. package/dist/client/constants/performance.js +0 -389
  86. package/dist/client/constants/retry.js +0 -266
  87. package/dist/client/constants/timeouts.js +0 -182
  88. package/dist/client/constants/tokens.js +0 -380
  89. package/dist/client/constants/videoErrors.js +0 -46
  90. package/dist/client/context/budgetChecker.js +0 -98
  91. package/dist/client/context/contextCompactor.js +0 -205
  92. package/dist/client/context/emergencyTruncation.js +0 -88
  93. package/dist/client/context/errorDetection.js +0 -171
  94. package/dist/client/context/errors.js +0 -21
  95. package/dist/client/context/fileTokenBudget.js +0 -127
  96. package/dist/client/context/prompts/summarizationPrompt.js +0 -117
  97. package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
  98. package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
  99. package/dist/client/context/stages/structuredSummarizer.js +0 -99
  100. package/dist/client/context/stages/toolOutputPruner.js +0 -52
  101. package/dist/client/context/summarizationEngine.js +0 -136
  102. package/dist/client/context/toolOutputLimits.js +0 -78
  103. package/dist/client/context/toolPairRepair.js +0 -66
  104. package/dist/client/core/analytics.js +0 -88
  105. package/dist/client/core/baseProvider.js +0 -1385
  106. package/dist/client/core/constants.js +0 -140
  107. package/dist/client/core/conversationMemoryFactory.js +0 -141
  108. package/dist/client/core/conversationMemoryInitializer.js +0 -128
  109. package/dist/client/core/conversationMemoryManager.js +0 -344
  110. package/dist/client/core/dynamicModels.js +0 -358
  111. package/dist/client/core/evaluation.js +0 -309
  112. package/dist/client/core/evaluationProviders.js +0 -248
  113. package/dist/client/core/factory.js +0 -412
  114. package/dist/client/core/infrastructure/baseError.js +0 -22
  115. package/dist/client/core/infrastructure/baseFactory.js +0 -54
  116. package/dist/client/core/infrastructure/baseRegistry.js +0 -53
  117. package/dist/client/core/infrastructure/index.js +0 -5
  118. package/dist/client/core/infrastructure/retry.js +0 -20
  119. package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
  120. package/dist/client/core/modelConfiguration.js +0 -851
  121. package/dist/client/core/modules/GenerationHandler.js +0 -588
  122. package/dist/client/core/modules/MessageBuilder.js +0 -273
  123. package/dist/client/core/modules/StreamHandler.js +0 -185
  124. package/dist/client/core/modules/TelemetryHandler.js +0 -203
  125. package/dist/client/core/modules/ToolsManager.js +0 -499
  126. package/dist/client/core/modules/Utilities.js +0 -331
  127. package/dist/client/core/redisConversationMemoryManager.js +0 -1435
  128. package/dist/client/core/streamAnalytics.js +0 -131
  129. package/dist/client/evaluation/contextBuilder.js +0 -134
  130. package/dist/client/evaluation/index.js +0 -61
  131. package/dist/client/evaluation/prompts.js +0 -73
  132. package/dist/client/evaluation/ragasEvaluator.js +0 -110
  133. package/dist/client/evaluation/retryManager.js +0 -78
  134. package/dist/client/evaluation/scoring.js +0 -61
  135. package/dist/client/factories/providerFactory.js +0 -166
  136. package/dist/client/factories/providerRegistry.js +0 -166
  137. package/dist/client/features/ppt/constants.js +0 -896
  138. package/dist/client/features/ppt/contentPlanner.js +0 -529
  139. package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
  140. package/dist/client/features/ppt/slideGenerator.js +0 -532
  141. package/dist/client/features/ppt/slideRenderers.js +0 -2383
  142. package/dist/client/features/ppt/slideTypeInference.js +0 -405
  143. package/dist/client/features/ppt/types.js +0 -13
  144. package/dist/client/features/ppt/utils.js +0 -443
  145. package/dist/client/files/fileReferenceRegistry.js +0 -1543
  146. package/dist/client/files/fileTools.js +0 -450
  147. package/dist/client/files/streamingReader.js +0 -321
  148. package/dist/client/files/types.js +0 -23
  149. package/dist/client/hitl/hitlErrors.js +0 -54
  150. package/dist/client/hitl/hitlManager.js +0 -460
  151. package/dist/client/mcp/agentExposure.js +0 -356
  152. package/dist/client/mcp/auth/index.js +0 -11
  153. package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
  154. package/dist/client/mcp/auth/tokenStorage.js +0 -134
  155. package/dist/client/mcp/batching/index.js +0 -10
  156. package/dist/client/mcp/batching/requestBatcher.js +0 -441
  157. package/dist/client/mcp/caching/index.js +0 -10
  158. package/dist/client/mcp/caching/toolCache.js +0 -433
  159. package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
  160. package/dist/client/mcp/elicitation/index.js +0 -11
  161. package/dist/client/mcp/elicitation/types.js +0 -10
  162. package/dist/client/mcp/elicitationProtocol.js +0 -375
  163. package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
  164. package/dist/client/mcp/externalServerManager.js +0 -1478
  165. package/dist/client/mcp/factory.js +0 -161
  166. package/dist/client/mcp/flexibleToolValidator.js +0 -161
  167. package/dist/client/mcp/httpRateLimiter.js +0 -391
  168. package/dist/client/mcp/httpRetryHandler.js +0 -178
  169. package/dist/client/mcp/index.js +0 -74
  170. package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
  171. package/dist/client/mcp/mcpClientFactory.js +0 -708
  172. package/dist/client/mcp/mcpRegistryClient.js +0 -488
  173. package/dist/client/mcp/mcpServerBase.js +0 -373
  174. package/dist/client/mcp/multiServerManager.js +0 -579
  175. package/dist/client/mcp/registry.js +0 -158
  176. package/dist/client/mcp/routing/index.js +0 -10
  177. package/dist/client/mcp/routing/toolRouter.js +0 -416
  178. package/dist/client/mcp/serverCapabilities.js +0 -502
  179. package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
  180. package/dist/client/mcp/toolAnnotations.js +0 -239
  181. package/dist/client/mcp/toolConverter.js +0 -258
  182. package/dist/client/mcp/toolDiscoveryService.js +0 -798
  183. package/dist/client/mcp/toolIntegration.js +0 -334
  184. package/dist/client/mcp/toolRegistry.js +0 -729
  185. package/dist/client/memory/hippocampusInitializer.js +0 -19
  186. package/dist/client/memory/memoryRetrievalTools.js +0 -166
  187. package/dist/client/middleware/builtin/analytics.js +0 -132
  188. package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
  189. package/dist/client/middleware/builtin/guardrails.js +0 -109
  190. package/dist/client/middleware/builtin/lifecycle.js +0 -168
  191. package/dist/client/middleware/factory.js +0 -327
  192. package/dist/client/middleware/registry.js +0 -295
  193. package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
  194. package/dist/client/models/anthropicModels.js +0 -527
  195. package/dist/client/neurolink.js +0 -8233
  196. package/dist/client/observability/exporterRegistry.js +0 -413
  197. package/dist/client/observability/exporters/arizeExporter.js +0 -138
  198. package/dist/client/observability/exporters/baseExporter.js +0 -190
  199. package/dist/client/observability/exporters/braintrustExporter.js +0 -154
  200. package/dist/client/observability/exporters/datadogExporter.js +0 -196
  201. package/dist/client/observability/exporters/laminarExporter.js +0 -302
  202. package/dist/client/observability/exporters/langfuseExporter.js +0 -209
  203. package/dist/client/observability/exporters/langsmithExporter.js +0 -143
  204. package/dist/client/observability/exporters/otelExporter.js +0 -164
  205. package/dist/client/observability/exporters/posthogExporter.js +0 -287
  206. package/dist/client/observability/exporters/sentryExporter.js +0 -165
  207. package/dist/client/observability/index.js +0 -31
  208. package/dist/client/observability/metricsAggregator.js +0 -556
  209. package/dist/client/observability/otelBridge.js +0 -131
  210. package/dist/client/observability/retryPolicy.js +0 -383
  211. package/dist/client/observability/sampling/samplers.js +0 -216
  212. package/dist/client/observability/spanProcessor.js +0 -303
  213. package/dist/client/observability/tokenTracker.js +0 -413
  214. package/dist/client/observability/types/exporterTypes.js +0 -5
  215. package/dist/client/observability/types/index.js +0 -4
  216. package/dist/client/observability/types/spanTypes.js +0 -92
  217. package/dist/client/observability/utils/safeMetadata.js +0 -25
  218. package/dist/client/observability/utils/spanSerializer.js +0 -292
  219. package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
  220. package/dist/client/processors/base/BaseFileProcessor.js +0 -614
  221. package/dist/client/processors/base/types.js +0 -82
  222. package/dist/client/processors/config/fileTypes.js +0 -520
  223. package/dist/client/processors/config/index.js +0 -92
  224. package/dist/client/processors/config/languageMap.js +0 -410
  225. package/dist/client/processors/config/mimeTypes.js +0 -363
  226. package/dist/client/processors/config/sizeLimits.js +0 -258
  227. package/dist/client/processors/document/ExcelProcessor.js +0 -590
  228. package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
  229. package/dist/client/processors/document/PptxProcessor.js +0 -157
  230. package/dist/client/processors/document/RtfProcessor.js +0 -361
  231. package/dist/client/processors/document/WordProcessor.js +0 -353
  232. package/dist/client/processors/errors/FileErrorCode.js +0 -255
  233. package/dist/client/processors/errors/errorHelpers.js +0 -386
  234. package/dist/client/processors/errors/errorSerializer.js +0 -507
  235. package/dist/client/processors/errors/index.js +0 -49
  236. package/dist/client/processors/markup/SvgProcessor.js +0 -240
  237. package/dist/client/processors/media/AudioProcessor.js +0 -707
  238. package/dist/client/processors/media/VideoProcessor.js +0 -1045
  239. package/dist/client/providers/amazonBedrock.js +0 -1512
  240. package/dist/client/providers/amazonSagemaker.js +0 -162
  241. package/dist/client/providers/anthropic.js +0 -831
  242. package/dist/client/providers/azureOpenai.js +0 -143
  243. package/dist/client/providers/googleAiStudio.js +0 -1200
  244. package/dist/client/providers/googleNativeGemini3.js +0 -543
  245. package/dist/client/providers/googleVertex.js +0 -2936
  246. package/dist/client/providers/huggingFace.js +0 -315
  247. package/dist/client/providers/litellm.js +0 -488
  248. package/dist/client/providers/mistral.js +0 -157
  249. package/dist/client/providers/ollama.js +0 -1579
  250. package/dist/client/providers/openAI.js +0 -627
  251. package/dist/client/providers/openRouter.js +0 -543
  252. package/dist/client/providers/openaiCompatible.js +0 -290
  253. package/dist/client/providers/providerTypeUtils.js +0 -46
  254. package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
  255. package/dist/client/providers/sagemaker/client.js +0 -472
  256. package/dist/client/providers/sagemaker/config.js +0 -317
  257. package/dist/client/providers/sagemaker/detection.js +0 -606
  258. package/dist/client/providers/sagemaker/error-constants.js +0 -227
  259. package/dist/client/providers/sagemaker/errors.js +0 -299
  260. package/dist/client/providers/sagemaker/language-model.js +0 -775
  261. package/dist/client/providers/sagemaker/parsers.js +0 -634
  262. package/dist/client/providers/sagemaker/streaming.js +0 -331
  263. package/dist/client/providers/sagemaker/structured-parser.js +0 -625
  264. package/dist/client/proxy/accountQuota.js +0 -162
  265. package/dist/client/proxy/claudeFormat.js +0 -595
  266. package/dist/client/proxy/modelRouter.js +0 -29
  267. package/dist/client/proxy/oauthFetch.js +0 -367
  268. package/dist/client/proxy/proxyFetch.js +0 -586
  269. package/dist/client/proxy/requestLogger.js +0 -207
  270. package/dist/client/proxy/tokenRefresh.js +0 -124
  271. package/dist/client/proxy/usageStats.js +0 -74
  272. package/dist/client/proxy/utils/noProxyUtils.js +0 -149
  273. package/dist/client/rag/ChunkerFactory.js +0 -320
  274. package/dist/client/rag/ChunkerRegistry.js +0 -421
  275. package/dist/client/rag/chunkers/BaseChunker.js +0 -143
  276. package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
  277. package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
  278. package/dist/client/rag/chunkers/JSONChunker.js +0 -68
  279. package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
  280. package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
  281. package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
  282. package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
  283. package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
  284. package/dist/client/rag/chunkers/TokenChunker.js +0 -61
  285. package/dist/client/rag/chunkers/index.js +0 -15
  286. package/dist/client/rag/chunking/characterChunker.js +0 -142
  287. package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
  288. package/dist/client/rag/chunking/htmlChunker.js +0 -247
  289. package/dist/client/rag/chunking/index.js +0 -17
  290. package/dist/client/rag/chunking/jsonChunker.js +0 -281
  291. package/dist/client/rag/chunking/latexChunker.js +0 -251
  292. package/dist/client/rag/chunking/markdownChunker.js +0 -373
  293. package/dist/client/rag/chunking/recursiveChunker.js +0 -148
  294. package/dist/client/rag/chunking/semanticChunker.js +0 -306
  295. package/dist/client/rag/chunking/sentenceChunker.js +0 -230
  296. package/dist/client/rag/chunking/tokenChunker.js +0 -183
  297. package/dist/client/rag/document/MDocument.js +0 -392
  298. package/dist/client/rag/document/index.js +0 -5
  299. package/dist/client/rag/document/loaders.js +0 -500
  300. package/dist/client/rag/errors/RAGError.js +0 -274
  301. package/dist/client/rag/errors/index.js +0 -6
  302. package/dist/client/rag/graphRag/graphRAG.js +0 -401
  303. package/dist/client/rag/graphRag/index.js +0 -4
  304. package/dist/client/rag/index.js +0 -141
  305. package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
  306. package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
  307. package/dist/client/rag/metadata/index.js +0 -9
  308. package/dist/client/rag/metadata/metadataExtractor.js +0 -280
  309. package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
  310. package/dist/client/rag/pipeline/contextAssembly.js +0 -341
  311. package/dist/client/rag/pipeline/index.js +0 -5
  312. package/dist/client/rag/ragIntegration.js +0 -321
  313. package/dist/client/rag/reranker/RerankerFactory.js +0 -430
  314. package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
  315. package/dist/client/rag/reranker/index.js +0 -9
  316. package/dist/client/rag/reranker/reranker.js +0 -277
  317. package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
  318. package/dist/client/rag/resilience/RetryHandler.js +0 -304
  319. package/dist/client/rag/resilience/index.js +0 -7
  320. package/dist/client/rag/retrieval/hybridSearch.js +0 -335
  321. package/dist/client/rag/retrieval/index.js +0 -5
  322. package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
  323. package/dist/client/rag/types.js +0 -8
  324. package/dist/client/sdk/toolRegistration.js +0 -377
  325. package/dist/client/server/abstract/baseServerAdapter.js +0 -575
  326. package/dist/client/server/adapters/expressAdapter.js +0 -486
  327. package/dist/client/server/adapters/fastifyAdapter.js +0 -472
  328. package/dist/client/server/adapters/honoAdapter.js +0 -632
  329. package/dist/client/server/adapters/koaAdapter.js +0 -510
  330. package/dist/client/server/errors.js +0 -486
  331. package/dist/client/server/factory/serverAdapterFactory.js +0 -160
  332. package/dist/client/server/index.js +0 -108
  333. package/dist/client/server/middleware/abortSignal.js +0 -111
  334. package/dist/client/server/middleware/auth.js +0 -388
  335. package/dist/client/server/middleware/cache.js +0 -359
  336. package/dist/client/server/middleware/common.js +0 -281
  337. package/dist/client/server/middleware/deprecation.js +0 -190
  338. package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
  339. package/dist/client/server/middleware/rateLimit.js +0 -227
  340. package/dist/client/server/middleware/validation.js +0 -388
  341. package/dist/client/server/openapi/generator.js +0 -398
  342. package/dist/client/server/openapi/index.js +0 -36
  343. package/dist/client/server/openapi/schemas.js +0 -695
  344. package/dist/client/server/openapi/templates.js +0 -374
  345. package/dist/client/server/routes/agentRoutes.js +0 -189
  346. package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
  347. package/dist/client/server/routes/healthRoutes.js +0 -187
  348. package/dist/client/server/routes/index.js +0 -57
  349. package/dist/client/server/routes/mcpRoutes.js +0 -342
  350. package/dist/client/server/routes/memoryRoutes.js +0 -350
  351. package/dist/client/server/routes/openApiRoutes.js +0 -126
  352. package/dist/client/server/routes/toolRoutes.js +0 -199
  353. package/dist/client/server/streaming/dataStream.js +0 -486
  354. package/dist/client/server/streaming/index.js +0 -11
  355. package/dist/client/server/types.js +0 -67
  356. package/dist/client/server/utils/redaction.js +0 -334
  357. package/dist/client/server/utils/validation.js +0 -243
  358. package/dist/client/server/websocket/WebSocketHandler.js +0 -383
  359. package/dist/client/server/websocket/index.js +0 -4
  360. package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
  361. package/dist/client/telemetry/attributes.js +0 -100
  362. package/dist/client/telemetry/index.js +0 -26
  363. package/dist/client/telemetry/telemetryService.js +0 -308
  364. package/dist/client/telemetry/tracers.js +0 -17
  365. package/dist/client/telemetry/withSpan.js +0 -34
  366. package/dist/client/types/actionTypes.js +0 -6
  367. package/dist/client/types/analytics.js +0 -5
  368. package/dist/client/types/authTypes.js +0 -9
  369. package/dist/client/types/circuitBreakerErrors.js +0 -34
  370. package/dist/client/types/cli.js +0 -21
  371. package/dist/client/types/clientTypes.js +0 -10
  372. package/dist/client/types/common.js +0 -51
  373. package/dist/client/types/configTypes.js +0 -49
  374. package/dist/client/types/content.js +0 -19
  375. package/dist/client/types/contextTypes.js +0 -400
  376. package/dist/client/types/conversation.js +0 -47
  377. package/dist/client/types/conversationMemoryInterface.js +0 -6
  378. package/dist/client/types/domainTypes.js +0 -5
  379. package/dist/client/types/errors.js +0 -167
  380. package/dist/client/types/evaluation.js +0 -5
  381. package/dist/client/types/evaluationProviders.js +0 -5
  382. package/dist/client/types/evaluationTypes.js +0 -1
  383. package/dist/client/types/externalMcp.js +0 -6
  384. package/dist/client/types/fileReferenceTypes.js +0 -8
  385. package/dist/client/types/fileTypes.js +0 -4
  386. package/dist/client/types/generateTypes.js +0 -1
  387. package/dist/client/types/guardrails.js +0 -1
  388. package/dist/client/types/hitlTypes.js +0 -8
  389. package/dist/client/types/index.js +0 -57
  390. package/dist/client/types/mcpTypes.js +0 -5
  391. package/dist/client/types/middlewareTypes.js +0 -1
  392. package/dist/client/types/modelTypes.js +0 -30
  393. package/dist/client/types/multimodal.js +0 -135
  394. package/dist/client/types/observability.js +0 -6
  395. package/dist/client/types/pptTypes.js +0 -82
  396. package/dist/client/types/providers.js +0 -111
  397. package/dist/client/types/proxyTypes.js +0 -16
  398. package/dist/client/types/ragTypes.js +0 -7
  399. package/dist/client/types/sdkTypes.js +0 -8
  400. package/dist/client/types/serviceTypes.js +0 -5
  401. package/dist/client/types/streamTypes.js +0 -1
  402. package/dist/client/types/subscriptionTypes.js +0 -9
  403. package/dist/client/types/taskClassificationTypes.js +0 -5
  404. package/dist/client/types/tools.js +0 -24
  405. package/dist/client/types/ttsTypes.js +0 -57
  406. package/dist/client/types/typeAliases.js +0 -48
  407. package/dist/client/types/utilities.js +0 -4
  408. package/dist/client/types/workflowTypes.js +0 -30
  409. package/dist/client/utils/async/withTimeout.js +0 -98
  410. package/dist/client/utils/asyncMutex.js +0 -60
  411. package/dist/client/utils/conversationMemory.js +0 -431
  412. package/dist/client/utils/csvProcessor.js +0 -846
  413. package/dist/client/utils/errorHandling.js +0 -936
  414. package/dist/client/utils/evaluationUtils.js +0 -131
  415. package/dist/client/utils/factoryProcessing.js +0 -589
  416. package/dist/client/utils/fileDetector.js +0 -2161
  417. package/dist/client/utils/imageCache.js +0 -376
  418. package/dist/client/utils/imageProcessor.js +0 -704
  419. package/dist/client/utils/logger.js +0 -491
  420. package/dist/client/utils/mcpDefaults.js +0 -134
  421. package/dist/client/utils/messageBuilder.js +0 -1653
  422. package/dist/client/utils/modelAliasResolver.js +0 -54
  423. package/dist/client/utils/modelDetection.js +0 -80
  424. package/dist/client/utils/modelRouter.js +0 -292
  425. package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
  426. package/dist/client/utils/observabilityHelpers.js +0 -47
  427. package/dist/client/utils/parameterValidation.js +0 -966
  428. package/dist/client/utils/pdfProcessor.js +0 -410
  429. package/dist/client/utils/performance.js +0 -222
  430. package/dist/client/utils/pricing.js +0 -340
  431. package/dist/client/utils/promptRedaction.js +0 -62
  432. package/dist/client/utils/providerConfig.js +0 -1009
  433. package/dist/client/utils/providerHealth.js +0 -1237
  434. package/dist/client/utils/providerRetry.js +0 -112
  435. package/dist/client/utils/providerUtils.js +0 -434
  436. package/dist/client/utils/rateLimiter.js +0 -200
  437. package/dist/client/utils/redis.js +0 -368
  438. package/dist/client/utils/retryHandler.js +0 -269
  439. package/dist/client/utils/retryability.js +0 -22
  440. package/dist/client/utils/sanitizers/svg.js +0 -481
  441. package/dist/client/utils/schemaConversion.js +0 -255
  442. package/dist/client/utils/taskClassificationUtils.js +0 -149
  443. package/dist/client/utils/taskClassifier.js +0 -94
  444. package/dist/client/utils/thinkingConfig.js +0 -104
  445. package/dist/client/utils/timeout.js +0 -359
  446. package/dist/client/utils/tokenEstimation.js +0 -142
  447. package/dist/client/utils/tokenLimits.js +0 -125
  448. package/dist/client/utils/tokenUtils.js +0 -239
  449. package/dist/client/utils/toolUtils.js +0 -75
  450. package/dist/client/utils/transformationUtils.js +0 -554
  451. package/dist/client/utils/ttsProcessor.js +0 -286
  452. package/dist/client/utils/typeUtils.js +0 -97
  453. package/dist/client/utils/videoAnalysisProcessor.js +0 -67
  454. package/dist/client/workflow/config.js +0 -398
  455. package/dist/client/workflow/core/ensembleExecutor.js +0 -407
  456. package/dist/client/workflow/core/judgeScorer.js +0 -544
  457. package/dist/client/workflow/core/responseConditioner.js +0 -225
  458. package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
  459. package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
  460. package/dist/client/workflow/core/types/index.js +0 -7
  461. package/dist/client/workflow/core/types/judgeTypes.js +0 -7
  462. package/dist/client/workflow/core/types/layerTypes.js +0 -7
  463. package/dist/client/workflow/core/types/registryTypes.js +0 -7
  464. package/dist/client/workflow/core/workflowRegistry.js +0 -304
  465. package/dist/client/workflow/core/workflowRunner.js +0 -586
  466. package/dist/client/workflow/index.js +0 -50
  467. package/dist/client/workflow/types.js +0 -9
  468. package/dist/client/workflow/utils/types/index.js +0 -7
  469. package/dist/client/workflow/utils/workflowMetrics.js +0 -311
  470. package/dist/client/workflow/utils/workflowValidation.js +0 -420
  471. package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
  472. package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
  473. package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
  474. package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
  475. /package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
@@ -1,1543 +0,0 @@
1
- /**
2
- * File Reference Registry
3
- *
4
- * Central registry for managing file references in on-demand processing mode.
5
- * Files are registered with lightweight metadata and previews. Full content
6
- * is processed on-demand when the LLM requests it via tools.
7
- *
8
- * This module is the core of the file reference architecture, replacing
9
- * the previous "load everything upfront" pattern for files that exceed
10
- * the tiny/small size tiers.
11
- *
12
- * @module files/fileReferenceRegistry
13
- */
14
- import { randomUUID } from "node:crypto";
15
- import { mkdir, readFile, stat, unlink, writeFile } from "node:fs/promises";
16
- import { tmpdir } from "node:os";
17
- import { basename, extname, join } from "node:path";
18
- import { estimatePostProcessingTokens } from "../context/fileTokenBudget.js";
19
- import { logger } from "../utils/logger.js";
20
- import { StreamingReader } from "./streamingReader.js";
21
- import { SIZE_TIER_THRESHOLDS } from "./types.js";
22
- /** Default maximum files in registry before LRU eviction */
23
- const DEFAULT_MAX_FILES = 100;
24
- /** Default maximum temp bytes (1 GB) */
25
- const DEFAULT_MAX_TEMP_BYTES = 1024 * 1024 * 1024;
26
- /** Default preview length in characters */
27
- const DEFAULT_PREVIEW_CHARS = 2000;
28
- /** Maximum file size we'll accept (2 GB) */
29
- const MAX_ACCEPTED_SIZE = 2 * 1024 * 1024 * 1024;
30
- /**
31
- * Registry for managing file references with on-demand processing.
32
- *
33
- * Design decisions:
34
- * - One instance per NeuroLink SDK instance (not global singleton)
35
- * - File buffers persisted to temp dir for later streaming access
36
- * - LRU eviction when maxFiles exceeded
37
- * - Thread-safe via sequential async operations (Node.js single-threaded)
38
- *
39
- * @example
40
- * ```typescript
41
- * const registry = new FileReferenceRegistry();
42
- * const ref = await registry.register(buffer, {
43
- * filename: 'report.xlsx',
44
- * });
45
- * console.log(ref.sizeTier); // 'medium'
46
- * console.log(ref.preview); // First 2000 chars of processed content
47
- * console.log(ref.estimatedTokens); // Type-aware estimate
48
- *
49
- * // Later, LLM requests specific section
50
- * const section = await registry.readSection(ref.id, 1, 50, 5000);
51
- * ```
52
- */
53
- export class FileReferenceRegistry {
54
- files = new Map();
55
- tempDir;
56
- maxFiles;
57
- maxTempBytes;
58
- defaultPreviewChars;
59
- currentTempBytes = 0;
60
- tempDirCreated = false;
61
- constructor(options = {}) {
62
- this.tempDir =
63
- options.tempDir || join(tmpdir(), "neurolink-files", randomUUID());
64
- this.maxFiles = options.maxFiles ?? DEFAULT_MAX_FILES;
65
- this.maxTempBytes = options.maxTempBytes ?? DEFAULT_MAX_TEMP_BYTES;
66
- this.defaultPreviewChars =
67
- options.defaultPreviewChars ?? DEFAULT_PREVIEW_CHARS;
68
- }
69
- /**
70
- * Register a file from a Buffer.
71
- *
72
- * This is the primary registration method. It performs lightweight analysis:
73
- * 1. Detect file type from magic bytes (first 1KB)
74
- * 2. Determine size tier
75
- * 3. Extract preview (first N chars of text, or metadata for binary)
76
- * 4. Persist buffer to temp directory for later streaming access
77
- *
78
- * Total time: ~1-5ms for most files (no full processing).
79
- *
80
- * @param buffer - File content as Buffer
81
- * @param source - How the file was provided ('buffer', 'url', 'path', 'datauri')
82
- * @param options - Registration options
83
- * @returns FileReference with metadata and preview
84
- */
85
- async register(buffer, source = "buffer", options = {}) {
86
- const sizeBytes = buffer.length;
87
- // Reject oversized files
88
- if (sizeBytes > MAX_ACCEPTED_SIZE) {
89
- const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
90
- throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
91
- }
92
- // Detect file type from magic bytes and extension.
93
- // If the provided filename has no extension, append one guessed from magic bytes
94
- // so downstream processors (e.g., VideoProcessor) can validate by extension.
95
- let filename = options.filename || `file-${Date.now()}${this.guessExtension(buffer)}`;
96
- if (!extname(filename)) {
97
- const guessedExt = this.guessExtension(buffer);
98
- if (guessedExt) {
99
- filename = `${filename}${guessedExt}`;
100
- }
101
- }
102
- const ext = extname(filename).toLowerCase().replace(".", "");
103
- const detectedType = options.fileType || this.detectType(buffer, ext);
104
- const mimeType = this.guessMimeType(detectedType, ext);
105
- const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
106
- // Generate preview (fast — only reads first N chars)
107
- const preview = this.extractPreview(buffer, detectedType, options.maxPreviewChars ?? this.defaultPreviewChars);
108
- // Estimate post-processing tokens (type-aware)
109
- const estimatedTokens = estimatePostProcessingTokens(sizeBytes, detectedType);
110
- // Create reference
111
- const ref = {
112
- id: randomUUID(),
113
- source,
114
- filename,
115
- sizeBytes,
116
- detectedType,
117
- mimeType,
118
- sizeTier,
119
- estimatedTokens,
120
- preview,
121
- status: "registered",
122
- registeredAt: Date.now(),
123
- lastAccessedAt: Date.now(),
124
- extension: ext || undefined,
125
- };
126
- // Persist buffer to temp directory (unless skipped or tiny)
127
- if (!options.skipTempPersist && sizeTier !== "tiny") {
128
- try {
129
- const tempPath = await this.persistToTemp(ref.id, buffer, ext);
130
- ref.tempPath = tempPath;
131
- }
132
- catch (err) {
133
- logger.warn(`[FileReferenceRegistry] Failed to persist ${filename} to temp: ${err instanceof Error ? err.message : String(err)}`);
134
- // Continue without temp persistence — buffer-based access still works
135
- }
136
- }
137
- // For tiny files, store the processed content inline
138
- if (sizeTier === "tiny") {
139
- ref.processedContent = this.isTextType(detectedType, buffer)
140
- ? buffer.toString("utf-8")
141
- : preview;
142
- ref.status = "processed";
143
- }
144
- else {
145
- ref.status = "previewed";
146
- }
147
- // Evict LRU entries if at capacity
148
- if (this.files.size >= this.maxFiles) {
149
- this.evictLRU();
150
- }
151
- this.files.set(ref.id, ref);
152
- logger.info(`[FileReferenceRegistry] Registered "${filename}" (${this.formatSize(sizeBytes)}, ` +
153
- `tier=${sizeTier}, type=${detectedType}, ~${estimatedTokens} tokens)`);
154
- return ref;
155
- }
156
- /**
157
- * Register a file from a file path on disk.
158
- *
159
- * Does NOT read the entire file — only reads the first 1KB for type detection
160
- * and preview. The file path is stored for later streaming access.
161
- *
162
- * @param filePath - Absolute path to the file
163
- * @param options - Registration options
164
- * @returns FileReference with metadata and preview
165
- */
166
- async registerFromPath(filePath, options = {}) {
167
- const fileStat = await stat(filePath);
168
- const sizeBytes = fileStat.size;
169
- if (sizeBytes > MAX_ACCEPTED_SIZE) {
170
- const sizeMB = (sizeBytes / (1024 * 1024)).toFixed(1);
171
- throw new Error(`File too large (${sizeMB} MB). Maximum accepted size is 2 GB.`);
172
- }
173
- const filename = options.filename || basename(filePath);
174
- const ext = extname(filename).toLowerCase().replace(".", "");
175
- const detectedType = options.fileType || this.detectTypeFromExtension(ext);
176
- const mimeType = this.guessMimeType(detectedType, ext);
177
- const sizeTier = FileReferenceRegistry.classifySizeTier(sizeBytes);
178
- const estimatedTokens = estimatePostProcessingTokens(sizeBytes, detectedType);
179
- // Read preview from file (streaming — only first N bytes)
180
- let preview;
181
- try {
182
- preview = await StreamingReader.readPreview(filePath, options.maxPreviewChars ?? this.defaultPreviewChars);
183
- }
184
- catch {
185
- preview = `[File: ${filename}, ${this.formatSize(sizeBytes)}, type: ${detectedType}]`;
186
- }
187
- const ref = {
188
- id: randomUUID(),
189
- source: "path",
190
- originalPath: filePath,
191
- filename,
192
- sizeBytes,
193
- detectedType,
194
- mimeType,
195
- sizeTier,
196
- estimatedTokens,
197
- preview,
198
- status: "previewed",
199
- registeredAt: Date.now(),
200
- lastAccessedAt: Date.now(),
201
- extension: ext || undefined,
202
- };
203
- // For path-based files, no need to persist — we already have the path
204
- // Store the original path as the access point
205
- ref.tempPath = filePath;
206
- if (this.files.size >= this.maxFiles) {
207
- this.evictLRU();
208
- }
209
- this.files.set(ref.id, ref);
210
- logger.info(`[FileReferenceRegistry] Registered from path "${filename}" ` +
211
- `(${this.formatSize(sizeBytes)}, tier=${sizeTier}, type=${detectedType})`);
212
- return ref;
213
- }
214
- /**
215
- * Get a file reference by ID.
216
- * Updates lastAccessedAt for LRU tracking.
217
- */
218
- get(id) {
219
- const ref = this.files.get(id);
220
- if (ref) {
221
- ref.lastAccessedAt = Date.now();
222
- }
223
- return ref;
224
- }
225
- /**
226
- * Get a file reference by ID or filename.
227
- * Tries ID lookup first, then falls back to filename match.
228
- * This handles the common case where an LLM uses the filename
229
- * instead of the UUID when calling file tools.
230
- *
231
- * @param idOrName - UUID or filename to search for
232
- * @returns File reference if found, undefined otherwise
233
- */
234
- getByIdOrFilename(idOrName) {
235
- // Try direct ID lookup first (most common, O(1))
236
- const byId = this.get(idOrName);
237
- if (byId) {
238
- return byId;
239
- }
240
- // Fallback: search by filename (case-insensitive)
241
- const lowerName = idOrName.toLowerCase();
242
- for (const ref of this.files.values()) {
243
- if (ref.filename.toLowerCase() === lowerName) {
244
- ref.lastAccessedAt = Date.now();
245
- return ref;
246
- }
247
- }
248
- // Fallback: search by basename (without path)
249
- for (const ref of this.files.values()) {
250
- const refBasename = ref.filename.split("/").pop()?.toLowerCase() ?? "";
251
- if (refBasename === lowerName) {
252
- ref.lastAccessedAt = Date.now();
253
- return ref;
254
- }
255
- }
256
- return undefined;
257
- }
258
- /**
259
- * Ensure a file has been processed (binary content extracted to text).
260
- *
261
- * For text files this is a no-op. For binary files (PDF, XLSX, video, etc.)
262
- * this triggers on-demand processing if it hasn't happened yet. After this
263
- * call, ref.processedContent and ref.preview contain extracted text.
264
- *
265
- * Used by file tools (get_file_preview) to ensure the preview contains
266
- * real content instead of placeholder metadata strings.
267
- */
268
- async ensureProcessed(fileId) {
269
- const ref = this.get(fileId);
270
- if (!ref) {
271
- return;
272
- }
273
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
274
- await this.processFileOnDemand(ref);
275
- }
276
- }
277
- /**
278
- * Extract targeted content from a registered file.
279
- *
280
- * This is the core dispatch method for the `extract_file_content` tool.
281
- * Routes extraction to the appropriate processor based on file type and
282
- * the parameters provided.
283
- *
284
- * @param params - Extraction parameters (file_id + type-specific options)
285
- * @returns Extraction result with text and/or images
286
- */
287
- async extractContent(params) {
288
- const ref = this.getByIdOrFilename(params.file_id);
289
- if (!ref) {
290
- return {
291
- success: false,
292
- error: `File not found: "${params.file_id}". Use list_attached_files to see available files.`,
293
- };
294
- }
295
- try {
296
- // Text-like types don't need raw buffer — they use readSection
297
- // which works from processedContent (tiny files) or tempPath (larger files)
298
- if (this.isTextType(ref.detectedType) ||
299
- ref.detectedType === "csv" ||
300
- ref.detectedType === "svg" ||
301
- ref.detectedType === "unknown") {
302
- return await this.extractTextTargeted(ref, params);
303
- }
304
- // Binary types need the raw buffer for processor-specific extraction
305
- const buffer = ref.tempPath ? await readFile(ref.tempPath) : null;
306
- if (!buffer) {
307
- return {
308
- success: false,
309
- error: `No file data available for "${ref.filename}". The file may have been evicted from cache.`,
310
- };
311
- }
312
- switch (ref.detectedType) {
313
- case "video":
314
- return await this.extractVideoTargeted(buffer, ref, params);
315
- case "pdf":
316
- return await this.extractPdfTargeted(buffer, ref, params);
317
- case "xlsx":
318
- return await this.extractExcelTargeted(buffer, ref, params);
319
- case "pptx":
320
- return await this.extractPptxTargeted(buffer, ref, params);
321
- case "archive":
322
- return await this.extractArchiveTargeted(buffer, ref, params);
323
- case "audio":
324
- return await this.extractAudioTargeted(buffer, ref, params);
325
- default:
326
- // Fallback for any unrecognized binary type
327
- return await this.extractTextTargeted(ref, params);
328
- }
329
- }
330
- catch (err) {
331
- return {
332
- success: false,
333
- error: `Extraction failed for "${ref.filename}": ${err instanceof Error ? err.message : String(err)}`,
334
- };
335
- }
336
- }
337
- // ─── Targeted Extraction Dispatchers ──────────────────────────────
338
- async extractVideoTargeted(buffer, ref, params) {
339
- const { videoProcessor } = await import("../processors/media/VideoProcessor.js");
340
- // If time range specified, extract frames from that range
341
- if (params.start_time !== undefined && params.end_time !== undefined) {
342
- const frames = await videoProcessor.extractFrameRange(buffer, ref.filename, params.start_time, params.end_time, params.frame_count ?? 5);
343
- return {
344
- success: true,
345
- text: `Extracted ${frames.length} frames from ${ref.filename} (${params.start_time}s - ${params.end_time}s)`,
346
- images: frames,
347
- metadata: {
348
- startTime: params.start_time,
349
- endTime: params.end_time,
350
- frameCount: frames.length,
351
- },
352
- };
353
- }
354
- // No time range: return full metadata + initial keyframes
355
- if (!ref.processedContent) {
356
- await this.processFileOnDemand(ref);
357
- }
358
- return {
359
- success: true,
360
- text: ref.processedContent || `[Video: ${ref.filename}]`,
361
- images: ref.extractedImages ?? undefined,
362
- };
363
- }
364
- async extractPdfTargeted(buffer, ref, params) {
365
- // If specific pages requested, extract those pages
366
- const pages = params.pages ??
367
- (params.page_range
368
- ? Array.from({ length: params.page_range.end - params.page_range.start + 1 }, (_, i) => (params.page_range ?? { start: 0 }).start + i)
369
- : undefined);
370
- if (pages && pages.length > 0) {
371
- try {
372
- const { PDFParse } = await import("pdf-parse");
373
- const pdf = new PDFParse({ data: new Uint8Array(buffer) });
374
- try {
375
- const firstPage = Math.min(...pages);
376
- const lastPage = Math.max(...pages);
377
- const textResult = await pdf.getText({
378
- first: firstPage,
379
- last: lastPage,
380
- });
381
- const totalPages = textResult.total || 0;
382
- const text = textResult.text?.trim() || "(No text found on the requested pages)";
383
- // Note: pdf-parse extracts a contiguous range (first..last).
384
- // For non-contiguous page requests (e.g., [1, 5, 12]), the result
385
- // includes all pages in the range. This is a limitation of pdf-parse.
386
- const rangeNote = firstPage !== lastPage
387
- ? ` (extracted pages ${firstPage}-${lastPage})`
388
- : "";
389
- return {
390
- success: true,
391
- text: `## Pages ${pages.join(", ")} of ${ref.filename}${rangeNote}\n` +
392
- `Total pages in document: ${totalPages}\n\n${text}`,
393
- metadata: {
394
- requestedPages: pages,
395
- extractedRange: { first: firstPage, last: lastPage },
396
- totalPages,
397
- },
398
- };
399
- }
400
- finally {
401
- await pdf.destroy().catch(() => {
402
- /* cleanup - ignore destroy errors */
403
- });
404
- }
405
- }
406
- catch (err) {
407
- return {
408
- success: false,
409
- error: `PDF page extraction failed: ${err instanceof Error ? err.message : String(err)}`,
410
- };
411
- }
412
- }
413
- // No specific pages: return full content
414
- if (!ref.processedContent) {
415
- await this.processFileOnDemand(ref);
416
- }
417
- return {
418
- success: true,
419
- text: ref.processedContent || `[PDF: ${ref.filename}]`,
420
- };
421
- }
422
- async extractExcelTargeted(buffer, ref, params) {
423
- const { excelProcessor } = await import("../processors/document/ExcelProcessor.js");
424
- const text = await excelProcessor.extractSheetRange(buffer, params.sheet, params.row_range?.start ?? 1, params.row_range?.end, params.columns);
425
- return {
426
- success: true,
427
- text,
428
- metadata: {
429
- sheet: params.sheet,
430
- rowRange: params.row_range,
431
- columns: params.columns,
432
- },
433
- };
434
- }
435
- async extractPptxTargeted(buffer, ref, params) {
436
- const pages = params.pages ??
437
- (params.page_range
438
- ? Array.from({ length: params.page_range.end - params.page_range.start + 1 }, (_, i) => (params.page_range ?? { start: 0 }).start + i)
439
- : undefined);
440
- if (pages && pages.length > 0) {
441
- const { PptxProcessor } = await import("../processors/document/PptxProcessor.js");
442
- const text = await PptxProcessor.extractSlides(buffer, pages);
443
- return {
444
- success: true,
445
- text,
446
- metadata: { slides: pages },
447
- };
448
- }
449
- // Full extraction
450
- if (!ref.processedContent) {
451
- await this.processFileOnDemand(ref);
452
- }
453
- return {
454
- success: true,
455
- text: ref.processedContent || `[PPTX: ${ref.filename}]`,
456
- };
457
- }
458
- async extractArchiveTargeted(buffer, ref, params) {
459
- if (params.entry_path) {
460
- const { archiveProcessor } = await import("../processors/archive/ArchiveProcessor.js");
461
- const text = await archiveProcessor.extractEntry(buffer, params.entry_path);
462
- return {
463
- success: true,
464
- text,
465
- metadata: { entryPath: params.entry_path },
466
- };
467
- }
468
- // No specific entry: return full listing
469
- if (!ref.processedContent) {
470
- await this.processFileOnDemand(ref);
471
- }
472
- return {
473
- success: true,
474
- text: ref.processedContent || `[Archive: ${ref.filename}]`,
475
- };
476
- }
477
- async extractAudioTargeted(_buffer, ref, _params) {
478
- // Audio doesn't have sub-section extraction yet — return full metadata
479
- if (!ref.processedContent) {
480
- await this.processFileOnDemand(ref);
481
- }
482
- return {
483
- success: true,
484
- text: ref.processedContent || `[Audio: ${ref.filename}]`,
485
- };
486
- }
487
- async extractTextTargeted(ref, params) {
488
- // For text files, use line-range reading
489
- const startLine = params.page_range?.start ?? params.row_range?.start ?? 1;
490
- const endLine = params.page_range?.end ?? params.row_range?.end;
491
- const result = await this.readSection(ref.id, startLine, endLine, 50_000);
492
- return {
493
- success: true,
494
- text: result.content,
495
- metadata: {
496
- startLine: result.startLine,
497
- endLine: result.endLine,
498
- totalLines: result.totalLines,
499
- truncated: result.truncated,
500
- },
501
- };
502
- }
503
- /**
504
- * List all registered files.
505
- * Returns a lightweight summary suitable for the LLM.
506
- */
507
- list() {
508
- return Array.from(this.files.values());
509
- }
510
- /**
511
- * Generate a formatted table of all registered files for the LLM.
512
- */
513
- listFormatted() {
514
- const files = this.list();
515
- if (files.length === 0) {
516
- return "No files attached.";
517
- }
518
- const header = "| # | Filename | Type | Size | Tier | Est. Tokens | Status |\n" +
519
- "|---|----------|------|------|------|-------------|--------|\n";
520
- const rows = files.map((f, i) => `| ${i + 1} | ${f.filename} | ${f.detectedType} | ${this.formatSize(f.sizeBytes)} | ` +
521
- `${f.sizeTier} | ~${f.estimatedTokens.toLocaleString()} | ${f.status} |`);
522
- return header + rows.join("\n");
523
- }
524
- /**
525
- * Read a section of a registered file.
526
- *
527
- * Uses StreamingReader for memory-efficient access.
528
- *
529
- * @param fileId - File reference ID
530
- * @param startLine - Starting line (1-indexed)
531
- * @param endLine - Ending line (1-indexed)
532
- * @param tokenBudget - Maximum tokens to return
533
- * @param provider - Provider name for token estimation
534
- * @returns FileReadResult
535
- */
536
- async readSection(fileId, startLine = 1, endLine, tokenBudget = 50_000, provider) {
537
- const ref = this.get(fileId);
538
- if (!ref) {
539
- throw new Error(`File reference not found: ${fileId}`);
540
- }
541
- // Process binary files on first read — the lazy registration path
542
- // stores raw binary to temp but never runs processors. We must process
543
- // on-demand so the LLM gets extracted text, not garbled binary.
544
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
545
- await this.processFileOnDemand(ref);
546
- }
547
- // If content is already cached (or was just processed), use buffer reader
548
- if (ref.processedContent) {
549
- return StreamingReader.readFromBuffer(Buffer.from(ref.processedContent, "utf-8"), {
550
- startLine,
551
- endLine,
552
- tokenBudget,
553
- provider,
554
- });
555
- }
556
- // If we have a temp path or original path, use streaming reader
557
- // (text files that were not processed on-demand)
558
- const filePath = ref.tempPath || ref.originalPath;
559
- if (filePath) {
560
- const result = await StreamingReader.readLines(filePath, {
561
- startLine,
562
- endLine,
563
- tokenBudget,
564
- provider,
565
- });
566
- // Cache total lines for future reference
567
- if (!ref.totalLines) {
568
- ref.totalLines = result.totalLines;
569
- }
570
- return result;
571
- }
572
- throw new Error(`No accessible content for file "${ref.filename}" (id: ${fileId})`);
573
- }
574
- /**
575
- * Search within a registered file.
576
- *
577
- * @param fileId - File reference ID
578
- * @param pattern - Search pattern (string or regex)
579
- * @param maxMatches - Maximum matches to return
580
- * @returns FileSearchResult
581
- */
582
- async search(fileId, pattern, maxMatches = 50) {
583
- const ref = this.get(fileId);
584
- if (!ref) {
585
- throw new Error(`File reference not found: ${fileId}`);
586
- }
587
- // Process binary files on first search — same lazy processing as readSection().
588
- // Without this, search would scan raw PDF/XLSX binary bytes for text patterns.
589
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
590
- await this.processFileOnDemand(ref);
591
- }
592
- // Search in processedContent if available (binary files after on-demand processing, or tiny files)
593
- if (ref.processedContent) {
594
- return FileReferenceRegistry.searchInMemory(ref.processedContent, pattern, maxMatches);
595
- }
596
- // For text files: use streaming search on the raw temp file (content IS valid UTF-8)
597
- const filePath = ref.tempPath || ref.originalPath;
598
- if (filePath) {
599
- return StreamingReader.searchInFile(filePath, pattern, {
600
- maxMatches,
601
- });
602
- }
603
- throw new Error(`No searchable content for file "${ref.filename}" (id: ${fileId})`);
604
- }
605
- /**
606
- * Search within in-memory content (for tiny files without temp paths).
607
- */
608
- static searchInMemory(content, pattern, maxMatches) {
609
- const regex = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "i");
610
- const lines = content.split("\n");
611
- const matches = [];
612
- let totalMatches = 0;
613
- for (let i = 0; i < lines.length; i++) {
614
- if (regex.test(lines[i])) {
615
- totalMatches++;
616
- if (matches.length < maxMatches) {
617
- matches.push({
618
- lineNumber: i + 1,
619
- line: lines[i],
620
- contextBefore: lines.slice(Math.max(0, i - 3), i),
621
- contextAfter: lines.slice(i + 1, Math.min(lines.length, i + 4)),
622
- });
623
- }
624
- }
625
- }
626
- return {
627
- matches,
628
- totalMatches,
629
- truncated: totalMatches > maxMatches,
630
- };
631
- }
632
- /**
633
- * Store a summary for a file reference.
634
- */
635
- setSummary(fileId, summary) {
636
- const ref = this.files.get(fileId);
637
- if (ref) {
638
- ref.summary = summary;
639
- ref.status = "processed";
640
- ref.lastAccessedAt = Date.now();
641
- }
642
- }
643
- /**
644
- * Remove a file reference and clean up its temp file.
645
- */
646
- async remove(fileId) {
647
- const ref = this.files.get(fileId);
648
- if (!ref) {
649
- return false;
650
- }
651
- // Clean up temp file (only if we created it, not for original paths)
652
- if (ref.tempPath && ref.source !== "path") {
653
- try {
654
- await unlink(ref.tempPath);
655
- this.currentTempBytes -= ref.sizeBytes;
656
- }
657
- catch {
658
- // Temp file may already be cleaned up
659
- }
660
- }
661
- this.files.delete(fileId);
662
- return true;
663
- }
664
- /**
665
- * Clear all file references and clean up temp directory.
666
- */
667
- async clear() {
668
- const ids = Array.from(this.files.keys());
669
- for (const id of ids) {
670
- await this.remove(id);
671
- }
672
- this.files.clear();
673
- this.currentTempBytes = 0;
674
- }
675
- /**
676
- * Get the number of registered files.
677
- */
678
- get size() {
679
- return this.files.size;
680
- }
681
- /**
682
- * Generate the preview text for the initial prompt.
683
- *
684
- * Returns a compact summary of all registered files that uses ~50-100 tokens
685
- * per file instead of full content. The LLM can use file tools to access
686
- * more content as needed.
687
- *
688
- * @returns Formatted string for prompt injection
689
- */
690
- async generatePromptPreview() {
691
- const files = this.list();
692
- if (files.length === 0) {
693
- return "";
694
- }
695
- // Ensure binary files are processed so previews contain real content
696
- // (e.g., video metadata, audio tags) instead of placeholder strings.
697
- for (const ref of files) {
698
- if (!ref.processedContent && !this.isTextType(ref.detectedType)) {
699
- await this.processFileOnDemand(ref);
700
- }
701
- }
702
- const sections = [];
703
- sections.push(`\n\n## Attached Files (${files.length})\n`);
704
- for (const ref of files) {
705
- const sizeStr = this.formatSize(ref.sizeBytes);
706
- sections.push(`### File: "${ref.filename}" (${sizeStr}, ${ref.detectedType})`);
707
- if (ref.sizeTier === "tiny" && ref.processedContent) {
708
- // Tiny files: include full content inline
709
- sections.push(ref.processedContent);
710
- }
711
- else {
712
- // Larger files: include preview + guidance
713
- sections.push(`**Preview** (first ${this.defaultPreviewChars} chars):`);
714
- sections.push(ref.preview);
715
- // Add type-specific extraction hints
716
- const hint = FileReferenceRegistry.getExtractionHint(ref.detectedType, sizeStr);
717
- if (hint) {
718
- sections.push(`\n> ${hint}`);
719
- }
720
- else if (ref.sizeTier !== "small") {
721
- sections.push(`\n> This file is ${sizeStr}. Use \`read_file_section\` to read specific ` +
722
- `sections, \`search_in_file\` to search, or \`summarize_file\` for a full summary.`);
723
- }
724
- }
725
- sections.push(""); // blank line between files
726
- }
727
- return sections.join("\n");
728
- }
729
- // ─── Private Methods ────────────────────────────────────────────
730
- /**
731
- * Get type-specific extraction hints for the LLM prompt.
732
- * Tells the LLM what parameters it can use with extract_file_content.
733
- */
734
- static getExtractionHint(type, sizeStr) {
735
- switch (type) {
736
- case "video":
737
- return (`This video is ${sizeStr}. Use \`extract_file_content\` with \`start_time\`/\`end_time\` ` +
738
- `to get frames from specific time ranges (e.g., start_time=5, end_time=10, frame_count=3). ` +
739
- `Initial keyframes are already provided above.`);
740
- case "pdf":
741
- return (`This PDF is ${sizeStr}. Use \`extract_file_content\` with \`pages\` (e.g., [1, 3, 5]) ` +
742
- `or \`page_range\` (e.g., {start: 1, end: 10}) to get specific pages. ` +
743
- `Use \`read_file_section\` for line-range access or \`search_in_file\` to search.`);
744
- case "xlsx":
745
- return (`This spreadsheet is ${sizeStr}. Use \`extract_file_content\` with \`sheet\` (name or index), ` +
746
- `\`row_range\` (e.g., {start: 1, end: 50}), and \`columns\` (e.g., ["A", "B", "D"]) ` +
747
- `for targeted data extraction.`);
748
- case "pptx":
749
- return (`This presentation is ${sizeStr}. Use \`extract_file_content\` with \`pages\` ` +
750
- `(e.g., [1, 3, 5]) to extract specific slides.`);
751
- case "archive":
752
- return (`This archive is ${sizeStr}. Use \`extract_file_content\` with \`entry_path\` ` +
753
- `(e.g., "src/index.ts") to extract a specific file from the archive.`);
754
- case "audio":
755
- return (`This audio file is ${sizeStr}. Metadata is shown above. ` +
756
- `Use \`read_file_section\` or \`search_in_file\` for text-based access.`);
757
- default:
758
- return null;
759
- }
760
- }
761
- /**
762
- * Classify a file into a size tier based on byte size.
763
- */
764
- static classifySizeTier(sizeBytes) {
765
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.TINY_MAX) {
766
- return "tiny";
767
- }
768
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.SMALL_MAX) {
769
- return "small";
770
- }
771
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.MEDIUM_MAX) {
772
- return "medium";
773
- }
774
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.LARGE_MAX) {
775
- return "large";
776
- }
777
- if (sizeBytes <= SIZE_TIER_THRESHOLDS.HUGE_MAX) {
778
- return "huge";
779
- }
780
- return "oversized";
781
- }
782
- /**
783
- * Process a binary file on-demand, extracting text content via the
784
- * appropriate processor. This bridges the gap between the lazy registration
785
- * path (which stores raw binary) and the LLM read tools (which need text).
786
- *
787
- * Called lazily on first readSection() or search() for non-text files.
788
- * Results are cached in ref.processedContent for subsequent reads.
789
- */
790
- async processFileOnDemand(ref) {
791
- // Prevent concurrent processing of the same file
792
- if (ref.status === "processing") {
793
- return;
794
- }
795
- ref.status = "processing";
796
- try {
797
- const buffer = ref.tempPath ? await readFile(ref.tempPath) : null;
798
- if (!buffer) {
799
- ref.status = "error";
800
- logger.warn(`[FileReferenceRegistry] No buffer available for on-demand processing: "${ref.filename}"`);
801
- return;
802
- }
803
- let extractedText = null;
804
- switch (ref.detectedType) {
805
- case "pdf":
806
- extractedText = await this.extractPdfText(buffer);
807
- break;
808
- case "xlsx":
809
- extractedText = await this.extractExcelText(buffer, ref);
810
- break;
811
- case "docx":
812
- extractedText = await this.extractWordText(buffer, ref);
813
- break;
814
- case "pptx":
815
- extractedText = await this.extractPptxText(buffer);
816
- break;
817
- case "video":
818
- extractedText = await this.extractVideoContent(buffer, ref);
819
- break;
820
- case "audio":
821
- extractedText = await this.extractAudioContent(buffer, ref);
822
- break;
823
- case "archive":
824
- extractedText = await this.extractArchiveContent(buffer, ref);
825
- break;
826
- default:
827
- // For unknown binary types, provide a descriptive fallback
828
- extractedText =
829
- `[Binary file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}, type: ${ref.detectedType}]\n` +
830
- `This file could not be processed into text content.`;
831
- break;
832
- }
833
- if (extractedText) {
834
- ref.processedContent = extractedText;
835
- ref.status = "processed";
836
- // Update the preview with actual content instead of placeholder metadata
837
- const previewChars = this.defaultPreviewChars;
838
- if (extractedText.length <= previewChars) {
839
- ref.preview = extractedText;
840
- }
841
- else {
842
- const lastNewline = extractedText.lastIndexOf("\n", previewChars);
843
- ref.preview =
844
- lastNewline > previewChars * 0.8
845
- ? extractedText.substring(0, lastNewline)
846
- : extractedText.substring(0, previewChars) + "\n...[truncated]";
847
- }
848
- logger.info(`[FileReferenceRegistry] On-demand processed "${ref.filename}" ` +
849
- `(${ref.detectedType}, ${this.formatSize(ref.sizeBytes)}) → ${extractedText.length} chars`);
850
- }
851
- else {
852
- ref.processedContent =
853
- `[${ref.detectedType.toUpperCase()} file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}]\n` +
854
- `Content could not be extracted. The file may be corrupted or in an unsupported format.`;
855
- ref.preview = ref.processedContent;
856
- ref.status = "processed";
857
- }
858
- }
859
- catch (err) {
860
- const errorMsg = err instanceof Error ? err.message : String(err);
861
- logger.warn(`[FileReferenceRegistry] On-demand processing failed for "${ref.filename}": ${errorMsg}`);
862
- ref.processedContent =
863
- `[Processing error for ${ref.filename}]\n` +
864
- `Type: ${ref.detectedType}, Size: ${this.formatSize(ref.sizeBytes)}\n` +
865
- `Error: ${errorMsg}`;
866
- ref.preview = ref.processedContent;
867
- ref.status = "error";
868
- }
869
- }
870
- /**
871
- * Extract text from a PDF buffer using pdf-parse v2 (pdfjs-dist under the hood).
872
- *
873
- * Handles compressed streams (FlateDecode), CMap-encoded text, modern PDFs,
874
- * and most text-based PDF formats. For scanned/image-only PDFs where no text
875
- * can be extracted, falls back to a descriptive message.
876
- */
877
- async extractPdfText(buffer) {
878
- try {
879
- const { PDFParse } = await import("pdf-parse");
880
- const pdf = new PDFParse({
881
- data: new Uint8Array(buffer),
882
- });
883
- try {
884
- const textResult = await pdf.getText({
885
- // Limit to first 100 pages to avoid unbounded processing
886
- last: 100,
887
- });
888
- const text = textResult.text?.trim();
889
- if (!text || text.length === 0) {
890
- // No text found — likely a scanned/image-only PDF
891
- const pageCount = textResult.total || 0;
892
- return (`[PDF document: ${this.formatSize(buffer.length)}, ${pageCount} page(s)]\n` +
893
- `This PDF appears to contain scanned images or non-extractable content.\n` +
894
- `Text could not be extracted from the document. The content may consist of:\n` +
895
- `- Scanned pages (images of text, not searchable text)\n` +
896
- `- Forms or graphical content\n` +
897
- `- Protected/encrypted content`);
898
- }
899
- // Clean up excessive blank lines
900
- const cleaned = text.replace(/\n{3,}/g, "\n\n");
901
- return cleaned;
902
- }
903
- finally {
904
- // Always clean up the PDF instance to free pdfjs-dist resources
905
- await pdf.destroy().catch(() => {
906
- /* cleanup - ignore destroy errors */
907
- });
908
- }
909
- }
910
- catch (err) {
911
- logger.warn(`[FileReferenceRegistry] PDF text extraction failed: ${err instanceof Error ? err.message : String(err)}`);
912
- return null;
913
- }
914
- }
915
- /**
916
- * Extract text content from an Excel file using ExcelProcessor.
917
- */
918
- async extractExcelText(buffer, ref) {
919
- try {
920
- const { processExcel } = await import("../processors/document/ExcelProcessor.js");
921
- const result = await processExcel({
922
- id: ref.id,
923
- name: ref.filename,
924
- mimetype: ref.mimeType,
925
- size: ref.sizeBytes,
926
- buffer,
927
- });
928
- if (!result.success || !result.data) {
929
- return null;
930
- }
931
- // Format worksheets as TSV text for LLM consumption
932
- const worksheets = result.data.worksheets;
933
- if (worksheets && worksheets.length > 0) {
934
- const sections = [];
935
- for (const ws of worksheets) {
936
- sections.push(`## Sheet: ${ws.name}`);
937
- if (ws.headers.length > 0) {
938
- sections.push(ws.headers.join("\t"));
939
- }
940
- for (const row of ws.rows) {
941
- sections.push(row.map((cell) => (cell === null ? "" : String(cell))).join("\t"));
942
- }
943
- sections.push("");
944
- }
945
- return sections.join("\n");
946
- }
947
- return null;
948
- }
949
- catch (err) {
950
- logger.warn(`[FileReferenceRegistry] Excel extraction failed: ${err instanceof Error ? err.message : String(err)}`);
951
- return null;
952
- }
953
- }
954
- /**
955
- * Extract text content from a Word document using WordProcessor.
956
- */
957
- async extractWordText(buffer, ref) {
958
- try {
959
- const { processWord } = await import("../processors/document/WordProcessor.js");
960
- const result = await processWord({
961
- id: ref.id,
962
- name: ref.filename,
963
- mimetype: ref.mimeType,
964
- size: ref.sizeBytes,
965
- buffer,
966
- });
967
- if (!result.success || !result.data) {
968
- return null;
969
- }
970
- return result.data.textContent || null;
971
- }
972
- catch (err) {
973
- logger.warn(`[FileReferenceRegistry] Word extraction failed: ${err instanceof Error ? err.message : String(err)}`);
974
- return null;
975
- }
976
- }
977
- /**
978
- * Extract text from a PowerPoint file using PptxProcessor.
979
- */
980
- async extractPptxText(buffer) {
981
- try {
982
- const { PptxProcessor } = await import("../processors/document/PptxProcessor.js");
983
- return await PptxProcessor.extractText(buffer);
984
- }
985
- catch (err) {
986
- logger.warn(`[FileReferenceRegistry] PPTX extraction failed: ${err instanceof Error ? err.message : String(err)}`);
987
- return null;
988
- }
989
- }
990
- /**
991
- * Extract metadata and content from a video file using VideoProcessor.
992
- */
993
- async extractVideoContent(buffer, ref) {
994
- try {
995
- const { processVideo } = await import("../processors/media/VideoProcessor.js");
996
- const result = await processVideo({
997
- id: ref.id,
998
- name: ref.filename,
999
- mimetype: ref.mimeType,
1000
- size: ref.sizeBytes,
1001
- buffer,
1002
- });
1003
- if (!result.success || !result.data) {
1004
- return null;
1005
- }
1006
- // Store keyframe images on the reference for injection into the prompt
1007
- if (result.data.keyframes && result.data.keyframes.length > 0) {
1008
- ref.extractedImages = result.data.keyframes;
1009
- logger.info(`[FileReferenceRegistry] Extracted ${result.data.keyframes.length} keyframes from "${ref.filename}"`);
1010
- }
1011
- return result.data.textContent || null;
1012
- }
1013
- catch (err) {
1014
- logger.warn(`[FileReferenceRegistry] Video extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1015
- // Provide basic metadata even on failure
1016
- return (`[Video file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}]\n` +
1017
- `Video processing requires ffmpeg/ffprobe. Metadata could not be extracted.\n` +
1018
- `Error: ${err instanceof Error ? err.message : String(err)}`);
1019
- }
1020
- }
1021
- /**
1022
- * Extract metadata and content from an audio file using AudioProcessor.
1023
- */
1024
- async extractAudioContent(buffer, ref) {
1025
- try {
1026
- const { processAudio } = await import("../processors/media/AudioProcessor.js");
1027
- const result = await processAudio({
1028
- id: ref.id,
1029
- name: ref.filename,
1030
- mimetype: ref.mimeType,
1031
- size: ref.sizeBytes,
1032
- buffer,
1033
- });
1034
- if (!result.success || !result.data) {
1035
- return null;
1036
- }
1037
- return result.data.textContent || null;
1038
- }
1039
- catch (err) {
1040
- logger.warn(`[FileReferenceRegistry] Audio extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1041
- return (`[Audio file: ${ref.filename}, ${this.formatSize(ref.sizeBytes)}]\n` +
1042
- `Audio processing failed. Error: ${err instanceof Error ? err.message : String(err)}`);
1043
- }
1044
- }
1045
- /**
1046
- * Extract file listing from an archive using ArchiveProcessor.
1047
- */
1048
- async extractArchiveContent(buffer, ref) {
1049
- try {
1050
- const { processArchive } = await import("../processors/archive/ArchiveProcessor.js");
1051
- const result = await processArchive({
1052
- id: ref.id,
1053
- name: ref.filename,
1054
- mimetype: ref.mimeType,
1055
- size: ref.sizeBytes,
1056
- buffer,
1057
- });
1058
- if (!result.success || !result.data) {
1059
- return null;
1060
- }
1061
- return result.data.textContent || null;
1062
- }
1063
- catch (err) {
1064
- logger.warn(`[FileReferenceRegistry] Archive extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1065
- return null;
1066
- }
1067
- }
1068
- /**
1069
- * Extract a preview from a buffer.
1070
- * For text: first N characters.
1071
- * For binary: type-specific metadata.
1072
- */
1073
- extractPreview(buffer, type, maxChars) {
1074
- if (this.isTextType(type, buffer)) {
1075
- // Text-based: extract first N characters
1076
- const text = buffer.toString("utf-8", 0, Math.min(buffer.length, maxChars + 100));
1077
- if (text.length <= maxChars) {
1078
- return text;
1079
- }
1080
- // Break at line boundary
1081
- const lastNewline = text.lastIndexOf("\n", maxChars);
1082
- if (lastNewline > maxChars * 0.8) {
1083
- return text.substring(0, lastNewline);
1084
- }
1085
- return text.substring(0, maxChars) + "\n...[truncated]";
1086
- }
1087
- // Binary types: type-specific preview
1088
- const sizeMB = (buffer.length / (1024 * 1024)).toFixed(2);
1089
- switch (type) {
1090
- case "image":
1091
- return `[Image file: ${sizeMB} MB]`;
1092
- case "video":
1093
- return `[Video file: ${sizeMB} MB — use read tools for metadata/keyframes]`;
1094
- case "audio":
1095
- return `[Audio file: ${sizeMB} MB — use read tools for metadata/transcript]`;
1096
- case "archive":
1097
- return `[Archive file: ${sizeMB} MB — use read tools for file listing]`;
1098
- case "pdf":
1099
- return `[PDF document: ${sizeMB} MB — use read tools for page content]`;
1100
- default:
1101
- return `[Binary file: ${sizeMB} MB, type: ${type}]`;
1102
- }
1103
- }
1104
- /**
1105
- * Detect file type from buffer magic bytes and extension.
1106
- */
1107
- detectType(buffer, ext) {
1108
- // Check magic bytes first
1109
- if (buffer.length >= 4) {
1110
- const header = buffer.subarray(0, 8);
1111
- // PNG: 89 50 4E 47
1112
- if (header[0] === 0x89 &&
1113
- header[1] === 0x50 &&
1114
- header[2] === 0x4e &&
1115
- header[3] === 0x47) {
1116
- return "image";
1117
- }
1118
- // JPEG: FF D8 FF
1119
- if (header[0] === 0xff && header[1] === 0xd8 && header[2] === 0xff) {
1120
- return "image";
1121
- }
1122
- // GIF: 47 49 46
1123
- if (header[0] === 0x47 && header[1] === 0x49 && header[2] === 0x46) {
1124
- return "image";
1125
- }
1126
- // WebP: 52 49 46 46 ... 57 45 42 50
1127
- if (header[0] === 0x52 &&
1128
- header[1] === 0x49 &&
1129
- header[2] === 0x46 &&
1130
- header[3] === 0x46 &&
1131
- buffer.length >= 12 &&
1132
- buffer[8] === 0x57 &&
1133
- buffer[9] === 0x45 &&
1134
- buffer[10] === 0x42 &&
1135
- buffer[11] === 0x50) {
1136
- return "image";
1137
- }
1138
- // PDF: 25 50 44 46
1139
- if (header[0] === 0x25 &&
1140
- header[1] === 0x50 &&
1141
- header[2] === 0x44 &&
1142
- header[3] === 0x46) {
1143
- return "pdf";
1144
- }
1145
- // ZIP (and derivatives: xlsx, docx, pptx)
1146
- if (header[0] === 0x50 && header[1] === 0x4b) {
1147
- // Differentiate by extension
1148
- if (ext === "xlsx") {
1149
- return "xlsx";
1150
- }
1151
- if (ext === "docx") {
1152
- return "docx";
1153
- }
1154
- if (ext === "pptx") {
1155
- return "pptx";
1156
- }
1157
- return "archive";
1158
- }
1159
- // MP4/M4A: ftyp
1160
- if (buffer.length >= 8 &&
1161
- buffer[4] === 0x66 &&
1162
- buffer[5] === 0x74 &&
1163
- buffer[6] === 0x79 &&
1164
- buffer[7] === 0x70) {
1165
- if (["m4a", "aac"].includes(ext)) {
1166
- return "audio";
1167
- }
1168
- return "video";
1169
- }
1170
- // ID3 (MP3): 49 44 33
1171
- if (header[0] === 0x49 && header[1] === 0x44 && header[2] === 0x33) {
1172
- return "audio";
1173
- }
1174
- // OGG: 4F 67 67 53
1175
- if (header[0] === 0x4f &&
1176
- header[1] === 0x67 &&
1177
- header[2] === 0x67 &&
1178
- header[3] === 0x53) {
1179
- return "audio";
1180
- }
1181
- // FLAC: 66 4C 61 43
1182
- if (header[0] === 0x66 &&
1183
- header[1] === 0x4c &&
1184
- header[2] === 0x61 &&
1185
- header[3] === 0x43) {
1186
- return "audio";
1187
- }
1188
- // WAV: 52 49 46 46 ... 57 41 56 45
1189
- if (header[0] === 0x52 &&
1190
- header[1] === 0x49 &&
1191
- header[2] === 0x46 &&
1192
- header[3] === 0x46 &&
1193
- buffer.length >= 12 &&
1194
- buffer[8] === 0x57 &&
1195
- buffer[9] === 0x41 &&
1196
- buffer[10] === 0x56 &&
1197
- buffer[11] === 0x45) {
1198
- return "audio";
1199
- }
1200
- // MKV/WebM: 1A 45 DF A3
1201
- if (header[0] === 0x1a &&
1202
- header[1] === 0x45 &&
1203
- header[2] === 0xdf &&
1204
- header[3] === 0xa3) {
1205
- if (ext === "webm") {
1206
- return "video";
1207
- }
1208
- return "video";
1209
- }
1210
- // AVI: 52 49 46 46 ... 41 56 49 20
1211
- if (header[0] === 0x52 &&
1212
- header[1] === 0x49 &&
1213
- header[2] === 0x46 &&
1214
- header[3] === 0x46 &&
1215
- buffer.length >= 12 &&
1216
- buffer[8] === 0x41 &&
1217
- buffer[9] === 0x56 &&
1218
- buffer[10] === 0x49 &&
1219
- buffer[11] === 0x20) {
1220
- return "video";
1221
- }
1222
- }
1223
- // Fall back to extension
1224
- return this.detectTypeFromExtension(ext);
1225
- }
1226
- /**
1227
- * Detect file type from extension alone.
1228
- */
1229
- detectTypeFromExtension(ext) {
1230
- const extensionMap = {
1231
- // Images
1232
- png: "image",
1233
- jpg: "image",
1234
- jpeg: "image",
1235
- gif: "image",
1236
- webp: "image",
1237
- bmp: "image",
1238
- tiff: "image",
1239
- ico: "image",
1240
- // Video
1241
- mp4: "video",
1242
- mkv: "video",
1243
- webm: "video",
1244
- avi: "video",
1245
- mov: "video",
1246
- m4v: "video",
1247
- // Audio
1248
- mp3: "audio",
1249
- wav: "audio",
1250
- ogg: "audio",
1251
- flac: "audio",
1252
- aac: "audio",
1253
- m4a: "audio",
1254
- wma: "audio",
1255
- // Documents
1256
- pdf: "pdf",
1257
- docx: "docx",
1258
- pptx: "pptx",
1259
- xlsx: "xlsx",
1260
- // Data
1261
- csv: "csv",
1262
- tsv: "csv",
1263
- // Markup
1264
- svg: "svg",
1265
- // Archives
1266
- zip: "archive",
1267
- tar: "archive",
1268
- gz: "archive",
1269
- tgz: "archive",
1270
- "7z": "archive",
1271
- rar: "archive",
1272
- // Text & Code
1273
- txt: "text",
1274
- md: "text",
1275
- log: "text",
1276
- json: "text",
1277
- yaml: "text",
1278
- yml: "text",
1279
- xml: "text",
1280
- html: "text",
1281
- htm: "text",
1282
- css: "text",
1283
- js: "text",
1284
- ts: "text",
1285
- jsx: "text",
1286
- tsx: "text",
1287
- py: "text",
1288
- java: "text",
1289
- go: "text",
1290
- rs: "text",
1291
- rb: "text",
1292
- php: "text",
1293
- c: "text",
1294
- cpp: "text",
1295
- h: "text",
1296
- cs: "text",
1297
- swift: "text",
1298
- kt: "text",
1299
- scala: "text",
1300
- sql: "text",
1301
- sh: "text",
1302
- bash: "text",
1303
- zsh: "text",
1304
- toml: "text",
1305
- ini: "text",
1306
- cfg: "text",
1307
- env: "text",
1308
- dockerfile: "text",
1309
- makefile: "text",
1310
- };
1311
- return extensionMap[ext.toLowerCase()] || "unknown";
1312
- }
1313
- /**
1314
- * Whether a file type contains readable text content.
1315
- * For "unknown" types, optionally checks the buffer for valid UTF-8 text.
1316
- */
1317
- isTextType(type, buffer) {
1318
- if (["text", "csv", "svg"].includes(type)) {
1319
- return true;
1320
- }
1321
- // For unknown types, heuristically check if the buffer is likely text
1322
- if (type === "unknown" && buffer && buffer.length > 0) {
1323
- return FileReferenceRegistry.looksLikeText(buffer);
1324
- }
1325
- return false;
1326
- }
1327
- /**
1328
- * Heuristic check: does a buffer look like valid text content?
1329
- * Checks the first 512 bytes for mostly printable ASCII/UTF-8 characters.
1330
- * Returns true if >90% of bytes are printable (ASCII 0x20-0x7E, tab, newline, CR).
1331
- */
1332
- static looksLikeText(buffer) {
1333
- const sampleSize = Math.min(buffer.length, 512);
1334
- let printable = 0;
1335
- for (let i = 0; i < sampleSize; i++) {
1336
- const b = buffer[i];
1337
- // Printable ASCII, tab, newline, carriage return, or high bytes (UTF-8 multibyte)
1338
- if ((b >= 0x20 && b <= 0x7e) ||
1339
- b === 0x09 ||
1340
- b === 0x0a ||
1341
- b === 0x0d ||
1342
- b >= 0x80) {
1343
- printable++;
1344
- }
1345
- }
1346
- return printable / sampleSize > 0.9;
1347
- }
1348
- /**
1349
- * Guess MIME type from file type and extension.
1350
- */
1351
- guessMimeType(type, ext) {
1352
- const mimeMap = {
1353
- // By file type
1354
- csv: "text/csv",
1355
- svg: "image/svg+xml",
1356
- pdf: "application/pdf",
1357
- docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1358
- pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
1359
- xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1360
- video: "video/mp4",
1361
- audio: "audio/mpeg",
1362
- archive: "application/zip",
1363
- image: "image/png",
1364
- };
1365
- if (mimeMap[type]) {
1366
- return mimeMap[type];
1367
- }
1368
- // By extension
1369
- const extMime = {
1370
- png: "image/png",
1371
- jpg: "image/jpeg",
1372
- jpeg: "image/jpeg",
1373
- gif: "image/gif",
1374
- webp: "image/webp",
1375
- mp4: "video/mp4",
1376
- mkv: "video/x-matroska",
1377
- webm: "video/webm",
1378
- avi: "video/x-msvideo",
1379
- mov: "video/quicktime",
1380
- mp3: "audio/mpeg",
1381
- wav: "audio/wav",
1382
- ogg: "audio/ogg",
1383
- flac: "audio/flac",
1384
- json: "application/json",
1385
- xml: "application/xml",
1386
- html: "text/html",
1387
- css: "text/css",
1388
- js: "text/javascript",
1389
- ts: "text/typescript",
1390
- py: "text/x-python",
1391
- zip: "application/zip",
1392
- tar: "application/x-tar",
1393
- gz: "application/gzip",
1394
- };
1395
- return extMime[ext.toLowerCase()] || "application/octet-stream";
1396
- }
1397
- /**
1398
- * Guess file extension from magic bytes.
1399
- */
1400
- guessExtension(buffer) {
1401
- if (buffer.length < 4) {
1402
- return "";
1403
- }
1404
- if (buffer[0] === 0x89 && buffer[1] === 0x50) {
1405
- return ".png";
1406
- }
1407
- if (buffer[0] === 0xff && buffer[1] === 0xd8) {
1408
- return ".jpg";
1409
- }
1410
- if (buffer[0] === 0x25 && buffer[1] === 0x50) {
1411
- return ".pdf";
1412
- }
1413
- if (buffer[0] === 0x50 && buffer[1] === 0x4b) {
1414
- return ".zip";
1415
- }
1416
- if (buffer[0] === 0x49 && buffer[1] === 0x44) {
1417
- return ".mp3";
1418
- }
1419
- // MP4/MOV/M4V — ftyp atom at offset 4
1420
- if (buffer.length >= 8 &&
1421
- buffer[4] === 0x66 &&
1422
- buffer[5] === 0x74 &&
1423
- buffer[6] === 0x79 &&
1424
- buffer[7] === 0x70) {
1425
- // Check the brand to distinguish MOV vs MP4
1426
- const brand = buffer.toString("ascii", 8, 12);
1427
- if (brand === "qt ") {
1428
- return ".mov";
1429
- }
1430
- return ".mp4";
1431
- }
1432
- // MKV/WebM — EBML header (0x1A 0x45 0xDF 0xA3)
1433
- if (buffer.length >= 4 &&
1434
- buffer[0] === 0x1a &&
1435
- buffer[1] === 0x45 &&
1436
- buffer[2] === 0xdf &&
1437
- buffer[3] === 0xa3) {
1438
- return ".mkv";
1439
- }
1440
- // AVI — RIFF....AVI
1441
- if (buffer.length >= 12 &&
1442
- buffer[0] === 0x52 &&
1443
- buffer[1] === 0x49 &&
1444
- buffer[2] === 0x46 &&
1445
- buffer[3] === 0x46 &&
1446
- buffer[8] === 0x41 &&
1447
- buffer[9] === 0x56 &&
1448
- buffer[10] === 0x49) {
1449
- return ".avi";
1450
- }
1451
- // WAV — RIFF....WAVE
1452
- if (buffer.length >= 12 &&
1453
- buffer[0] === 0x52 &&
1454
- buffer[1] === 0x49 &&
1455
- buffer[2] === 0x46 &&
1456
- buffer[3] === 0x46 &&
1457
- buffer[8] === 0x57 &&
1458
- buffer[9] === 0x41 &&
1459
- buffer[10] === 0x56 &&
1460
- buffer[11] === 0x45) {
1461
- return ".wav";
1462
- }
1463
- // FLAC
1464
- if (buffer.length >= 4 &&
1465
- buffer[0] === 0x66 &&
1466
- buffer[1] === 0x4c &&
1467
- buffer[2] === 0x61 &&
1468
- buffer[3] === 0x43) {
1469
- return ".flac";
1470
- }
1471
- // OGG
1472
- if (buffer.length >= 4 &&
1473
- buffer[0] === 0x4f &&
1474
- buffer[1] === 0x67 &&
1475
- buffer[2] === 0x67 &&
1476
- buffer[3] === 0x53) {
1477
- return ".ogg";
1478
- }
1479
- return "";
1480
- }
1481
- /**
1482
- * Persist a buffer to the temp directory.
1483
- */
1484
- async persistToTemp(id, buffer, ext) {
1485
- // Check temp space budget
1486
- if (this.currentTempBytes + buffer.length > this.maxTempBytes) {
1487
- // Try evicting oldest files
1488
- this.evictLRU();
1489
- if (this.currentTempBytes + buffer.length > this.maxTempBytes) {
1490
- throw new Error(`Temp directory budget exceeded (${this.formatSize(this.maxTempBytes)})`);
1491
- }
1492
- }
1493
- // Ensure temp directory exists
1494
- if (!this.tempDirCreated) {
1495
- await mkdir(this.tempDir, { recursive: true });
1496
- this.tempDirCreated = true;
1497
- }
1498
- const tempPath = join(this.tempDir, `${id}${ext ? `.${ext}` : ""}`);
1499
- await writeFile(tempPath, buffer);
1500
- this.currentTempBytes += buffer.length;
1501
- return tempPath;
1502
- }
1503
- /**
1504
- * Evict the least recently used file reference.
1505
- */
1506
- evictLRU() {
1507
- let oldest = null;
1508
- let oldestId = null;
1509
- for (const [id, ref] of this.files) {
1510
- if (!oldest || ref.lastAccessedAt < oldest.lastAccessedAt) {
1511
- oldest = ref;
1512
- oldestId = id;
1513
- }
1514
- }
1515
- if (oldestId && oldest) {
1516
- logger.info(`[FileReferenceRegistry] Evicting LRU: "${oldest.filename}" ` +
1517
- `(last accessed ${new Date(oldest.lastAccessedAt).toISOString()})`);
1518
- // Clean up temp file if we created it
1519
- if (oldest.tempPath && oldest.source !== "path") {
1520
- unlink(oldest.tempPath).catch(() => {
1521
- // Ignore cleanup errors
1522
- });
1523
- this.currentTempBytes -= oldest.sizeBytes;
1524
- }
1525
- this.files.delete(oldestId);
1526
- }
1527
- }
1528
- /**
1529
- * Format byte size as human-readable string.
1530
- */
1531
- formatSize(bytes) {
1532
- if (bytes < 1024) {
1533
- return `${bytes} B`;
1534
- }
1535
- if (bytes < 1024 * 1024) {
1536
- return `${(bytes / 1024).toFixed(1)} KB`;
1537
- }
1538
- if (bytes < 1024 * 1024 * 1024) {
1539
- return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
1540
- }
1541
- return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
1542
- }
1543
- }