@juspay/neurolink 9.32.0 → 9.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (475)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/auth/anthropicOAuth.js +1 -1
  3. package/dist/cli/commands/proxy.js +18 -5
  4. package/dist/client/aiSdkAdapter.js +1 -1
  5. package/dist/client/index.js +137 -501
  6. package/dist/core/factory.js +0 -1
  7. package/dist/core/redisConversationMemoryManager.js +1 -1
  8. package/dist/features/ppt/slideGenerator.js +0 -1
  9. package/dist/features/ppt/utils.js +0 -1
  10. package/dist/lib/neurolink.d.ts +10 -0
  11. package/dist/lib/neurolink.js +41 -7
  12. package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
  13. package/dist/lib/types/generateTypes.d.ts +16 -0
  14. package/dist/lib/types/streamTypes.d.ts +15 -0
  15. package/dist/mcp/elicitationProtocol.js +1 -1
  16. package/dist/mcp/servers/agent/directToolsServer.js +0 -1
  17. package/dist/neurolink.d.ts +10 -0
  18. package/dist/neurolink.js +41 -7
  19. package/dist/providers/azureOpenai.js +1 -1
  20. package/dist/providers/huggingFace.js +0 -1
  21. package/dist/providers/openaiCompatible.js +0 -1
  22. package/dist/sdk/toolRegistration.js +0 -1
  23. package/dist/server/openapi/generator.js +1 -1
  24. package/dist/server/routes/claudeProxyRoutes.js +45 -9
  25. package/dist/types/configTypes.js +0 -5
  26. package/dist/types/generateTypes.d.ts +16 -0
  27. package/dist/types/modelTypes.js +0 -1
  28. package/dist/types/streamTypes.d.ts +15 -0
  29. package/dist/types/tools.js +0 -1
  30. package/dist/types/typeAliases.js +0 -1
  31. package/dist/types/utilities.js +1 -1
  32. package/dist/types/workflowTypes.js +0 -1
  33. package/dist/utils/providerRetry.js +0 -1
  34. package/dist/utils/providerUtils.js +0 -1
  35. package/package.json +2 -2
  36. package/dist/client/adapters/providerImageAdapter.js +0 -588
  37. package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
  38. package/dist/client/adapters/video/directorPipeline.js +0 -516
  39. package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
  40. package/dist/client/adapters/video/frameExtractor.js +0 -143
  41. package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
  42. package/dist/client/adapters/video/videoAnalyzer.js +0 -238
  43. package/dist/client/adapters/video/videoMerger.js +0 -171
  44. package/dist/client/agent/directTools.js +0 -840
  45. package/dist/client/auth/AuthProviderFactory.js +0 -111
  46. package/dist/client/auth/AuthProviderRegistry.js +0 -190
  47. package/dist/client/auth/RequestContext.js +0 -78
  48. package/dist/client/auth/accountPool.js +0 -178
  49. package/dist/client/auth/anthropicOAuth.js +0 -974
  50. package/dist/client/auth/authContext.js +0 -314
  51. package/dist/client/auth/errors.js +0 -39
  52. package/dist/client/auth/index.js +0 -61
  53. package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
  54. package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
  55. package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
  56. package/dist/client/auth/providers/CognitoProvider.js +0 -304
  57. package/dist/client/auth/providers/KeycloakProvider.js +0 -393
  58. package/dist/client/auth/providers/auth0.js +0 -274
  59. package/dist/client/auth/providers/betterAuth.js +0 -182
  60. package/dist/client/auth/providers/clerk.js +0 -317
  61. package/dist/client/auth/providers/custom.js +0 -112
  62. package/dist/client/auth/providers/firebase.js +0 -226
  63. package/dist/client/auth/providers/jwt.js +0 -212
  64. package/dist/client/auth/providers/oauth2.js +0 -303
  65. package/dist/client/auth/providers/supabase.js +0 -259
  66. package/dist/client/auth/providers/workos.js +0 -284
  67. package/dist/client/auth/serverBridge.js +0 -25
  68. package/dist/client/auth/sessionManager.js +0 -437
  69. package/dist/client/auth/tokenStore.js +0 -799
  70. package/dist/client/client/aiSdkAdapter.js +0 -487
  71. package/dist/client/client/auth.js +0 -473
  72. package/dist/client/client/errors.js +0 -552
  73. package/dist/client/client/httpClient.js +0 -837
  74. package/dist/client/client/index.js +0 -172
  75. package/dist/client/client/interceptors.js +0 -601
  76. package/dist/client/client/sseClient.js +0 -545
  77. package/dist/client/client/streamingClient.js +0 -917
  78. package/dist/client/client/wsClient.js +0 -369
  79. package/dist/client/config/configManager.js +0 -303
  80. package/dist/client/config/conversationMemory.js +0 -86
  81. package/dist/client/config/taskClassificationConfig.js +0 -148
  82. package/dist/client/constants/contextWindows.js +0 -295
  83. package/dist/client/constants/enums.js +0 -853
  84. package/dist/client/constants/index.js +0 -207
  85. package/dist/client/constants/performance.js +0 -389
  86. package/dist/client/constants/retry.js +0 -266
  87. package/dist/client/constants/timeouts.js +0 -182
  88. package/dist/client/constants/tokens.js +0 -380
  89. package/dist/client/constants/videoErrors.js +0 -46
  90. package/dist/client/context/budgetChecker.js +0 -98
  91. package/dist/client/context/contextCompactor.js +0 -205
  92. package/dist/client/context/emergencyTruncation.js +0 -88
  93. package/dist/client/context/errorDetection.js +0 -171
  94. package/dist/client/context/errors.js +0 -21
  95. package/dist/client/context/fileTokenBudget.js +0 -127
  96. package/dist/client/context/prompts/summarizationPrompt.js +0 -117
  97. package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
  98. package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
  99. package/dist/client/context/stages/structuredSummarizer.js +0 -99
  100. package/dist/client/context/stages/toolOutputPruner.js +0 -52
  101. package/dist/client/context/summarizationEngine.js +0 -136
  102. package/dist/client/context/toolOutputLimits.js +0 -78
  103. package/dist/client/context/toolPairRepair.js +0 -66
  104. package/dist/client/core/analytics.js +0 -88
  105. package/dist/client/core/baseProvider.js +0 -1385
  106. package/dist/client/core/constants.js +0 -140
  107. package/dist/client/core/conversationMemoryFactory.js +0 -141
  108. package/dist/client/core/conversationMemoryInitializer.js +0 -128
  109. package/dist/client/core/conversationMemoryManager.js +0 -344
  110. package/dist/client/core/dynamicModels.js +0 -358
  111. package/dist/client/core/evaluation.js +0 -309
  112. package/dist/client/core/evaluationProviders.js +0 -248
  113. package/dist/client/core/factory.js +0 -412
  114. package/dist/client/core/infrastructure/baseError.js +0 -22
  115. package/dist/client/core/infrastructure/baseFactory.js +0 -54
  116. package/dist/client/core/infrastructure/baseRegistry.js +0 -53
  117. package/dist/client/core/infrastructure/index.js +0 -5
  118. package/dist/client/core/infrastructure/retry.js +0 -20
  119. package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
  120. package/dist/client/core/modelConfiguration.js +0 -851
  121. package/dist/client/core/modules/GenerationHandler.js +0 -588
  122. package/dist/client/core/modules/MessageBuilder.js +0 -273
  123. package/dist/client/core/modules/StreamHandler.js +0 -185
  124. package/dist/client/core/modules/TelemetryHandler.js +0 -203
  125. package/dist/client/core/modules/ToolsManager.js +0 -499
  126. package/dist/client/core/modules/Utilities.js +0 -331
  127. package/dist/client/core/redisConversationMemoryManager.js +0 -1435
  128. package/dist/client/core/streamAnalytics.js +0 -131
  129. package/dist/client/evaluation/contextBuilder.js +0 -134
  130. package/dist/client/evaluation/index.js +0 -61
  131. package/dist/client/evaluation/prompts.js +0 -73
  132. package/dist/client/evaluation/ragasEvaluator.js +0 -110
  133. package/dist/client/evaluation/retryManager.js +0 -78
  134. package/dist/client/evaluation/scoring.js +0 -61
  135. package/dist/client/factories/providerFactory.js +0 -166
  136. package/dist/client/factories/providerRegistry.js +0 -166
  137. package/dist/client/features/ppt/constants.js +0 -896
  138. package/dist/client/features/ppt/contentPlanner.js +0 -529
  139. package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
  140. package/dist/client/features/ppt/slideGenerator.js +0 -532
  141. package/dist/client/features/ppt/slideRenderers.js +0 -2383
  142. package/dist/client/features/ppt/slideTypeInference.js +0 -405
  143. package/dist/client/features/ppt/types.js +0 -13
  144. package/dist/client/features/ppt/utils.js +0 -443
  145. package/dist/client/files/fileReferenceRegistry.js +0 -1543
  146. package/dist/client/files/fileTools.js +0 -450
  147. package/dist/client/files/streamingReader.js +0 -321
  148. package/dist/client/files/types.js +0 -23
  149. package/dist/client/hitl/hitlErrors.js +0 -54
  150. package/dist/client/hitl/hitlManager.js +0 -460
  151. package/dist/client/mcp/agentExposure.js +0 -356
  152. package/dist/client/mcp/auth/index.js +0 -11
  153. package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
  154. package/dist/client/mcp/auth/tokenStorage.js +0 -134
  155. package/dist/client/mcp/batching/index.js +0 -10
  156. package/dist/client/mcp/batching/requestBatcher.js +0 -441
  157. package/dist/client/mcp/caching/index.js +0 -10
  158. package/dist/client/mcp/caching/toolCache.js +0 -433
  159. package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
  160. package/dist/client/mcp/elicitation/index.js +0 -11
  161. package/dist/client/mcp/elicitation/types.js +0 -10
  162. package/dist/client/mcp/elicitationProtocol.js +0 -375
  163. package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
  164. package/dist/client/mcp/externalServerManager.js +0 -1478
  165. package/dist/client/mcp/factory.js +0 -161
  166. package/dist/client/mcp/flexibleToolValidator.js +0 -161
  167. package/dist/client/mcp/httpRateLimiter.js +0 -391
  168. package/dist/client/mcp/httpRetryHandler.js +0 -178
  169. package/dist/client/mcp/index.js +0 -74
  170. package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
  171. package/dist/client/mcp/mcpClientFactory.js +0 -708
  172. package/dist/client/mcp/mcpRegistryClient.js +0 -488
  173. package/dist/client/mcp/mcpServerBase.js +0 -373
  174. package/dist/client/mcp/multiServerManager.js +0 -579
  175. package/dist/client/mcp/registry.js +0 -158
  176. package/dist/client/mcp/routing/index.js +0 -10
  177. package/dist/client/mcp/routing/toolRouter.js +0 -416
  178. package/dist/client/mcp/serverCapabilities.js +0 -502
  179. package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
  180. package/dist/client/mcp/toolAnnotations.js +0 -239
  181. package/dist/client/mcp/toolConverter.js +0 -258
  182. package/dist/client/mcp/toolDiscoveryService.js +0 -798
  183. package/dist/client/mcp/toolIntegration.js +0 -334
  184. package/dist/client/mcp/toolRegistry.js +0 -729
  185. package/dist/client/memory/hippocampusInitializer.js +0 -19
  186. package/dist/client/memory/memoryRetrievalTools.js +0 -166
  187. package/dist/client/middleware/builtin/analytics.js +0 -132
  188. package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
  189. package/dist/client/middleware/builtin/guardrails.js +0 -109
  190. package/dist/client/middleware/builtin/lifecycle.js +0 -168
  191. package/dist/client/middleware/factory.js +0 -327
  192. package/dist/client/middleware/registry.js +0 -295
  193. package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
  194. package/dist/client/models/anthropicModels.js +0 -527
  195. package/dist/client/neurolink.js +0 -8233
  196. package/dist/client/observability/exporterRegistry.js +0 -413
  197. package/dist/client/observability/exporters/arizeExporter.js +0 -138
  198. package/dist/client/observability/exporters/baseExporter.js +0 -190
  199. package/dist/client/observability/exporters/braintrustExporter.js +0 -154
  200. package/dist/client/observability/exporters/datadogExporter.js +0 -196
  201. package/dist/client/observability/exporters/laminarExporter.js +0 -302
  202. package/dist/client/observability/exporters/langfuseExporter.js +0 -209
  203. package/dist/client/observability/exporters/langsmithExporter.js +0 -143
  204. package/dist/client/observability/exporters/otelExporter.js +0 -164
  205. package/dist/client/observability/exporters/posthogExporter.js +0 -287
  206. package/dist/client/observability/exporters/sentryExporter.js +0 -165
  207. package/dist/client/observability/index.js +0 -31
  208. package/dist/client/observability/metricsAggregator.js +0 -556
  209. package/dist/client/observability/otelBridge.js +0 -131
  210. package/dist/client/observability/retryPolicy.js +0 -383
  211. package/dist/client/observability/sampling/samplers.js +0 -216
  212. package/dist/client/observability/spanProcessor.js +0 -303
  213. package/dist/client/observability/tokenTracker.js +0 -413
  214. package/dist/client/observability/types/exporterTypes.js +0 -5
  215. package/dist/client/observability/types/index.js +0 -4
  216. package/dist/client/observability/types/spanTypes.js +0 -92
  217. package/dist/client/observability/utils/safeMetadata.js +0 -25
  218. package/dist/client/observability/utils/spanSerializer.js +0 -292
  219. package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
  220. package/dist/client/processors/base/BaseFileProcessor.js +0 -614
  221. package/dist/client/processors/base/types.js +0 -82
  222. package/dist/client/processors/config/fileTypes.js +0 -520
  223. package/dist/client/processors/config/index.js +0 -92
  224. package/dist/client/processors/config/languageMap.js +0 -410
  225. package/dist/client/processors/config/mimeTypes.js +0 -363
  226. package/dist/client/processors/config/sizeLimits.js +0 -258
  227. package/dist/client/processors/document/ExcelProcessor.js +0 -590
  228. package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
  229. package/dist/client/processors/document/PptxProcessor.js +0 -157
  230. package/dist/client/processors/document/RtfProcessor.js +0 -361
  231. package/dist/client/processors/document/WordProcessor.js +0 -353
  232. package/dist/client/processors/errors/FileErrorCode.js +0 -255
  233. package/dist/client/processors/errors/errorHelpers.js +0 -386
  234. package/dist/client/processors/errors/errorSerializer.js +0 -507
  235. package/dist/client/processors/errors/index.js +0 -49
  236. package/dist/client/processors/markup/SvgProcessor.js +0 -240
  237. package/dist/client/processors/media/AudioProcessor.js +0 -707
  238. package/dist/client/processors/media/VideoProcessor.js +0 -1045
  239. package/dist/client/providers/amazonBedrock.js +0 -1512
  240. package/dist/client/providers/amazonSagemaker.js +0 -162
  241. package/dist/client/providers/anthropic.js +0 -831
  242. package/dist/client/providers/azureOpenai.js +0 -143
  243. package/dist/client/providers/googleAiStudio.js +0 -1200
  244. package/dist/client/providers/googleNativeGemini3.js +0 -543
  245. package/dist/client/providers/googleVertex.js +0 -2936
  246. package/dist/client/providers/huggingFace.js +0 -315
  247. package/dist/client/providers/litellm.js +0 -488
  248. package/dist/client/providers/mistral.js +0 -157
  249. package/dist/client/providers/ollama.js +0 -1579
  250. package/dist/client/providers/openAI.js +0 -627
  251. package/dist/client/providers/openRouter.js +0 -543
  252. package/dist/client/providers/openaiCompatible.js +0 -290
  253. package/dist/client/providers/providerTypeUtils.js +0 -46
  254. package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
  255. package/dist/client/providers/sagemaker/client.js +0 -472
  256. package/dist/client/providers/sagemaker/config.js +0 -317
  257. package/dist/client/providers/sagemaker/detection.js +0 -606
  258. package/dist/client/providers/sagemaker/error-constants.js +0 -227
  259. package/dist/client/providers/sagemaker/errors.js +0 -299
  260. package/dist/client/providers/sagemaker/language-model.js +0 -775
  261. package/dist/client/providers/sagemaker/parsers.js +0 -634
  262. package/dist/client/providers/sagemaker/streaming.js +0 -331
  263. package/dist/client/providers/sagemaker/structured-parser.js +0 -625
  264. package/dist/client/proxy/accountQuota.js +0 -162
  265. package/dist/client/proxy/claudeFormat.js +0 -595
  266. package/dist/client/proxy/modelRouter.js +0 -29
  267. package/dist/client/proxy/oauthFetch.js +0 -367
  268. package/dist/client/proxy/proxyFetch.js +0 -586
  269. package/dist/client/proxy/requestLogger.js +0 -207
  270. package/dist/client/proxy/tokenRefresh.js +0 -124
  271. package/dist/client/proxy/usageStats.js +0 -74
  272. package/dist/client/proxy/utils/noProxyUtils.js +0 -149
  273. package/dist/client/rag/ChunkerFactory.js +0 -320
  274. package/dist/client/rag/ChunkerRegistry.js +0 -421
  275. package/dist/client/rag/chunkers/BaseChunker.js +0 -143
  276. package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
  277. package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
  278. package/dist/client/rag/chunkers/JSONChunker.js +0 -68
  279. package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
  280. package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
  281. package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
  282. package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
  283. package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
  284. package/dist/client/rag/chunkers/TokenChunker.js +0 -61
  285. package/dist/client/rag/chunkers/index.js +0 -15
  286. package/dist/client/rag/chunking/characterChunker.js +0 -142
  287. package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
  288. package/dist/client/rag/chunking/htmlChunker.js +0 -247
  289. package/dist/client/rag/chunking/index.js +0 -17
  290. package/dist/client/rag/chunking/jsonChunker.js +0 -281
  291. package/dist/client/rag/chunking/latexChunker.js +0 -251
  292. package/dist/client/rag/chunking/markdownChunker.js +0 -373
  293. package/dist/client/rag/chunking/recursiveChunker.js +0 -148
  294. package/dist/client/rag/chunking/semanticChunker.js +0 -306
  295. package/dist/client/rag/chunking/sentenceChunker.js +0 -230
  296. package/dist/client/rag/chunking/tokenChunker.js +0 -183
  297. package/dist/client/rag/document/MDocument.js +0 -392
  298. package/dist/client/rag/document/index.js +0 -5
  299. package/dist/client/rag/document/loaders.js +0 -500
  300. package/dist/client/rag/errors/RAGError.js +0 -274
  301. package/dist/client/rag/errors/index.js +0 -6
  302. package/dist/client/rag/graphRag/graphRAG.js +0 -401
  303. package/dist/client/rag/graphRag/index.js +0 -4
  304. package/dist/client/rag/index.js +0 -141
  305. package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
  306. package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
  307. package/dist/client/rag/metadata/index.js +0 -9
  308. package/dist/client/rag/metadata/metadataExtractor.js +0 -280
  309. package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
  310. package/dist/client/rag/pipeline/contextAssembly.js +0 -341
  311. package/dist/client/rag/pipeline/index.js +0 -5
  312. package/dist/client/rag/ragIntegration.js +0 -321
  313. package/dist/client/rag/reranker/RerankerFactory.js +0 -430
  314. package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
  315. package/dist/client/rag/reranker/index.js +0 -9
  316. package/dist/client/rag/reranker/reranker.js +0 -277
  317. package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
  318. package/dist/client/rag/resilience/RetryHandler.js +0 -304
  319. package/dist/client/rag/resilience/index.js +0 -7
  320. package/dist/client/rag/retrieval/hybridSearch.js +0 -335
  321. package/dist/client/rag/retrieval/index.js +0 -5
  322. package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
  323. package/dist/client/rag/types.js +0 -8
  324. package/dist/client/sdk/toolRegistration.js +0 -377
  325. package/dist/client/server/abstract/baseServerAdapter.js +0 -575
  326. package/dist/client/server/adapters/expressAdapter.js +0 -486
  327. package/dist/client/server/adapters/fastifyAdapter.js +0 -472
  328. package/dist/client/server/adapters/honoAdapter.js +0 -632
  329. package/dist/client/server/adapters/koaAdapter.js +0 -510
  330. package/dist/client/server/errors.js +0 -486
  331. package/dist/client/server/factory/serverAdapterFactory.js +0 -160
  332. package/dist/client/server/index.js +0 -108
  333. package/dist/client/server/middleware/abortSignal.js +0 -111
  334. package/dist/client/server/middleware/auth.js +0 -388
  335. package/dist/client/server/middleware/cache.js +0 -359
  336. package/dist/client/server/middleware/common.js +0 -281
  337. package/dist/client/server/middleware/deprecation.js +0 -190
  338. package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
  339. package/dist/client/server/middleware/rateLimit.js +0 -227
  340. package/dist/client/server/middleware/validation.js +0 -388
  341. package/dist/client/server/openapi/generator.js +0 -398
  342. package/dist/client/server/openapi/index.js +0 -36
  343. package/dist/client/server/openapi/schemas.js +0 -695
  344. package/dist/client/server/openapi/templates.js +0 -374
  345. package/dist/client/server/routes/agentRoutes.js +0 -189
  346. package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
  347. package/dist/client/server/routes/healthRoutes.js +0 -187
  348. package/dist/client/server/routes/index.js +0 -57
  349. package/dist/client/server/routes/mcpRoutes.js +0 -342
  350. package/dist/client/server/routes/memoryRoutes.js +0 -350
  351. package/dist/client/server/routes/openApiRoutes.js +0 -126
  352. package/dist/client/server/routes/toolRoutes.js +0 -199
  353. package/dist/client/server/streaming/dataStream.js +0 -486
  354. package/dist/client/server/streaming/index.js +0 -11
  355. package/dist/client/server/types.js +0 -67
  356. package/dist/client/server/utils/redaction.js +0 -334
  357. package/dist/client/server/utils/validation.js +0 -243
  358. package/dist/client/server/websocket/WebSocketHandler.js +0 -383
  359. package/dist/client/server/websocket/index.js +0 -4
  360. package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
  361. package/dist/client/telemetry/attributes.js +0 -100
  362. package/dist/client/telemetry/index.js +0 -26
  363. package/dist/client/telemetry/telemetryService.js +0 -308
  364. package/dist/client/telemetry/tracers.js +0 -17
  365. package/dist/client/telemetry/withSpan.js +0 -34
  366. package/dist/client/types/actionTypes.js +0 -6
  367. package/dist/client/types/analytics.js +0 -5
  368. package/dist/client/types/authTypes.js +0 -9
  369. package/dist/client/types/circuitBreakerErrors.js +0 -34
  370. package/dist/client/types/cli.js +0 -21
  371. package/dist/client/types/clientTypes.js +0 -10
  372. package/dist/client/types/common.js +0 -51
  373. package/dist/client/types/configTypes.js +0 -49
  374. package/dist/client/types/content.js +0 -19
  375. package/dist/client/types/contextTypes.js +0 -400
  376. package/dist/client/types/conversation.js +0 -47
  377. package/dist/client/types/conversationMemoryInterface.js +0 -6
  378. package/dist/client/types/domainTypes.js +0 -5
  379. package/dist/client/types/errors.js +0 -167
  380. package/dist/client/types/evaluation.js +0 -5
  381. package/dist/client/types/evaluationProviders.js +0 -5
  382. package/dist/client/types/evaluationTypes.js +0 -1
  383. package/dist/client/types/externalMcp.js +0 -6
  384. package/dist/client/types/fileReferenceTypes.js +0 -8
  385. package/dist/client/types/fileTypes.js +0 -4
  386. package/dist/client/types/generateTypes.js +0 -1
  387. package/dist/client/types/guardrails.js +0 -1
  388. package/dist/client/types/hitlTypes.js +0 -8
  389. package/dist/client/types/index.js +0 -57
  390. package/dist/client/types/mcpTypes.js +0 -5
  391. package/dist/client/types/middlewareTypes.js +0 -1
  392. package/dist/client/types/modelTypes.js +0 -30
  393. package/dist/client/types/multimodal.js +0 -135
  394. package/dist/client/types/observability.js +0 -6
  395. package/dist/client/types/pptTypes.js +0 -82
  396. package/dist/client/types/providers.js +0 -111
  397. package/dist/client/types/proxyTypes.js +0 -16
  398. package/dist/client/types/ragTypes.js +0 -7
  399. package/dist/client/types/sdkTypes.js +0 -8
  400. package/dist/client/types/serviceTypes.js +0 -5
  401. package/dist/client/types/streamTypes.js +0 -1
  402. package/dist/client/types/subscriptionTypes.js +0 -9
  403. package/dist/client/types/taskClassificationTypes.js +0 -5
  404. package/dist/client/types/tools.js +0 -24
  405. package/dist/client/types/ttsTypes.js +0 -57
  406. package/dist/client/types/typeAliases.js +0 -48
  407. package/dist/client/types/utilities.js +0 -4
  408. package/dist/client/types/workflowTypes.js +0 -30
  409. package/dist/client/utils/async/withTimeout.js +0 -98
  410. package/dist/client/utils/asyncMutex.js +0 -60
  411. package/dist/client/utils/conversationMemory.js +0 -431
  412. package/dist/client/utils/csvProcessor.js +0 -846
  413. package/dist/client/utils/errorHandling.js +0 -936
  414. package/dist/client/utils/evaluationUtils.js +0 -131
  415. package/dist/client/utils/factoryProcessing.js +0 -589
  416. package/dist/client/utils/fileDetector.js +0 -2161
  417. package/dist/client/utils/imageCache.js +0 -376
  418. package/dist/client/utils/imageProcessor.js +0 -704
  419. package/dist/client/utils/logger.js +0 -491
  420. package/dist/client/utils/mcpDefaults.js +0 -134
  421. package/dist/client/utils/messageBuilder.js +0 -1653
  422. package/dist/client/utils/modelAliasResolver.js +0 -54
  423. package/dist/client/utils/modelDetection.js +0 -80
  424. package/dist/client/utils/modelRouter.js +0 -292
  425. package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
  426. package/dist/client/utils/observabilityHelpers.js +0 -47
  427. package/dist/client/utils/parameterValidation.js +0 -966
  428. package/dist/client/utils/pdfProcessor.js +0 -410
  429. package/dist/client/utils/performance.js +0 -222
  430. package/dist/client/utils/pricing.js +0 -340
  431. package/dist/client/utils/promptRedaction.js +0 -62
  432. package/dist/client/utils/providerConfig.js +0 -1009
  433. package/dist/client/utils/providerHealth.js +0 -1237
  434. package/dist/client/utils/providerRetry.js +0 -112
  435. package/dist/client/utils/providerUtils.js +0 -434
  436. package/dist/client/utils/rateLimiter.js +0 -200
  437. package/dist/client/utils/redis.js +0 -368
  438. package/dist/client/utils/retryHandler.js +0 -269
  439. package/dist/client/utils/retryability.js +0 -22
  440. package/dist/client/utils/sanitizers/svg.js +0 -481
  441. package/dist/client/utils/schemaConversion.js +0 -255
  442. package/dist/client/utils/taskClassificationUtils.js +0 -149
  443. package/dist/client/utils/taskClassifier.js +0 -94
  444. package/dist/client/utils/thinkingConfig.js +0 -104
  445. package/dist/client/utils/timeout.js +0 -359
  446. package/dist/client/utils/tokenEstimation.js +0 -142
  447. package/dist/client/utils/tokenLimits.js +0 -125
  448. package/dist/client/utils/tokenUtils.js +0 -239
  449. package/dist/client/utils/toolUtils.js +0 -75
  450. package/dist/client/utils/transformationUtils.js +0 -554
  451. package/dist/client/utils/ttsProcessor.js +0 -286
  452. package/dist/client/utils/typeUtils.js +0 -97
  453. package/dist/client/utils/videoAnalysisProcessor.js +0 -67
  454. package/dist/client/workflow/config.js +0 -398
  455. package/dist/client/workflow/core/ensembleExecutor.js +0 -407
  456. package/dist/client/workflow/core/judgeScorer.js +0 -544
  457. package/dist/client/workflow/core/responseConditioner.js +0 -225
  458. package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
  459. package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
  460. package/dist/client/workflow/core/types/index.js +0 -7
  461. package/dist/client/workflow/core/types/judgeTypes.js +0 -7
  462. package/dist/client/workflow/core/types/layerTypes.js +0 -7
  463. package/dist/client/workflow/core/types/registryTypes.js +0 -7
  464. package/dist/client/workflow/core/workflowRegistry.js +0 -304
  465. package/dist/client/workflow/core/workflowRunner.js +0 -586
  466. package/dist/client/workflow/index.js +0 -50
  467. package/dist/client/workflow/types.js +0 -9
  468. package/dist/client/workflow/utils/types/index.js +0 -7
  469. package/dist/client/workflow/utils/workflowMetrics.js +0 -311
  470. package/dist/client/workflow/utils/workflowValidation.js +0 -420
  471. package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
  472. package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
  473. package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
  474. package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
  475. package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
@@ -1,1308 +0,0 @@
1
- /**
2
- * Archive Processor
3
- *
4
- * Handles downloading, validating, and processing archive files (ZIP, TAR, TAR.GZ, GZ).
5
- * Extracts file listings with metadata for AI consumption without recursively
6
- * processing individual entries through other processors (Phase 1).
7
- *
8
- * Key features:
9
- * - ZIP support via adm-zip (dynamic import)
10
- * - TAR / TAR.GZ support via tar-stream (dynamic import)
11
- * - Plain GZ support via Node zlib
12
- * - Comprehensive security validation (path traversal, zip bombs, symlinks, encryption)
13
- * - In-memory extraction with configurable size limits
14
- * - Structured text output for LLM consumption
15
- *
16
- * @module processors/archive/ArchiveProcessor
17
- *
18
- * @example
19
- * ```typescript
20
- * import { archiveProcessor, processArchive, isArchiveFile } from "./ArchiveProcessor.js";
21
- *
22
- * // Check if a file is an archive
23
- * if (isArchiveFile(fileInfo.mimetype, fileInfo.name)) {
24
- * const result = await processArchive(fileInfo, {
25
- * authHeaders: { Authorization: "Bearer token" },
26
- * });
27
- *
28
- * if (result.success) {
29
- * console.log(`Format: ${result.data.archiveMetadata.format}`);
30
- * console.log(`Entries: ${result.data.archiveMetadata.totalEntries}`);
31
- * for (const entry of result.data.entries) {
32
- * console.log(` ${entry.name} (${entry.uncompressedSize} bytes)`);
33
- * }
34
- * }
35
- * }
36
- * ```
37
- */
38
- import * as path from "path";
39
- import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
40
- import { SIZE_LIMITS_MB } from "../config/index.js";
41
- import { FileErrorCode } from "../errors/index.js";
42
- // =============================================================================
43
- // SECURITY CONFIGURATION
44
- // =============================================================================
45
/**
 * Security limits for archive processing.
 *
 * The values are deliberately conservative so that hostile archives
 * (zip bombs, path traversal, oversized members) are rejected early.
 * The object is frozen: a security limit must never be loosened by
 * accidental mutation at runtime.
 */
const ARCHIVE_SECURITY = Object.freeze({
    /** Maximum number of entries allowed in a single archive */
    MAX_ENTRIES: 1000,
    /** Maximum total decompressed size allowed (100 MB) */
    MAX_DECOMPRESSED_SIZE: 100 * 1024 * 1024,
    /** Maximum size of any single file within the archive (20 MB) */
    MAX_SINGLE_FILE_SIZE: 20 * 1024 * 1024,
    /** Maximum compression ratio before flagging as potential zip bomb */
    MAX_COMPRESSION_RATIO: 100,
    /**
     * Maximum archive nesting depth.
     * Phase 1 only lists contents (no recursive extraction), so depth is 1.
     */
    MAX_NESTING_DEPTH: 1,
    /** Maximum path length for any entry name */
    MAX_PATH_LENGTH: 255,
    /** Whether to allow encrypted archive entries */
    ALLOW_ENCRYPTED: false,
    /** Whether to allow symbolic link entries */
    ALLOW_SYMLINKS: false,
});
71
/**
 * Archive processor configuration constants.
 *
 * Frozen so the limits cannot be reassigned at runtime. Note that
 * Object.freeze is shallow: the EXTRACTABLE_EXTENSIONS Set itself can
 * still be mutated, so treat it as read-only by convention.
 */
const ARCHIVE_CONFIG = Object.freeze({
    /** Maximum archive file size in MB (uses centralized constant from sizeLimits) */
    MAX_SIZE_MB: SIZE_LIMITS_MB.ARCHIVE_MAX_MB,
    /** Processing timeout in milliseconds (60 seconds) */
    TIMEOUT_MS: 60_000,
    /** Maximum number of entries to extract content from (Phase 2 sub-processing) */
    MAX_EXTRACT_ENTRIES: 20,
    /** Maximum size of a single entry to extract for content processing (1 MB) */
    MAX_EXTRACT_ENTRY_SIZE: 1 * 1024 * 1024,
    /** Maximum total extracted content size across all entries (5 MB) */
    MAX_TOTAL_EXTRACT_SIZE: 5 * 1024 * 1024,
    /**
     * File extensions eligible for content extraction inside archives.
     *
     * NOTE(review): path.extname() returns "" for dotfiles such as
     * ".gitignore" or ".env" and for extension-less names such as
     * "Dockerfile"; confirm the consumer of this set matches those
     * entries by basename rather than by extname.
     */
    EXTRACTABLE_EXTENSIONS: new Set([
        ".ts",
        ".js",
        ".tsx",
        ".jsx",
        ".py",
        ".java",
        ".go",
        ".rs",
        ".rb",
        ".php",
        ".c",
        ".cpp",
        ".h",
        ".hpp",
        ".cs",
        ".swift",
        ".kt",
        ".scala",
        ".sh",
        ".bash",
        ".txt",
        ".md",
        ".json",
        ".yaml",
        ".yml",
        ".xml",
        ".html",
        ".css",
        ".sql",
        ".toml",
        ".ini",
        ".cfg",
        ".env",
        ".csv",
        ".log",
        ".conf",
        ".dockerfile",
        ".makefile",
        ".gitignore",
        ".editorconfig",
    ]),
});
129
- // =============================================================================
130
- // SUPPORTED FORMATS
131
- // =============================================================================
132
/**
 * MIME types recognized as archive formats.
 *
 * Frozen because it is a shared module-level list; consumers that need
 * a mutable list should copy it (the constructor does so via spread).
 */
const SUPPORTED_ARCHIVE_MIME_TYPES = Object.freeze([
    "application/zip",
    "application/x-zip-compressed",
    "application/x-zip",
    "application/x-tar",
    "application/x-gtar",
    "application/gzip",
    "application/x-gzip",
    "application/x-compressed-tar",
    "application/x-bzip2",
    "application/java-archive",
]);
145
/**
 * File extensions recognized as archive formats.
 *
 * Frozen because it is a shared module-level list; copy it before
 * handing it to code that may mutate it.
 */
const SUPPORTED_ARCHIVE_EXTENSIONS = Object.freeze([
    ".zip",
    ".tar",
    ".gz",
    ".tgz",
    ".bz2",
    ".tbz2",
    ".jar",
]);
147
- // =============================================================================
148
- // MAGIC BYTE SIGNATURES
149
- // =============================================================================
150
/**
 * Magic byte signatures for archive format detection.
 * Used alongside the file extension for robust format identification.
 *
 * Both the table and every signature array are frozen so a signature
 * cannot be altered after module load.
 */
const MAGIC_BYTES = Object.freeze({
    /** ZIP/JAR: PK\x03\x04 */
    ZIP: Object.freeze([0x50, 0x4b, 0x03, 0x04]),
    /** ZIP empty archive: PK\x05\x06 */
    ZIP_EMPTY: Object.freeze([0x50, 0x4b, 0x05, 0x06]),
    /** ZIP spanned: PK\x07\x08 */
    ZIP_SPANNED: Object.freeze([0x50, 0x4b, 0x07, 0x08]),
    /** GZIP: \x1f\x8b */
    GZIP: Object.freeze([0x1f, 0x8b]),
    /** BZIP2: BZ */
    BZIP2: Object.freeze([0x42, 0x5a]),
    /** RAR: Rar!\x1a\x07 */
    RAR: Object.freeze([0x52, 0x61, 0x72, 0x21, 0x1a, 0x07]),
    /** 7-Zip: 7z\xbc\xaf\x27\x1c */
    SEVEN_ZIP: Object.freeze([0x37, 0x7a, 0xbc, 0xaf, 0x27, 0x1c]),
});
170
- // =============================================================================
171
- // ARCHIVE PROCESSOR CLASS
172
- // =============================================================================
173
- /**
174
- * Archive Processor - handles ZIP, TAR, TAR.GZ, and plain GZ files.
175
- *
176
- * Overrides the base `processFile()` to implement a custom pipeline:
177
- * 1. Validate file type and size
178
- * 2. Obtain the archive buffer (from provided buffer or URL download)
179
- * 3. Detect the archive format via magic bytes and file extension
180
- * 4. Run security validation (path traversal, zip bombs, encryption, symlinks)
181
- * 5. Extract entry metadata (no recursive file processing in Phase 1)
182
- * 6. Build LLM-friendly text content with file listing
183
- *
184
- * RAR and 7z formats are detected but not yet supported for extraction.
185
- *
186
- * @example
187
- * ```typescript
188
- * const processor = new ArchiveProcessor();
189
- *
190
- * const result = await processor.processFile(fileInfo, {
191
- * authHeaders: { Authorization: "Bearer token" },
192
- * });
193
- *
194
- * if (result.success) {
195
- * console.log(`Format: ${result.data.archiveMetadata.format}`);
196
- * console.log(`Entries: ${result.data.entries.length}`);
197
- * console.log(result.data.textContent);
198
- * }
199
- * ```
200
- */
201
- export class ArchiveProcessor extends BaseFileProcessor {
202
- constructor() {
203
- super({
204
- maxSizeMB: ARCHIVE_CONFIG.MAX_SIZE_MB,
205
- timeoutMs: ARCHIVE_CONFIG.TIMEOUT_MS,
206
- supportedMimeTypes: [...SUPPORTED_ARCHIVE_MIME_TYPES],
207
- supportedExtensions: [...SUPPORTED_ARCHIVE_EXTENSIONS],
208
- fileTypeName: "archive",
209
- defaultFilename: "archive.zip",
210
- });
211
- }
212
- // ===========================================================================
213
- // ABSTRACT METHOD IMPLEMENTATION
214
- // ===========================================================================
215
- /**
216
- * Build a stub processed result.
217
- * The actual work is done in the `processFile()` override; this method
218
- * satisfies the abstract contract from `BaseFileProcessor`.
219
- *
220
- * @param buffer - Raw archive buffer
221
- * @param fileInfo - Original file information
222
- * @returns Empty ProcessedArchive scaffold
223
- */
224
- buildProcessedResult(buffer, fileInfo) {
225
- return {
226
- buffer,
227
- mimetype: fileInfo.mimetype || "application/octet-stream",
228
- size: buffer.length,
229
- filename: this.getFilename(fileInfo),
230
- textContent: "",
231
- archiveMetadata: {
232
- format: "zip",
233
- totalEntries: 0,
234
- totalUncompressedSize: 0,
235
- totalCompressedSize: 0,
236
- },
237
- entries: [],
238
- securityWarnings: [],
239
- };
240
- }
241
- // ===========================================================================
242
- // MAIN PROCESSING PIPELINE (override)
243
- // ===========================================================================
244
- /**
245
- * Process an archive file through the full extraction pipeline.
246
- *
247
- * @param fileInfo - File information (can include URL or buffer)
248
- * @param options - Optional processing options (auth headers, timeout, etc.)
249
- * @returns Processing result with archive metadata and entry listing, or error
250
- */
251
- async processFile(fileInfo, options) {
252
- try {
253
- // Step 1: Validate file type and size
254
- const validationResult = this.validateFileWithResult(fileInfo);
255
- if (!validationResult.success) {
256
- return { success: false, error: validationResult.error };
257
- }
258
- // Step 2: Get file buffer
259
- let buffer;
260
- if (fileInfo.buffer) {
261
- buffer = fileInfo.buffer;
262
- }
263
- else if (fileInfo.url) {
264
- const downloadResult = await this.downloadFileWithRetry(fileInfo, options);
265
- if (!downloadResult.success) {
266
- return { success: false, error: downloadResult.error };
267
- }
268
- if (!downloadResult.data) {
269
- return {
270
- success: false,
271
- error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
272
- reason: "Download succeeded but returned no data",
273
- }),
274
- };
275
- }
276
- buffer = downloadResult.data;
277
- // Validate actual downloaded size against limit
278
- if (!this.validateFileSize(buffer.length)) {
279
- return {
280
- success: false,
281
- error: this.createError(FileErrorCode.FILE_TOO_LARGE, {
282
- sizeMB: (buffer.length / (1024 * 1024)).toFixed(2),
283
- maxMB: this.config.maxSizeMB,
284
- type: this.config.fileTypeName,
285
- }),
286
- };
287
- }
288
- }
289
- else {
290
- return {
291
- success: false,
292
- error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
293
- reason: "No buffer or URL provided for file",
294
- }),
295
- };
296
- }
297
- // Step 3: Detect archive format
298
- const filename = this.getFilename(fileInfo);
299
- const format = this.detectArchiveFormat(buffer, filename);
300
- if (!format) {
301
- return {
302
- success: false,
303
- error: this.createError(FileErrorCode.INVALID_FORMAT, {
304
- reason: "Unable to detect archive format from magic bytes or file extension",
305
- }),
306
- };
307
- }
308
- // Step 4: Check for unsupported formats (RAR, 7z)
309
- if (format === "rar" || format === "7z") {
310
- return {
311
- success: false,
312
- error: this.createError(FileErrorCode.UNSUPPORTED_TYPE, {
313
- format,
314
- reason: `${format.toUpperCase()} archives are not yet supported. Please convert to ZIP or TAR format.`,
315
- supportedFormats: "ZIP, TAR, TAR.GZ, GZ",
316
- }),
317
- };
318
- }
319
- // Step 5: Extract entries based on format
320
- const extractionResult = await this.extractEntries(buffer, format);
321
- if (!extractionResult.success) {
322
- return {
323
- success: false,
324
- error: extractionResult.error,
325
- };
326
- }
327
- const { entries, securityWarnings } = extractionResult;
328
- // Step 6: Compute aggregate metadata
329
- const totalUncompressedSize = entries.reduce((sum, e) => sum + e.uncompressedSize, 0);
330
- const totalCompressedSize = entries.reduce((sum, e) => sum + e.compressedSize, 0);
331
- // Step 7: Security check - overall compression ratio
332
- if (buffer.length > 0 && totalUncompressedSize > 0) {
333
- const overallRatio = totalUncompressedSize / buffer.length;
334
- if (overallRatio > ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO) {
335
- return {
336
- success: false,
337
- error: this.createError(FileErrorCode.ZIP_BOMB_DETECTED, {
338
- compressionRatio: overallRatio.toFixed(1),
339
- maxRatio: ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO,
340
- }),
341
- };
342
- }
343
- }
344
- // Step 8: Security check - total decompressed size
345
- if (totalUncompressedSize > ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE) {
346
- return {
347
- success: false,
348
- error: this.createError(FileErrorCode.SECURITY_VALIDATION_FAILED, {
349
- reason: `Total decompressed size (${this.formatSizeMB(totalUncompressedSize)} MB) exceeds limit (${this.formatSizeMB(ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE)} MB)`,
350
- }),
351
- };
352
- }
353
- // Step 9: Extract content from text-based entries (Phase 2 sub-processing)
354
- // For ZIP archives, extract and include content from small text-based files.
355
- // Skips nested archives and binary files for safety.
356
- let extractedContents = new Map();
357
- if (format === "zip") {
358
- extractedContents = await this.extractEntryContents(buffer, entries);
359
- }
360
- // Step 10: Build text content for LLM
361
- const archiveMetadata = {
362
- format,
363
- totalEntries: entries.length,
364
- totalUncompressedSize,
365
- totalCompressedSize,
366
- };
367
- const textContent = this.buildTextContent(filename, archiveMetadata, entries, securityWarnings, extractedContents);
368
- // Step 10: Build final result
369
- return {
370
- success: true,
371
- data: {
372
- buffer,
373
- mimetype: fileInfo.mimetype || "application/octet-stream",
374
- size: buffer.length,
375
- filename,
376
- textContent,
377
- archiveMetadata,
378
- entries,
379
- securityWarnings,
380
- },
381
- };
382
- }
383
- catch (error) {
384
- return {
385
- success: false,
386
- error: this.createError(FileErrorCode.PROCESSING_FAILED, {
387
- fileType: "archive",
388
- error: error instanceof Error ? error.message : String(error),
389
- }, error instanceof Error ? error : undefined),
390
- };
391
- }
392
- }
393
- // ===========================================================================
394
- // FORMAT DETECTION
395
- // ===========================================================================
396
- /**
397
- * Detect the archive format using magic bytes and file extension.
398
- * Magic bytes take precedence over extension when available.
399
- *
400
- * @param buffer - Raw archive buffer
401
- * @param filename - Original filename for extension-based fallback
402
- * @returns Detected archive format, or null if unrecognized
403
- */
404
- detectArchiveFormat(buffer, filename) {
405
- // Try magic bytes first (most reliable)
406
- const magicFormat = this.detectFormatFromMagicBytes(buffer);
407
- if (magicFormat) {
408
- // For GZIP, check if it wraps a TAR archive
409
- if (magicFormat === "gz") {
410
- const ext = filename.toLowerCase();
411
- if (ext.endsWith(".tar.gz") || ext.endsWith(".tgz") || ext.endsWith(".tbz2")) {
412
- return "tar.gz";
413
- }
414
- // Could still be a tar.gz without the extension - we'll detect during extraction
415
- return "gz";
416
- }
417
- return magicFormat;
418
- }
419
- // Fallback to extension-based detection
420
- return this.detectFormatFromExtension(filename);
421
- }
422
- /**
423
- * Detect archive format from magic bytes at the start of the buffer.
424
- *
425
- * @param buffer - Raw archive buffer
426
- * @returns Detected format, or null if magic bytes don't match any known format
427
- */
428
- detectFormatFromMagicBytes(buffer) {
429
- if (buffer.length < 2) {
430
- return null;
431
- }
432
- // Check for 7-Zip (6 bytes)
433
- if (buffer.length >= 6 && this.matchesMagic(buffer, MAGIC_BYTES.SEVEN_ZIP)) {
434
- return "7z";
435
- }
436
- // Check for RAR (6+ bytes)
437
- if (buffer.length >= 6 && this.matchesMagic(buffer, MAGIC_BYTES.RAR)) {
438
- return "rar";
439
- }
440
- // Check for ZIP/JAR (4 bytes)
441
- if (buffer.length >= 4 &&
442
- (this.matchesMagic(buffer, MAGIC_BYTES.ZIP) ||
443
- this.matchesMagic(buffer, MAGIC_BYTES.ZIP_EMPTY) ||
444
- this.matchesMagic(buffer, MAGIC_BYTES.ZIP_SPANNED))) {
445
- return "zip";
446
- }
447
- // Check for GZIP (2 bytes)
448
- if (this.matchesMagic(buffer, MAGIC_BYTES.GZIP)) {
449
- return "gz";
450
- }
451
- // Check for BZIP2 (2 bytes)
452
- if (this.matchesMagic(buffer, MAGIC_BYTES.BZIP2)) {
453
- return "tar.bz2";
454
- }
455
- return null;
456
- }
457
- /**
458
- * Detect archive format from file extension.
459
- *
460
- * @param filename - Filename to extract extension from
461
- * @returns Detected format, or null if extension is unrecognized
462
- */
463
- detectFormatFromExtension(filename) {
464
- const lowerFilename = filename.toLowerCase();
465
- if (lowerFilename.endsWith(".tar.gz") || lowerFilename.endsWith(".tgz")) {
466
- return "tar.gz";
467
- }
468
- if (lowerFilename.endsWith(".tar.bz2") || lowerFilename.endsWith(".tbz2")) {
469
- return "tar.bz2";
470
- }
471
- if (lowerFilename.endsWith(".tar")) {
472
- return "tar";
473
- }
474
- if (lowerFilename.endsWith(".gz")) {
475
- return "gz";
476
- }
477
- if (lowerFilename.endsWith(".bz2")) {
478
- return "tar.bz2";
479
- }
480
- if (lowerFilename.endsWith(".zip") || lowerFilename.endsWith(".jar")) {
481
- return "zip";
482
- }
483
- if (lowerFilename.endsWith(".rar")) {
484
- return "rar";
485
- }
486
- if (lowerFilename.endsWith(".7z")) {
487
- return "7z";
488
- }
489
- return null;
490
- }
491
- /**
492
- * Check if a buffer starts with the given magic byte sequence.
493
- *
494
- * @param buffer - Buffer to check
495
- * @param magic - Expected byte sequence
496
- * @returns true if the buffer starts with the magic bytes
497
- */
498
- matchesMagic(buffer, magic) {
499
- for (let i = 0; i < magic.length; i++) {
500
- if (buffer[i] !== magic[i]) {
501
- return false;
502
- }
503
- }
504
- return true;
505
- }
506
- // ===========================================================================
507
- // ENTRY EXTRACTION
508
- // ===========================================================================
509
- /**
510
- * Extract entry metadata from the archive.
511
- * Delegates to format-specific extraction methods.
512
- *
513
- * @param buffer - Raw archive buffer
514
- * @param format - Detected archive format
515
- * @returns Extraction result with entries and security warnings, or error
516
- */
517
- async extractEntries(buffer, format) {
518
- switch (format) {
519
- case "zip":
520
- return this.extractZipEntries(buffer);
521
- case "tar":
522
- return this.extractTarEntries(buffer);
523
- case "tar.gz":
524
- return this.extractTarGzEntries(buffer);
525
- case "tar.bz2":
526
- return {
527
- success: false,
528
- entries: [],
529
- securityWarnings: [],
530
- error: this.createError(FileErrorCode.UNSUPPORTED_TYPE, {
531
- format: "tar.bz2",
532
- reason: "TAR.BZ2 archives are not yet supported. Please convert to ZIP or TAR.GZ format.",
533
- supportedFormats: "ZIP, TAR, TAR.GZ, GZ",
534
- }),
535
- };
536
- case "gz":
537
- return this.extractGzEntries(buffer);
538
- default:
539
- return {
540
- success: false,
541
- entries: [],
542
- securityWarnings: [],
543
- error: this.createError(FileErrorCode.UNSUPPORTED_TYPE, {
544
- format,
545
- reason: `${format} archives are not supported`,
546
- supportedFormats: "ZIP, TAR, TAR.GZ, GZ",
547
- }),
548
- };
549
- }
550
- }
551
- // ===========================================================================
552
- // ZIP EXTRACTION
553
- // ===========================================================================
554
- /**
555
- * Extract entry metadata from a ZIP archive.
556
- * Validates each entry for path traversal, encryption, symlinks, and size limits.
557
- *
558
- * @param buffer - Raw ZIP buffer
559
- * @returns Extraction result with entries, security warnings, or error
560
- */
561
- async extractZipEntries(buffer) {
562
- const entries = [];
563
- const securityWarnings = [];
564
- try {
565
- const AdmZip = (await import("adm-zip")).default;
566
- const zip = new AdmZip(buffer);
567
- const zipEntries = zip.getEntries();
568
- // Check entry count limit
569
- if (zipEntries.length > ARCHIVE_SECURITY.MAX_ENTRIES) {
570
- return {
571
- success: false,
572
- entries: [],
573
- securityWarnings: [],
574
- error: this.createError(FileErrorCode.SECURITY_VALIDATION_FAILED, {
575
- reason: `Archive contains ${zipEntries.length} entries, exceeding the limit of ${ARCHIVE_SECURITY.MAX_ENTRIES}`,
576
- }),
577
- };
578
- }
579
- let cumulativeUncompressedSize = 0;
580
- for (const entry of zipEntries) {
581
- const entryName = entry.entryName;
582
- // Security: path traversal check
583
- if (this.hasPathTraversal(entryName)) {
584
- securityWarnings.push(`Path traversal detected in entry: "${entryName}" - entry skipped`);
585
- continue;
586
- }
587
- // Security: path length check
588
- if (entryName.length > ARCHIVE_SECURITY.MAX_PATH_LENGTH) {
589
- securityWarnings.push(`Entry name exceeds maximum path length (${ARCHIVE_SECURITY.MAX_PATH_LENGTH}): "${entryName.substring(0, 50)}..." - entry skipped`);
590
- continue;
591
- }
592
- // Security: encrypted entry check
593
- if (entry.header.flags & 0x01) {
594
- if (!ARCHIVE_SECURITY.ALLOW_ENCRYPTED) {
595
- securityWarnings.push(`Encrypted entry detected: "${entryName}" - entry skipped`);
596
- continue;
597
- }
598
- }
599
- // Security: symlink check (ZIP external attributes)
600
- const externalAttr = entry.header.attr >>> 16;
601
- const isSymlink = (externalAttr & 0xa000) === 0xa000;
602
- if (isSymlink && !ARCHIVE_SECURITY.ALLOW_SYMLINKS) {
603
- securityWarnings.push(`Symbolic link detected: "${entryName}" - entry skipped`);
604
- continue;
605
- }
606
- const isDirectory = entry.isDirectory;
607
- const uncompressedSize = entry.header.size;
608
- const compressedSize = entry.header.compressedSize;
609
- // Security: single file size check
610
- if (!isDirectory && uncompressedSize > ARCHIVE_SECURITY.MAX_SINGLE_FILE_SIZE) {
611
- securityWarnings.push(`Entry "${entryName}" exceeds single file size limit (${this.formatSizeMB(uncompressedSize)} MB > ${this.formatSizeMB(ARCHIVE_SECURITY.MAX_SINGLE_FILE_SIZE)} MB) - entry listed but flagged`);
612
- }
613
- // Security: per-entry compression ratio check
614
- if (compressedSize > 0 && !isDirectory) {
615
- const ratio = uncompressedSize / compressedSize;
616
- if (ratio > ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO) {
617
- return {
618
- success: false,
619
- entries: [],
620
- securityWarnings: [],
621
- error: this.createError(FileErrorCode.ZIP_BOMB_DETECTED, {
622
- entryName,
623
- compressionRatio: ratio.toFixed(1),
624
- maxRatio: ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO,
625
- }),
626
- };
627
- }
628
- }
629
- // Cumulative decompressed size check
630
- cumulativeUncompressedSize += uncompressedSize;
631
- if (cumulativeUncompressedSize > ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE) {
632
- return {
633
- success: false,
634
- entries: [],
635
- securityWarnings: [],
636
- error: this.createError(FileErrorCode.SECURITY_VALIDATION_FAILED, {
637
- reason: `Cumulative decompressed size exceeds limit of ${this.formatSizeMB(ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE)} MB`,
638
- }),
639
- };
640
- }
641
- entries.push({
642
- name: entryName,
643
- uncompressedSize,
644
- compressedSize,
645
- isDirectory,
646
- });
647
- }
648
- return { success: true, entries, securityWarnings };
649
- }
650
- catch (error) {
651
- return {
652
- success: false,
653
- entries: [],
654
- securityWarnings: [],
655
- error: this.createError(FileErrorCode.CORRUPTED_FILE, {
656
- reason: `Failed to read ZIP archive: ${error instanceof Error ? error.message : String(error)}`,
657
- }, error instanceof Error ? error : undefined),
658
- };
659
- }
660
- }
661
- // ===========================================================================
662
- // TAR EXTRACTION
663
- // ===========================================================================
664
- /**
665
- * Extract entry metadata from a plain TAR archive.
666
- *
667
- * @param buffer - Raw TAR buffer
668
- * @returns Extraction result with entries and security warnings, or error
669
- */
670
- async extractTarEntries(buffer) {
671
- try {
672
- const tarStream = await import("tar-stream");
673
- return await this.parseTarStream(tarStream, buffer);
674
- }
675
- catch (error) {
676
- return {
677
- success: false,
678
- entries: [],
679
- securityWarnings: [],
680
- error: this.createError(FileErrorCode.CORRUPTED_FILE, {
681
- reason: `Failed to read TAR archive: ${error instanceof Error ? error.message : String(error)}`,
682
- }, error instanceof Error ? error : undefined),
683
- };
684
- }
685
- }
686
- /**
687
- * Extract entry metadata from a GZIP-compressed TAR archive.
688
- * First decompresses with zlib, then parses as TAR.
689
- *
690
- * @param buffer - Raw TAR.GZ buffer
691
- * @returns Extraction result with entries and security warnings, or error
692
- */
693
- async extractTarGzEntries(buffer) {
694
- try {
695
- const zlib = await import("zlib");
696
- const { promisify } = await import("util");
697
- const gunzip = promisify(zlib.gunzip);
698
- const decompressed = await gunzip(buffer);
699
- const tarBuffer = Buffer.from(decompressed);
700
- // Security: check decompressed size
701
- if (tarBuffer.length > ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE) {
702
- return {
703
- success: false,
704
- entries: [],
705
- securityWarnings: [],
706
- error: this.createError(FileErrorCode.SECURITY_VALIDATION_FAILED, {
707
- reason: `Decompressed TAR size (${this.formatSizeMB(tarBuffer.length)} MB) exceeds limit (${this.formatSizeMB(ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE)} MB)`,
708
- }),
709
- };
710
- }
711
- // Security: check compression ratio
712
- if (buffer.length > 0) {
713
- const ratio = tarBuffer.length / buffer.length;
714
- if (ratio > ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO) {
715
- return {
716
- success: false,
717
- entries: [],
718
- securityWarnings: [],
719
- error: this.createError(FileErrorCode.ZIP_BOMB_DETECTED, {
720
- compressionRatio: ratio.toFixed(1),
721
- maxRatio: ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO,
722
- }),
723
- };
724
- }
725
- }
726
- const tarStream = await import("tar-stream");
727
- return await this.parseTarStream(tarStream, tarBuffer);
728
- }
729
- catch (error) {
730
- // Check if the error is one we already created (security validation)
731
- if (error &&
732
- typeof error === "object" &&
733
- "code" in error &&
734
- typeof error.code === "string") {
735
- // Re-throw our structured errors
736
- return {
737
- success: false,
738
- entries: [],
739
- securityWarnings: [],
740
- error: this.createError(FileErrorCode.DECOMPRESSION_FAILED, {
741
- reason: `Failed to decompress TAR.GZ archive: ${error instanceof Error ? error.message : String(error)}`,
742
- }, error instanceof Error ? error : undefined),
743
- };
744
- }
745
- return {
746
- success: false,
747
- entries: [],
748
- securityWarnings: [],
749
- error: this.createError(FileErrorCode.DECOMPRESSION_FAILED, {
750
- reason: `Failed to decompress TAR.GZ archive: ${error instanceof Error ? error.message : String(error)}`,
751
- }, error instanceof Error ? error : undefined),
752
- };
753
- }
754
- }
755
- /**
756
- * Parse a TAR stream and extract entry metadata.
757
- * Shared between plain TAR and decompressed TAR.GZ processing.
758
- *
759
- * @param tarStream - The imported tar-stream module
760
- * @param buffer - Raw (decompressed) TAR buffer
761
- * @returns Extraction result with entries and security warnings, or error
762
- */
763
- async parseTarStream(tarStream, buffer) {
764
- return new Promise((resolve) => {
765
- const entries = [];
766
- const securityWarnings = [];
767
- let entryCount = 0;
768
- let cumulativeSize = 0;
769
- let earlyError = null;
770
- const extract = tarStream.extract();
771
- extract.on("entry", (header, stream, next) => {
772
- entryCount++;
773
- // Security: entry count limit
774
- if (entryCount > ARCHIVE_SECURITY.MAX_ENTRIES) {
775
- earlyError = this.createError(FileErrorCode.SECURITY_VALIDATION_FAILED, {
776
- reason: `Archive contains more than ${ARCHIVE_SECURITY.MAX_ENTRIES} entries`,
777
- });
778
- stream.resume();
779
- extract.destroy();
780
- return;
781
- }
782
- const entryName = header.name || "";
783
- const entrySize = header.size || 0;
784
- const entryType = header.type || "file";
785
- // Security: path traversal
786
- if (this.hasPathTraversal(entryName)) {
787
- securityWarnings.push(`Path traversal detected in entry: "${entryName}" - entry skipped`);
788
- stream.resume();
789
- next();
790
- return;
791
- }
792
- // Security: path length
793
- if (entryName.length > ARCHIVE_SECURITY.MAX_PATH_LENGTH) {
794
- securityWarnings.push(`Entry name exceeds maximum path length (${ARCHIVE_SECURITY.MAX_PATH_LENGTH}): "${entryName.substring(0, 50)}..." - entry skipped`);
795
- stream.resume();
796
- next();
797
- return;
798
- }
799
- // Security: symlinks
800
- if ((entryType === "symlink" || entryType === "link") && !ARCHIVE_SECURITY.ALLOW_SYMLINKS) {
801
- securityWarnings.push(`Symbolic/hard link detected: "${entryName}" - entry skipped`);
802
- stream.resume();
803
- next();
804
- return;
805
- }
806
- const isDirectory = entryType === "directory";
807
- // Security: single file size
808
- if (!isDirectory && entrySize > ARCHIVE_SECURITY.MAX_SINGLE_FILE_SIZE) {
809
- securityWarnings.push(`Entry "${entryName}" exceeds single file size limit (${this.formatSizeMB(entrySize)} MB > ${this.formatSizeMB(ARCHIVE_SECURITY.MAX_SINGLE_FILE_SIZE)} MB) - entry listed but flagged`);
810
- }
811
- // Security: cumulative size
812
- cumulativeSize += entrySize;
813
- if (cumulativeSize > ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE) {
814
- earlyError = this.createError(FileErrorCode.SECURITY_VALIDATION_FAILED, {
815
- reason: `Cumulative entry size exceeds limit of ${this.formatSizeMB(ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE)} MB`,
816
- });
817
- stream.resume();
818
- extract.destroy();
819
- return;
820
- }
821
- entries.push({
822
- name: entryName,
823
- uncompressedSize: entrySize,
824
- compressedSize: 0, // TAR doesn't compress individual entries
825
- isDirectory,
826
- });
827
- // Consume the stream without buffering (we only need metadata)
828
- stream.resume();
829
- next();
830
- });
831
- extract.on("finish", () => {
832
- if (earlyError) {
833
- resolve({
834
- success: false,
835
- entries: [],
836
- securityWarnings: [],
837
- error: earlyError,
838
- });
839
- }
840
- else {
841
- resolve({ success: true, entries, securityWarnings });
842
- }
843
- });
844
- extract.on("error", (err) => {
845
- if (earlyError) {
846
- resolve({
847
- success: false,
848
- entries: [],
849
- securityWarnings: [],
850
- error: earlyError,
851
- });
852
- }
853
- else {
854
- resolve({
855
- success: false,
856
- entries: [],
857
- securityWarnings: [],
858
- error: this.createError(FileErrorCode.CORRUPTED_FILE, {
859
- reason: `Failed to parse TAR archive: ${err.message}`,
860
- }, err),
861
- });
862
- }
863
- });
864
- // Feed the buffer into the extract stream
865
- extract.end(buffer);
866
- });
867
- }
868
- // ===========================================================================
869
- // GZIP EXTRACTION (plain, non-TAR)
870
- // ===========================================================================
871
- /**
872
- * Extract metadata from a plain GZIP file (single compressed file, not a TAR).
873
- * Since plain GZ wraps a single file, we create a single entry using the
874
- * original filename minus the .gz extension.
875
- *
876
- * @param buffer - Raw GZIP buffer
877
- * @returns Extraction result with a single entry and security warnings, or error
878
- */
879
- async extractGzEntries(buffer) {
880
- try {
881
- const zlib = await import("zlib");
882
- const { promisify } = await import("util");
883
- const gunzip = promisify(zlib.gunzip);
884
- const decompressed = await gunzip(buffer);
885
- // Security: check decompressed size
886
- if (decompressed.length > ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE) {
887
- return {
888
- success: false,
889
- entries: [],
890
- securityWarnings: [],
891
- error: this.createError(FileErrorCode.SECURITY_VALIDATION_FAILED, {
892
- reason: `Decompressed size (${this.formatSizeMB(decompressed.length)} MB) exceeds limit (${this.formatSizeMB(ARCHIVE_SECURITY.MAX_DECOMPRESSED_SIZE)} MB)`,
893
- }),
894
- };
895
- }
896
- // Security: compression ratio
897
- if (buffer.length > 0) {
898
- const ratio = decompressed.length / buffer.length;
899
- if (ratio > ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO) {
900
- return {
901
- success: false,
902
- entries: [],
903
- securityWarnings: [],
904
- error: this.createError(FileErrorCode.ZIP_BOMB_DETECTED, {
905
- compressionRatio: ratio.toFixed(1),
906
- maxRatio: ARCHIVE_SECURITY.MAX_COMPRESSION_RATIO,
907
- }),
908
- };
909
- }
910
- }
911
- // Check if the decompressed content is actually a TAR
912
- if (this.looksLikeTar(decompressed)) {
913
- // It's actually a tar.gz; re-route through TAR extraction
914
- const tarStream = await import("tar-stream");
915
- return await this.parseTarStream(tarStream, Buffer.from(decompressed));
916
- }
917
- // Plain GZ - single entry
918
- // Derive the inner filename by removing the .gz extension
919
- const innerFilename = "decompressed-content";
920
- const securityWarnings = [];
921
- const entries = [
922
- {
923
- name: innerFilename,
924
- uncompressedSize: decompressed.length,
925
- compressedSize: buffer.length,
926
- isDirectory: false,
927
- },
928
- ];
929
- return { success: true, entries, securityWarnings };
930
- }
931
- catch (error) {
932
- return {
933
- success: false,
934
- entries: [],
935
- securityWarnings: [],
936
- error: this.createError(FileErrorCode.DECOMPRESSION_FAILED, {
937
- reason: `Failed to decompress GZIP file: ${error instanceof Error ? error.message : String(error)}`,
938
- }, error instanceof Error ? error : undefined),
939
- };
940
- }
941
- }
942
- /**
943
- * Heuristic check to determine if a buffer looks like a TAR archive.
944
- * TAR archives have a "ustar" magic string at byte offset 257.
945
- *
946
- * @param buffer - Decompressed buffer to check
947
- * @returns true if the buffer appears to be a TAR archive
948
- */
949
- looksLikeTar(buffer) {
950
- if (buffer.length < 263) {
951
- return false;
952
- }
953
- // "ustar" at offset 257
954
- const magic = Buffer.from(buffer.slice(257, 263)).toString("ascii");
955
- return magic.startsWith("ustar");
956
- }
957
- // ===========================================================================
958
- // SECURITY VALIDATION
959
- // ===========================================================================
960
- /**
961
- * Check if an entry name contains path traversal sequences.
962
- * Detects `../`, absolute paths, and other traversal vectors.
963
- *
964
- * @param entryName - Archive entry name/path to validate
965
- * @returns true if path traversal is detected
966
- */
967
- hasPathTraversal(entryName) {
968
- // Normalize separators
969
- const normalized = entryName.replace(/\\/g, "/");
970
- // Check for parent directory traversal
971
- if (normalized.includes("../") || normalized.includes("/..")) {
972
- return true;
973
- }
974
- // Check for absolute paths
975
- if (normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized)) {
976
- return true;
977
- }
978
- // Check resolved path doesn't escape root
979
- const resolved = path.posix.normalize(normalized);
980
- if (resolved.startsWith("../") || resolved === "..") {
981
- return true;
982
- }
983
- return false;
984
- }
985
- // ===========================================================================
986
- // CONTENT EXTRACTION (Phase 2 sub-processing)
987
- // ===========================================================================
988
- /**
989
- * Extract text content from eligible ZIP entries for LLM consumption.
990
- *
991
- * Selects small, text-based files from the archive and extracts their
992
- * content. Files are sorted by relevance (config files, source code, docs).
993
- * Binary files, nested archives, and files exceeding size limits are skipped.
994
- *
995
- * @param buffer - Raw ZIP archive buffer
996
- * @param entries - Previously extracted entry metadata
997
- * @returns Map of entry name to extracted text content
998
- */
999
- async extractEntryContents(buffer, entries) {
1000
- const contents = new Map();
1001
- try {
1002
- const AdmZip = (await import("adm-zip")).default;
1003
- const zip = new AdmZip(buffer);
1004
- // Filter to extractable text-based entries within size limits
1005
- const candidates = entries
1006
- .filter((e) => {
1007
- if (e.isDirectory) {
1008
- return false;
1009
- }
1010
- if (e.uncompressedSize > ARCHIVE_CONFIG.MAX_EXTRACT_ENTRY_SIZE) {
1011
- return false;
1012
- }
1013
- if (e.uncompressedSize === 0) {
1014
- return false;
1015
- }
1016
- const ext = path.extname(e.name).toLowerCase();
1017
- // Check by extension
1018
- if (ARCHIVE_CONFIG.EXTRACTABLE_EXTENSIONS.has(ext)) {
1019
- return true;
1020
- }
1021
- // Check for common extensionless config files
1022
- const basename = path.basename(e.name).toLowerCase();
1023
- if (basename === "readme" || basename === "license" || basename === "makefile" || basename === "dockerfile") {
1024
- return true;
1025
- }
1026
- return false;
1027
- })
1028
- // Sort: smaller files first (more likely to fit), then by name
1029
- .sort((a, b) => a.uncompressedSize - b.uncompressedSize);
1030
- let totalExtracted = 0;
1031
- let extractCount = 0;
1032
- for (const entry of candidates) {
1033
- if (extractCount >= ARCHIVE_CONFIG.MAX_EXTRACT_ENTRIES) {
1034
- break;
1035
- }
1036
- if (totalExtracted + entry.uncompressedSize > ARCHIVE_CONFIG.MAX_TOTAL_EXTRACT_SIZE) {
1037
- break;
1038
- }
1039
- try {
1040
- const zipEntry = zip.getEntry(entry.name);
1041
- if (!zipEntry) {
1042
- continue;
1043
- }
1044
- const data = zipEntry.getData();
1045
- if (!data || data.length === 0) {
1046
- continue;
1047
- }
1048
- // Simple binary detection: check for null bytes in first 512 bytes
1049
- const sample = data.slice(0, Math.min(512, data.length));
1050
- if (sample.includes(0)) {
1051
- continue;
1052
- }
1053
- const text = data.toString("utf-8");
1054
- // Sanity check: skip if too many replacement characters (likely binary)
1055
- const replacementCount = (text.match(/\ufffd/g) || []).length;
1056
- if (replacementCount > text.length * 0.05) {
1057
- continue;
1058
- }
1059
- contents.set(entry.name, text);
1060
- totalExtracted += data.length;
1061
- extractCount++;
1062
- }
1063
- catch {
1064
- // Skip entries that fail to extract (binary, corrupt, etc.)
1065
- }
1066
- }
1067
- }
1068
- catch {
1069
- // If ZIP re-parsing fails, return empty — listing is still available
1070
- }
1071
- return contents;
1072
- }
1073
- // ===========================================================================
1074
- // TEXT CONTENT BUILDING
1075
- // ===========================================================================
1076
- /**
1077
- * Build a structured text description of the archive for LLM consumption.
1078
- * Includes archive metadata, file listing with sizes, and security warnings.
1079
- *
1080
- * @param filename - Original archive filename
1081
- * @param metadata - Aggregate archive metadata
1082
- * @param entries - Individual entry metadata
1083
- * @param securityWarnings - Security warnings encountered during processing
1084
- * @param extractedContents - Map of entry name to extracted text content (Phase 2)
1085
- * @returns Formatted text content string
1086
- */
1087
- buildTextContent(filename, metadata, entries, securityWarnings, extractedContents) {
1088
- const lines = [];
1089
- // Header
1090
- lines.push(`## Archive: ${filename}`);
1091
- lines.push("");
1092
- // Metadata
1093
- lines.push("### Metadata");
1094
- lines.push(`- **Format:** ${metadata.format.toUpperCase()}`);
1095
- lines.push(`- **Total entries:** ${metadata.totalEntries}`);
1096
- lines.push(`- **Total uncompressed size:** ${this.formatHumanReadableSize(metadata.totalUncompressedSize)}`);
1097
- if (metadata.totalCompressedSize > 0) {
1098
- lines.push(`- **Total compressed size:** ${this.formatHumanReadableSize(metadata.totalCompressedSize)}`);
1099
- }
1100
- lines.push("");
1101
- // Security warnings
1102
- if (securityWarnings.length > 0) {
1103
- lines.push("### Security Warnings");
1104
- for (const warning of securityWarnings) {
1105
- lines.push(`- ${warning}`);
1106
- }
1107
- lines.push("");
1108
- }
1109
- // File listing
1110
- lines.push("### Contents");
1111
- lines.push("");
1112
- // Separate directories and files
1113
- const directories = entries.filter((e) => e.isDirectory);
1114
- const files = entries.filter((e) => !e.isDirectory);
1115
- if (directories.length > 0) {
1116
- lines.push(`**Directories (${directories.length}):**`);
1117
- for (const dir of directories) {
1118
- lines.push(` ${dir.name}`);
1119
- }
1120
- lines.push("");
1121
- }
1122
- if (files.length > 0) {
1123
- lines.push(`**Files (${files.length}):**`);
1124
- // Sort files by path for readability
1125
- const sortedFiles = [...files].sort((a, b) => a.name.localeCompare(b.name));
1126
- for (const file of sortedFiles) {
1127
- const sizeStr = this.formatHumanReadableSize(file.uncompressedSize);
1128
- lines.push(` ${file.name} (${sizeStr})`);
1129
- }
1130
- lines.push("");
1131
- }
1132
- if (entries.length === 0) {
1133
- lines.push("*Archive is empty.*");
1134
- lines.push("");
1135
- }
1136
- // Extracted file contents (Phase 2 sub-processing)
1137
- if (extractedContents && extractedContents.size > 0) {
1138
- lines.push("### Extracted File Contents");
1139
- lines.push("");
1140
- extractedContents.forEach((content, entryName) => {
1141
- const ext = path.extname(entryName).replace(".", "");
1142
- const langHint = ext || "";
1143
- lines.push(`#### ${entryName}`);
1144
- lines.push(`\`\`\`${langHint}`);
1145
- // Truncate very long file contents to avoid excessive token usage
1146
- if (content.length > 10000) {
1147
- lines.push(content.slice(0, 8000));
1148
- lines.push(`\n... [truncated ${content.length - 8000} characters] ...`);
1149
- lines.push(content.slice(-1000));
1150
- }
1151
- else {
1152
- lines.push(content);
1153
- }
1154
- lines.push("```");
1155
- lines.push("");
1156
- });
1157
- }
1158
- return lines.join("\n");
1159
- }
1160
- /**
1161
- * Format a byte count as a human-readable size string.
1162
- *
1163
- * @param bytes - Size in bytes
1164
- * @returns Formatted string (e.g., "1.5 MB", "256 KB", "128 B")
1165
- */
1166
- formatHumanReadableSize(bytes) {
1167
- if (bytes === 0) {
1168
- return "0 B";
1169
- }
1170
- const units = ["B", "KB", "MB", "GB"];
1171
- const k = 1024;
1172
- const i = Math.floor(Math.log(bytes) / Math.log(k));
1173
- const idx = Math.min(i, units.length - 1);
1174
- return `${parseFloat((bytes / k ** idx).toFixed(2))} ${units[idx]}`;
1175
- }
1176
- // ===========================================================================
1177
- // TARGETED EXTRACTION API
1178
- // ===========================================================================
1179
- /**
1180
- * Extract a specific file from a ZIP archive and return its text content.
1181
- *
1182
- * Called by the `extract_file_content` tool for targeted access to files
1183
- * inside archives. Only supports ZIP archives (the most common format).
1184
- * Applies security checks (path traversal, size limits).
1185
- *
1186
- * @param buffer - Archive file buffer
1187
- * @param entryPath - Path of the entry within the archive (e.g., "src/index.ts")
1188
- * @returns Text content of the extracted file, or error message
1189
- */
1190
- async extractEntry(buffer, entryPath) {
1191
- try {
1192
- const AdmZip = (await import("adm-zip")).default;
1193
- const zip = new AdmZip(buffer);
1194
- const entries = zip.getEntries();
1195
- // Security: check for path traversal
1196
- if (this.hasPathTraversal(entryPath)) {
1197
- return `Security error: entry path "${entryPath}" contains path traversal.`;
1198
- }
1199
- // Find the matching entry (case-insensitive fallback)
1200
- let targetEntry = entries.find((e) => e.entryName === entryPath);
1201
- if (!targetEntry) {
1202
- targetEntry = entries.find((e) => e.entryName.toLowerCase() === entryPath.toLowerCase());
1203
- }
1204
- if (!targetEntry) {
1205
- // List available entries to help the LLM
1206
- const available = entries
1207
- .filter((e) => !e.isDirectory)
1208
- .slice(0, 20)
1209
- .map((e) => ` - ${e.entryName} (${this.formatHumanReadableSize(e.header.size)})`)
1210
- .join("\n");
1211
- return `Entry "${entryPath}" not found in archive.\n\nAvailable entries (first 20):\n${available}`;
1212
- }
1213
- if (targetEntry.isDirectory) {
1214
- return `"${entryPath}" is a directory, not a file.`;
1215
- }
1216
- // Security: size check
1217
- const maxSize = 5 * 1024 * 1024; // 5 MB
1218
- if (targetEntry.header.size > maxSize) {
1219
- return `Entry "${entryPath}" is too large (${this.formatHumanReadableSize(targetEntry.header.size)}). Maximum extraction size is 5 MB.`;
1220
- }
1221
- const data = targetEntry.getData();
1222
- // Check if it looks like text
1223
- const sampleSize = Math.min(data.length, 512);
1224
- let printable = 0;
1225
- for (let i = 0; i < sampleSize; i++) {
1226
- const b = data[i];
1227
- if ((b >= 0x20 && b <= 0x7e) || b === 0x09 || b === 0x0a || b === 0x0d || b >= 0x80) {
1228
- printable++;
1229
- }
1230
- }
1231
- if (sampleSize > 0 && printable / sampleSize < 0.8) {
1232
- return `Entry "${entryPath}" appears to be a binary file (${this.formatHumanReadableSize(data.length)}). Cannot display as text.`;
1233
- }
1234
- return data.toString("utf-8");
1235
- }
1236
- catch (err) {
1237
- return `Failed to extract entry "${entryPath}": ${err instanceof Error ? err.message : String(err)}`;
1238
- }
1239
- }
1240
- }
1241
- // =============================================================================
1242
- // SINGLETON INSTANCE
1243
- // =============================================================================
1244
/**
 * Shared, module-level ArchiveProcessor instance.
 * Import this when a single processor is sufficient for the whole application.
 *
 * @example
 * ```typescript
 * import { archiveProcessor } from "./ArchiveProcessor.js";
 *
 * const result = await archiveProcessor.processFile(fileInfo);
 * ```
 */
const archiveProcessor = new ArchiveProcessor();
export { archiveProcessor };
1256
- // =============================================================================
1257
- // HELPER FUNCTIONS
1258
- // =============================================================================
1259
/**
 * Report whether a file is a recognized archive.
 * Delegates the decision to `archiveProcessor.isFileSupported`, passing the
 * MIME type and filename through unchanged.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns true if the file is a recognized archive format
 *
 * @example
 * ```typescript
 * if (isArchiveFile("application/zip", "backup.zip")) {
 *   // Process as archive
 * }
 *
 * if (isArchiveFile("", "data.tar.gz")) {
 *   // Also matches by extension
 * }
 * ```
 */
export function isArchiveFile(mimetype, filename) {
    const supported = archiveProcessor.isFileSupported(mimetype, filename);
    return supported;
}
1281
/**
 * Process a single archive file using the shared processor instance.
 * Thin convenience wrapper around `archiveProcessor.processFile`.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, etc.)
 * @returns Processing result with archive metadata and entry listing, or error
 *
 * @example
 * ```typescript
 * import { processArchive } from "./ArchiveProcessor.js";
 *
 * const result = await processArchive(fileInfo, {
 *   authHeaders: { Authorization: "Bearer token" },
 * });
 *
 * if (result.success) {
 *   const { archiveMetadata, entries, textContent } = result.data;
 *   console.log(`Found ${entries.length} entries in ${archiveMetadata.format} archive`);
 *   console.log(textContent);
 * } else {
 *   console.error(`Processing failed: ${result.error?.userMessage}`);
 * }
 * ```
 */
export async function processArchive(fileInfo, options) {
    const pendingResult = archiveProcessor.processFile(fileInfo, options);
    return pendingResult;
}