@juspay/neurolink 9.32.0 → 9.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (467)
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/anthropicOAuth.js +1 -1
  3. package/dist/cli/commands/proxy.js +18 -5
  4. package/dist/client/aiSdkAdapter.js +1 -1
  5. package/dist/client/index.js +137 -501
  6. package/dist/core/factory.js +0 -1
  7. package/dist/core/redisConversationMemoryManager.js +1 -1
  8. package/dist/features/ppt/slideGenerator.js +0 -1
  9. package/dist/features/ppt/utils.js +0 -1
  10. package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
  11. package/dist/mcp/elicitationProtocol.js +1 -1
  12. package/dist/mcp/servers/agent/directToolsServer.js +0 -1
  13. package/dist/providers/azureOpenai.js +1 -1
  14. package/dist/providers/huggingFace.js +0 -1
  15. package/dist/providers/openaiCompatible.js +0 -1
  16. package/dist/sdk/toolRegistration.js +0 -1
  17. package/dist/server/openapi/generator.js +1 -1
  18. package/dist/server/routes/claudeProxyRoutes.js +45 -9
  19. package/dist/types/configTypes.js +0 -5
  20. package/dist/types/modelTypes.js +0 -1
  21. package/dist/types/tools.js +0 -1
  22. package/dist/types/typeAliases.js +0 -1
  23. package/dist/types/utilities.js +1 -1
  24. package/dist/types/workflowTypes.js +0 -1
  25. package/dist/utils/providerRetry.js +0 -1
  26. package/dist/utils/providerUtils.js +0 -1
  27. package/package.json +2 -2
  28. package/dist/client/adapters/providerImageAdapter.js +0 -588
  29. package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
  30. package/dist/client/adapters/video/directorPipeline.js +0 -516
  31. package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
  32. package/dist/client/adapters/video/frameExtractor.js +0 -143
  33. package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
  34. package/dist/client/adapters/video/videoAnalyzer.js +0 -238
  35. package/dist/client/adapters/video/videoMerger.js +0 -171
  36. package/dist/client/agent/directTools.js +0 -840
  37. package/dist/client/auth/AuthProviderFactory.js +0 -111
  38. package/dist/client/auth/AuthProviderRegistry.js +0 -190
  39. package/dist/client/auth/RequestContext.js +0 -78
  40. package/dist/client/auth/accountPool.js +0 -178
  41. package/dist/client/auth/anthropicOAuth.js +0 -974
  42. package/dist/client/auth/authContext.js +0 -314
  43. package/dist/client/auth/errors.js +0 -39
  44. package/dist/client/auth/index.js +0 -61
  45. package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
  46. package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
  47. package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
  48. package/dist/client/auth/providers/CognitoProvider.js +0 -304
  49. package/dist/client/auth/providers/KeycloakProvider.js +0 -393
  50. package/dist/client/auth/providers/auth0.js +0 -274
  51. package/dist/client/auth/providers/betterAuth.js +0 -182
  52. package/dist/client/auth/providers/clerk.js +0 -317
  53. package/dist/client/auth/providers/custom.js +0 -112
  54. package/dist/client/auth/providers/firebase.js +0 -226
  55. package/dist/client/auth/providers/jwt.js +0 -212
  56. package/dist/client/auth/providers/oauth2.js +0 -303
  57. package/dist/client/auth/providers/supabase.js +0 -259
  58. package/dist/client/auth/providers/workos.js +0 -284
  59. package/dist/client/auth/serverBridge.js +0 -25
  60. package/dist/client/auth/sessionManager.js +0 -437
  61. package/dist/client/auth/tokenStore.js +0 -799
  62. package/dist/client/client/aiSdkAdapter.js +0 -487
  63. package/dist/client/client/auth.js +0 -473
  64. package/dist/client/client/errors.js +0 -552
  65. package/dist/client/client/httpClient.js +0 -837
  66. package/dist/client/client/index.js +0 -172
  67. package/dist/client/client/interceptors.js +0 -601
  68. package/dist/client/client/sseClient.js +0 -545
  69. package/dist/client/client/streamingClient.js +0 -917
  70. package/dist/client/client/wsClient.js +0 -369
  71. package/dist/client/config/configManager.js +0 -303
  72. package/dist/client/config/conversationMemory.js +0 -86
  73. package/dist/client/config/taskClassificationConfig.js +0 -148
  74. package/dist/client/constants/contextWindows.js +0 -295
  75. package/dist/client/constants/enums.js +0 -853
  76. package/dist/client/constants/index.js +0 -207
  77. package/dist/client/constants/performance.js +0 -389
  78. package/dist/client/constants/retry.js +0 -266
  79. package/dist/client/constants/timeouts.js +0 -182
  80. package/dist/client/constants/tokens.js +0 -380
  81. package/dist/client/constants/videoErrors.js +0 -46
  82. package/dist/client/context/budgetChecker.js +0 -98
  83. package/dist/client/context/contextCompactor.js +0 -205
  84. package/dist/client/context/emergencyTruncation.js +0 -88
  85. package/dist/client/context/errorDetection.js +0 -171
  86. package/dist/client/context/errors.js +0 -21
  87. package/dist/client/context/fileTokenBudget.js +0 -127
  88. package/dist/client/context/prompts/summarizationPrompt.js +0 -117
  89. package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
  90. package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
  91. package/dist/client/context/stages/structuredSummarizer.js +0 -99
  92. package/dist/client/context/stages/toolOutputPruner.js +0 -52
  93. package/dist/client/context/summarizationEngine.js +0 -136
  94. package/dist/client/context/toolOutputLimits.js +0 -78
  95. package/dist/client/context/toolPairRepair.js +0 -66
  96. package/dist/client/core/analytics.js +0 -88
  97. package/dist/client/core/baseProvider.js +0 -1385
  98. package/dist/client/core/constants.js +0 -140
  99. package/dist/client/core/conversationMemoryFactory.js +0 -141
  100. package/dist/client/core/conversationMemoryInitializer.js +0 -128
  101. package/dist/client/core/conversationMemoryManager.js +0 -344
  102. package/dist/client/core/dynamicModels.js +0 -358
  103. package/dist/client/core/evaluation.js +0 -309
  104. package/dist/client/core/evaluationProviders.js +0 -248
  105. package/dist/client/core/factory.js +0 -412
  106. package/dist/client/core/infrastructure/baseError.js +0 -22
  107. package/dist/client/core/infrastructure/baseFactory.js +0 -54
  108. package/dist/client/core/infrastructure/baseRegistry.js +0 -53
  109. package/dist/client/core/infrastructure/index.js +0 -5
  110. package/dist/client/core/infrastructure/retry.js +0 -20
  111. package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
  112. package/dist/client/core/modelConfiguration.js +0 -851
  113. package/dist/client/core/modules/GenerationHandler.js +0 -588
  114. package/dist/client/core/modules/MessageBuilder.js +0 -273
  115. package/dist/client/core/modules/StreamHandler.js +0 -185
  116. package/dist/client/core/modules/TelemetryHandler.js +0 -203
  117. package/dist/client/core/modules/ToolsManager.js +0 -499
  118. package/dist/client/core/modules/Utilities.js +0 -331
  119. package/dist/client/core/redisConversationMemoryManager.js +0 -1435
  120. package/dist/client/core/streamAnalytics.js +0 -131
  121. package/dist/client/evaluation/contextBuilder.js +0 -134
  122. package/dist/client/evaluation/index.js +0 -61
  123. package/dist/client/evaluation/prompts.js +0 -73
  124. package/dist/client/evaluation/ragasEvaluator.js +0 -110
  125. package/dist/client/evaluation/retryManager.js +0 -78
  126. package/dist/client/evaluation/scoring.js +0 -61
  127. package/dist/client/factories/providerFactory.js +0 -166
  128. package/dist/client/factories/providerRegistry.js +0 -166
  129. package/dist/client/features/ppt/constants.js +0 -896
  130. package/dist/client/features/ppt/contentPlanner.js +0 -529
  131. package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
  132. package/dist/client/features/ppt/slideGenerator.js +0 -532
  133. package/dist/client/features/ppt/slideRenderers.js +0 -2383
  134. package/dist/client/features/ppt/slideTypeInference.js +0 -405
  135. package/dist/client/features/ppt/types.js +0 -13
  136. package/dist/client/features/ppt/utils.js +0 -443
  137. package/dist/client/files/fileReferenceRegistry.js +0 -1543
  138. package/dist/client/files/fileTools.js +0 -450
  139. package/dist/client/files/streamingReader.js +0 -321
  140. package/dist/client/files/types.js +0 -23
  141. package/dist/client/hitl/hitlErrors.js +0 -54
  142. package/dist/client/hitl/hitlManager.js +0 -460
  143. package/dist/client/mcp/agentExposure.js +0 -356
  144. package/dist/client/mcp/auth/index.js +0 -11
  145. package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
  146. package/dist/client/mcp/auth/tokenStorage.js +0 -134
  147. package/dist/client/mcp/batching/index.js +0 -10
  148. package/dist/client/mcp/batching/requestBatcher.js +0 -441
  149. package/dist/client/mcp/caching/index.js +0 -10
  150. package/dist/client/mcp/caching/toolCache.js +0 -433
  151. package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
  152. package/dist/client/mcp/elicitation/index.js +0 -11
  153. package/dist/client/mcp/elicitation/types.js +0 -10
  154. package/dist/client/mcp/elicitationProtocol.js +0 -375
  155. package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
  156. package/dist/client/mcp/externalServerManager.js +0 -1478
  157. package/dist/client/mcp/factory.js +0 -161
  158. package/dist/client/mcp/flexibleToolValidator.js +0 -161
  159. package/dist/client/mcp/httpRateLimiter.js +0 -391
  160. package/dist/client/mcp/httpRetryHandler.js +0 -178
  161. package/dist/client/mcp/index.js +0 -74
  162. package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
  163. package/dist/client/mcp/mcpClientFactory.js +0 -708
  164. package/dist/client/mcp/mcpRegistryClient.js +0 -488
  165. package/dist/client/mcp/mcpServerBase.js +0 -373
  166. package/dist/client/mcp/multiServerManager.js +0 -579
  167. package/dist/client/mcp/registry.js +0 -158
  168. package/dist/client/mcp/routing/index.js +0 -10
  169. package/dist/client/mcp/routing/toolRouter.js +0 -416
  170. package/dist/client/mcp/serverCapabilities.js +0 -502
  171. package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
  172. package/dist/client/mcp/toolAnnotations.js +0 -239
  173. package/dist/client/mcp/toolConverter.js +0 -258
  174. package/dist/client/mcp/toolDiscoveryService.js +0 -798
  175. package/dist/client/mcp/toolIntegration.js +0 -334
  176. package/dist/client/mcp/toolRegistry.js +0 -729
  177. package/dist/client/memory/hippocampusInitializer.js +0 -19
  178. package/dist/client/memory/memoryRetrievalTools.js +0 -166
  179. package/dist/client/middleware/builtin/analytics.js +0 -132
  180. package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
  181. package/dist/client/middleware/builtin/guardrails.js +0 -109
  182. package/dist/client/middleware/builtin/lifecycle.js +0 -168
  183. package/dist/client/middleware/factory.js +0 -327
  184. package/dist/client/middleware/registry.js +0 -295
  185. package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
  186. package/dist/client/models/anthropicModels.js +0 -527
  187. package/dist/client/neurolink.js +0 -8233
  188. package/dist/client/observability/exporterRegistry.js +0 -413
  189. package/dist/client/observability/exporters/arizeExporter.js +0 -138
  190. package/dist/client/observability/exporters/baseExporter.js +0 -190
  191. package/dist/client/observability/exporters/braintrustExporter.js +0 -154
  192. package/dist/client/observability/exporters/datadogExporter.js +0 -196
  193. package/dist/client/observability/exporters/laminarExporter.js +0 -302
  194. package/dist/client/observability/exporters/langfuseExporter.js +0 -209
  195. package/dist/client/observability/exporters/langsmithExporter.js +0 -143
  196. package/dist/client/observability/exporters/otelExporter.js +0 -164
  197. package/dist/client/observability/exporters/posthogExporter.js +0 -287
  198. package/dist/client/observability/exporters/sentryExporter.js +0 -165
  199. package/dist/client/observability/index.js +0 -31
  200. package/dist/client/observability/metricsAggregator.js +0 -556
  201. package/dist/client/observability/otelBridge.js +0 -131
  202. package/dist/client/observability/retryPolicy.js +0 -383
  203. package/dist/client/observability/sampling/samplers.js +0 -216
  204. package/dist/client/observability/spanProcessor.js +0 -303
  205. package/dist/client/observability/tokenTracker.js +0 -413
  206. package/dist/client/observability/types/exporterTypes.js +0 -5
  207. package/dist/client/observability/types/index.js +0 -4
  208. package/dist/client/observability/types/spanTypes.js +0 -92
  209. package/dist/client/observability/utils/safeMetadata.js +0 -25
  210. package/dist/client/observability/utils/spanSerializer.js +0 -292
  211. package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
  212. package/dist/client/processors/base/BaseFileProcessor.js +0 -614
  213. package/dist/client/processors/base/types.js +0 -82
  214. package/dist/client/processors/config/fileTypes.js +0 -520
  215. package/dist/client/processors/config/index.js +0 -92
  216. package/dist/client/processors/config/languageMap.js +0 -410
  217. package/dist/client/processors/config/mimeTypes.js +0 -363
  218. package/dist/client/processors/config/sizeLimits.js +0 -258
  219. package/dist/client/processors/document/ExcelProcessor.js +0 -590
  220. package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
  221. package/dist/client/processors/document/PptxProcessor.js +0 -157
  222. package/dist/client/processors/document/RtfProcessor.js +0 -361
  223. package/dist/client/processors/document/WordProcessor.js +0 -353
  224. package/dist/client/processors/errors/FileErrorCode.js +0 -255
  225. package/dist/client/processors/errors/errorHelpers.js +0 -386
  226. package/dist/client/processors/errors/errorSerializer.js +0 -507
  227. package/dist/client/processors/errors/index.js +0 -49
  228. package/dist/client/processors/markup/SvgProcessor.js +0 -240
  229. package/dist/client/processors/media/AudioProcessor.js +0 -707
  230. package/dist/client/processors/media/VideoProcessor.js +0 -1045
  231. package/dist/client/providers/amazonBedrock.js +0 -1512
  232. package/dist/client/providers/amazonSagemaker.js +0 -162
  233. package/dist/client/providers/anthropic.js +0 -831
  234. package/dist/client/providers/azureOpenai.js +0 -143
  235. package/dist/client/providers/googleAiStudio.js +0 -1200
  236. package/dist/client/providers/googleNativeGemini3.js +0 -543
  237. package/dist/client/providers/googleVertex.js +0 -2936
  238. package/dist/client/providers/huggingFace.js +0 -315
  239. package/dist/client/providers/litellm.js +0 -488
  240. package/dist/client/providers/mistral.js +0 -157
  241. package/dist/client/providers/ollama.js +0 -1579
  242. package/dist/client/providers/openAI.js +0 -627
  243. package/dist/client/providers/openRouter.js +0 -543
  244. package/dist/client/providers/openaiCompatible.js +0 -290
  245. package/dist/client/providers/providerTypeUtils.js +0 -46
  246. package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
  247. package/dist/client/providers/sagemaker/client.js +0 -472
  248. package/dist/client/providers/sagemaker/config.js +0 -317
  249. package/dist/client/providers/sagemaker/detection.js +0 -606
  250. package/dist/client/providers/sagemaker/error-constants.js +0 -227
  251. package/dist/client/providers/sagemaker/errors.js +0 -299
  252. package/dist/client/providers/sagemaker/language-model.js +0 -775
  253. package/dist/client/providers/sagemaker/parsers.js +0 -634
  254. package/dist/client/providers/sagemaker/streaming.js +0 -331
  255. package/dist/client/providers/sagemaker/structured-parser.js +0 -625
  256. package/dist/client/proxy/accountQuota.js +0 -162
  257. package/dist/client/proxy/claudeFormat.js +0 -595
  258. package/dist/client/proxy/modelRouter.js +0 -29
  259. package/dist/client/proxy/oauthFetch.js +0 -367
  260. package/dist/client/proxy/proxyFetch.js +0 -586
  261. package/dist/client/proxy/requestLogger.js +0 -207
  262. package/dist/client/proxy/tokenRefresh.js +0 -124
  263. package/dist/client/proxy/usageStats.js +0 -74
  264. package/dist/client/proxy/utils/noProxyUtils.js +0 -149
  265. package/dist/client/rag/ChunkerFactory.js +0 -320
  266. package/dist/client/rag/ChunkerRegistry.js +0 -421
  267. package/dist/client/rag/chunkers/BaseChunker.js +0 -143
  268. package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
  269. package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
  270. package/dist/client/rag/chunkers/JSONChunker.js +0 -68
  271. package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
  272. package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
  273. package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
  274. package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
  275. package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
  276. package/dist/client/rag/chunkers/TokenChunker.js +0 -61
  277. package/dist/client/rag/chunkers/index.js +0 -15
  278. package/dist/client/rag/chunking/characterChunker.js +0 -142
  279. package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
  280. package/dist/client/rag/chunking/htmlChunker.js +0 -247
  281. package/dist/client/rag/chunking/index.js +0 -17
  282. package/dist/client/rag/chunking/jsonChunker.js +0 -281
  283. package/dist/client/rag/chunking/latexChunker.js +0 -251
  284. package/dist/client/rag/chunking/markdownChunker.js +0 -373
  285. package/dist/client/rag/chunking/recursiveChunker.js +0 -148
  286. package/dist/client/rag/chunking/semanticChunker.js +0 -306
  287. package/dist/client/rag/chunking/sentenceChunker.js +0 -230
  288. package/dist/client/rag/chunking/tokenChunker.js +0 -183
  289. package/dist/client/rag/document/MDocument.js +0 -392
  290. package/dist/client/rag/document/index.js +0 -5
  291. package/dist/client/rag/document/loaders.js +0 -500
  292. package/dist/client/rag/errors/RAGError.js +0 -274
  293. package/dist/client/rag/errors/index.js +0 -6
  294. package/dist/client/rag/graphRag/graphRAG.js +0 -401
  295. package/dist/client/rag/graphRag/index.js +0 -4
  296. package/dist/client/rag/index.js +0 -141
  297. package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
  298. package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
  299. package/dist/client/rag/metadata/index.js +0 -9
  300. package/dist/client/rag/metadata/metadataExtractor.js +0 -280
  301. package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
  302. package/dist/client/rag/pipeline/contextAssembly.js +0 -341
  303. package/dist/client/rag/pipeline/index.js +0 -5
  304. package/dist/client/rag/ragIntegration.js +0 -321
  305. package/dist/client/rag/reranker/RerankerFactory.js +0 -430
  306. package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
  307. package/dist/client/rag/reranker/index.js +0 -9
  308. package/dist/client/rag/reranker/reranker.js +0 -277
  309. package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
  310. package/dist/client/rag/resilience/RetryHandler.js +0 -304
  311. package/dist/client/rag/resilience/index.js +0 -7
  312. package/dist/client/rag/retrieval/hybridSearch.js +0 -335
  313. package/dist/client/rag/retrieval/index.js +0 -5
  314. package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
  315. package/dist/client/rag/types.js +0 -8
  316. package/dist/client/sdk/toolRegistration.js +0 -377
  317. package/dist/client/server/abstract/baseServerAdapter.js +0 -575
  318. package/dist/client/server/adapters/expressAdapter.js +0 -486
  319. package/dist/client/server/adapters/fastifyAdapter.js +0 -472
  320. package/dist/client/server/adapters/honoAdapter.js +0 -632
  321. package/dist/client/server/adapters/koaAdapter.js +0 -510
  322. package/dist/client/server/errors.js +0 -486
  323. package/dist/client/server/factory/serverAdapterFactory.js +0 -160
  324. package/dist/client/server/index.js +0 -108
  325. package/dist/client/server/middleware/abortSignal.js +0 -111
  326. package/dist/client/server/middleware/auth.js +0 -388
  327. package/dist/client/server/middleware/cache.js +0 -359
  328. package/dist/client/server/middleware/common.js +0 -281
  329. package/dist/client/server/middleware/deprecation.js +0 -190
  330. package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
  331. package/dist/client/server/middleware/rateLimit.js +0 -227
  332. package/dist/client/server/middleware/validation.js +0 -388
  333. package/dist/client/server/openapi/generator.js +0 -398
  334. package/dist/client/server/openapi/index.js +0 -36
  335. package/dist/client/server/openapi/schemas.js +0 -695
  336. package/dist/client/server/openapi/templates.js +0 -374
  337. package/dist/client/server/routes/agentRoutes.js +0 -189
  338. package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
  339. package/dist/client/server/routes/healthRoutes.js +0 -187
  340. package/dist/client/server/routes/index.js +0 -57
  341. package/dist/client/server/routes/mcpRoutes.js +0 -342
  342. package/dist/client/server/routes/memoryRoutes.js +0 -350
  343. package/dist/client/server/routes/openApiRoutes.js +0 -126
  344. package/dist/client/server/routes/toolRoutes.js +0 -199
  345. package/dist/client/server/streaming/dataStream.js +0 -486
  346. package/dist/client/server/streaming/index.js +0 -11
  347. package/dist/client/server/types.js +0 -67
  348. package/dist/client/server/utils/redaction.js +0 -334
  349. package/dist/client/server/utils/validation.js +0 -243
  350. package/dist/client/server/websocket/WebSocketHandler.js +0 -383
  351. package/dist/client/server/websocket/index.js +0 -4
  352. package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
  353. package/dist/client/telemetry/attributes.js +0 -100
  354. package/dist/client/telemetry/index.js +0 -26
  355. package/dist/client/telemetry/telemetryService.js +0 -308
  356. package/dist/client/telemetry/tracers.js +0 -17
  357. package/dist/client/telemetry/withSpan.js +0 -34
  358. package/dist/client/types/actionTypes.js +0 -6
  359. package/dist/client/types/analytics.js +0 -5
  360. package/dist/client/types/authTypes.js +0 -9
  361. package/dist/client/types/circuitBreakerErrors.js +0 -34
  362. package/dist/client/types/cli.js +0 -21
  363. package/dist/client/types/clientTypes.js +0 -10
  364. package/dist/client/types/common.js +0 -51
  365. package/dist/client/types/configTypes.js +0 -49
  366. package/dist/client/types/content.js +0 -19
  367. package/dist/client/types/contextTypes.js +0 -400
  368. package/dist/client/types/conversation.js +0 -47
  369. package/dist/client/types/conversationMemoryInterface.js +0 -6
  370. package/dist/client/types/domainTypes.js +0 -5
  371. package/dist/client/types/errors.js +0 -167
  372. package/dist/client/types/evaluation.js +0 -5
  373. package/dist/client/types/evaluationProviders.js +0 -5
  374. package/dist/client/types/evaluationTypes.js +0 -1
  375. package/dist/client/types/externalMcp.js +0 -6
  376. package/dist/client/types/fileReferenceTypes.js +0 -8
  377. package/dist/client/types/fileTypes.js +0 -4
  378. package/dist/client/types/generateTypes.js +0 -1
  379. package/dist/client/types/guardrails.js +0 -1
  380. package/dist/client/types/hitlTypes.js +0 -8
  381. package/dist/client/types/index.js +0 -57
  382. package/dist/client/types/mcpTypes.js +0 -5
  383. package/dist/client/types/middlewareTypes.js +0 -1
  384. package/dist/client/types/modelTypes.js +0 -30
  385. package/dist/client/types/multimodal.js +0 -135
  386. package/dist/client/types/observability.js +0 -6
  387. package/dist/client/types/pptTypes.js +0 -82
  388. package/dist/client/types/providers.js +0 -111
  389. package/dist/client/types/proxyTypes.js +0 -16
  390. package/dist/client/types/ragTypes.js +0 -7
  391. package/dist/client/types/sdkTypes.js +0 -8
  392. package/dist/client/types/serviceTypes.js +0 -5
  393. package/dist/client/types/streamTypes.js +0 -1
  394. package/dist/client/types/subscriptionTypes.js +0 -9
  395. package/dist/client/types/taskClassificationTypes.js +0 -5
  396. package/dist/client/types/tools.js +0 -24
  397. package/dist/client/types/ttsTypes.js +0 -57
  398. package/dist/client/types/typeAliases.js +0 -48
  399. package/dist/client/types/utilities.js +0 -4
  400. package/dist/client/types/workflowTypes.js +0 -30
  401. package/dist/client/utils/async/withTimeout.js +0 -98
  402. package/dist/client/utils/asyncMutex.js +0 -60
  403. package/dist/client/utils/conversationMemory.js +0 -431
  404. package/dist/client/utils/csvProcessor.js +0 -846
  405. package/dist/client/utils/errorHandling.js +0 -936
  406. package/dist/client/utils/evaluationUtils.js +0 -131
  407. package/dist/client/utils/factoryProcessing.js +0 -589
  408. package/dist/client/utils/fileDetector.js +0 -2161
  409. package/dist/client/utils/imageCache.js +0 -376
  410. package/dist/client/utils/imageProcessor.js +0 -704
  411. package/dist/client/utils/logger.js +0 -491
  412. package/dist/client/utils/mcpDefaults.js +0 -134
  413. package/dist/client/utils/messageBuilder.js +0 -1653
  414. package/dist/client/utils/modelAliasResolver.js +0 -54
  415. package/dist/client/utils/modelDetection.js +0 -80
  416. package/dist/client/utils/modelRouter.js +0 -292
  417. package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
  418. package/dist/client/utils/observabilityHelpers.js +0 -47
  419. package/dist/client/utils/parameterValidation.js +0 -966
  420. package/dist/client/utils/pdfProcessor.js +0 -410
  421. package/dist/client/utils/performance.js +0 -222
  422. package/dist/client/utils/pricing.js +0 -340
  423. package/dist/client/utils/promptRedaction.js +0 -62
  424. package/dist/client/utils/providerConfig.js +0 -1009
  425. package/dist/client/utils/providerHealth.js +0 -1237
  426. package/dist/client/utils/providerRetry.js +0 -112
  427. package/dist/client/utils/providerUtils.js +0 -434
  428. package/dist/client/utils/rateLimiter.js +0 -200
  429. package/dist/client/utils/redis.js +0 -368
  430. package/dist/client/utils/retryHandler.js +0 -269
  431. package/dist/client/utils/retryability.js +0 -22
  432. package/dist/client/utils/sanitizers/svg.js +0 -481
  433. package/dist/client/utils/schemaConversion.js +0 -255
  434. package/dist/client/utils/taskClassificationUtils.js +0 -149
  435. package/dist/client/utils/taskClassifier.js +0 -94
  436. package/dist/client/utils/thinkingConfig.js +0 -104
  437. package/dist/client/utils/timeout.js +0 -359
  438. package/dist/client/utils/tokenEstimation.js +0 -142
  439. package/dist/client/utils/tokenLimits.js +0 -125
  440. package/dist/client/utils/tokenUtils.js +0 -239
  441. package/dist/client/utils/toolUtils.js +0 -75
  442. package/dist/client/utils/transformationUtils.js +0 -554
  443. package/dist/client/utils/ttsProcessor.js +0 -286
  444. package/dist/client/utils/typeUtils.js +0 -97
  445. package/dist/client/utils/videoAnalysisProcessor.js +0 -67
  446. package/dist/client/workflow/config.js +0 -398
  447. package/dist/client/workflow/core/ensembleExecutor.js +0 -407
  448. package/dist/client/workflow/core/judgeScorer.js +0 -544
  449. package/dist/client/workflow/core/responseConditioner.js +0 -225
  450. package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
  451. package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
  452. package/dist/client/workflow/core/types/index.js +0 -7
  453. package/dist/client/workflow/core/types/judgeTypes.js +0 -7
  454. package/dist/client/workflow/core/types/layerTypes.js +0 -7
  455. package/dist/client/workflow/core/types/registryTypes.js +0 -7
  456. package/dist/client/workflow/core/workflowRegistry.js +0 -304
  457. package/dist/client/workflow/core/workflowRunner.js +0 -586
  458. package/dist/client/workflow/index.js +0 -50
  459. package/dist/client/workflow/types.js +0 -9
  460. package/dist/client/workflow/utils/types/index.js +0 -7
  461. package/dist/client/workflow/utils/workflowMetrics.js +0 -311
  462. package/dist/client/workflow/utils/workflowValidation.js +0 -420
  463. package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
  464. package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
  465. package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
  466. package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
  467. /package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
@@ -1,846 +0,0 @@
1
- /**
2
- * CSV Processing Utility
3
- * Converts CSV files to LLM-friendly text formats
4
- * Uses streaming for memory efficiency with large files
5
- */
6
- import csvParser from "csv-parser";
7
- import { Readable } from "stream";
8
- import { logger } from "./logger.js";
9
- // ============================================================================
10
- // Data Type Detection Patterns
11
- // ============================================================================
12
- const DATE_PATTERNS = [
13
- { regex: /^\d{4}-\d{2}-\d{2}$/, format: "YYYY-MM-DD" },
14
- { regex: /^\d{2}\/\d{2}\/\d{4}$/, format: "MM/DD/YYYY" },
15
- { regex: /^\d{2}-\d{2}-\d{4}$/, format: "DD-MM-YYYY" },
16
- { regex: /^\d{2}\.\d{2}\.\d{4}$/, format: "DD.MM.YYYY" },
17
- { regex: /^\d{4}\/\d{2}\/\d{2}$/, format: "YYYY/MM/DD" },
18
- ];
19
- const DATETIME_PATTERNS = [
20
- { regex: /^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/, format: "ISO8601" },
21
- { regex: /^\d{2}\/\d{2}\/\d{4} \d{2}:\d{2}/, format: "MM/DD/YYYY HH:mm" },
22
- ];
23
- const EMAIL_REGEX = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
24
- const URL_REGEX = /^(https?:\/\/|www\.)[^\s]+$/i;
25
- const INTEGER_REGEX = /^-?\d+$/;
26
- const FLOAT_REGEX = /^-?\d+\.\d+$/;
27
- const BOOLEAN_VALUES = new Set([
28
- "true",
29
- "false",
30
- "yes",
31
- "no",
32
- "1",
33
- "0",
34
- "t",
35
- "f",
36
- "y",
37
- "n",
38
- ]);
39
- // ============================================================================
40
- // Column Name Validation
41
- // ============================================================================
42
- /**
43
- * Validate column name and return issues
44
- */
45
- function validateColumnName(name) {
46
- const issues = [];
47
- if (!name || name.trim() === "") {
48
- issues.push("Empty or blank column name");
49
- return issues;
50
- }
51
- if (name !== name.trim()) {
52
- issues.push("Leading or trailing whitespace");
53
- }
54
- if (/^\d/.test(name)) {
55
- issues.push("Starts with a number");
56
- }
57
- if (/[^a-zA-Z0-9_\- ]/.test(name)) {
58
- issues.push("Contains special characters");
59
- }
60
- if (name.length > 64) {
61
- issues.push("Name exceeds 64 characters");
62
- }
63
- if (/\s{2,}/.test(name)) {
64
- issues.push("Contains multiple consecutive spaces");
65
- }
66
- return issues;
67
- }
68
- // ============================================================================
69
- // Data Type Detection
70
- // ============================================================================
71
- /**
72
- * Detect the data type of a single value
73
- */
74
- function detectValueType(value) {
75
- if (value === "" || value === null || value === undefined) {
76
- return "empty";
77
- }
78
- const trimmed = value.trim();
79
- if (trimmed === "") {
80
- return "empty";
81
- }
82
- // Check boolean first (before numbers since "1" and "0" could be both)
83
- if (BOOLEAN_VALUES.has(trimmed.toLowerCase())) {
84
- return "boolean";
85
- }
86
- // Check integer
87
- if (INTEGER_REGEX.test(trimmed)) {
88
- return "integer";
89
- }
90
- // Check float
91
- if (FLOAT_REGEX.test(trimmed)) {
92
- return "float";
93
- }
94
- // Check email
95
- if (EMAIL_REGEX.test(trimmed)) {
96
- return "email";
97
- }
98
- // Check URL
99
- if (URL_REGEX.test(trimmed)) {
100
- return "url";
101
- }
102
- // Check datetime (before date since datetime is more specific)
103
- for (const pattern of DATETIME_PATTERNS) {
104
- if (pattern.regex.test(trimmed)) {
105
- return "datetime";
106
- }
107
- }
108
- // Check date
109
- for (const pattern of DATE_PATTERNS) {
110
- if (pattern.regex.test(trimmed)) {
111
- return "date";
112
- }
113
- }
114
- return "string";
115
- }
116
- /**
117
- * Detect date format from value
118
- */
119
- function detectDateFormat(value) {
120
- const trimmed = value.trim();
121
- for (const pattern of DATETIME_PATTERNS) {
122
- if (pattern.regex.test(trimmed)) {
123
- return pattern.format;
124
- }
125
- }
126
- for (const pattern of DATE_PATTERNS) {
127
- if (pattern.regex.test(trimmed)) {
128
- return pattern.format;
129
- }
130
- }
131
- return undefined;
132
- }
133
- /**
134
- * Determine the predominant type for a column based on sampled values
135
- */
136
- function determineColumnType(types) {
137
- const nonEmpty = types.filter((t) => t !== "empty");
138
- if (nonEmpty.length === 0) {
139
- return { type: "empty", confidence: 100 };
140
- }
141
- // Count occurrences of each type
142
- const typeCounts = new Map();
143
- for (const t of nonEmpty) {
144
- typeCounts.set(t, (typeCounts.get(t) || 0) + 1);
145
- }
146
- // Find the most common type
147
- let maxType = "string";
148
- let maxCount = 0;
149
- for (const [type, count] of typeCounts) {
150
- if (count > maxCount) {
151
- maxCount = count;
152
- maxType = type;
153
- }
154
- }
155
- // Calculate confidence
156
- const confidence = Math.round((maxCount / nonEmpty.length) * 100);
157
- // Consolidate integer and float into number if the column contains only numeric types
158
- // This check must happen before the mixed-type check to avoid classifying numeric-only columns as mixed
159
- if (typeCounts.has("integer") && typeCounts.has("float")) {
160
- // Check if these are the only two types (purely numeric column)
161
- if (typeCounts.size === 2) {
162
- const totalNumeric = (typeCounts.get("integer") || 0) + (typeCounts.get("float") || 0);
163
- const numericConfidence = Math.round((totalNumeric / nonEmpty.length) * 100);
164
- return { type: "number", confidence: numericConfidence };
165
- }
166
- }
167
- // If confidence is low and multiple types exist, mark as mixed
168
- if (confidence < 70 && typeCounts.size > 1) {
169
- return { type: "mixed", confidence };
170
- }
171
- return { type: maxType, confidence };
172
- }
173
- /**
174
- * Analyze a single column and return rich metadata
175
- */
176
- function analyzeColumn(columnName, columnIndex, values) {
177
- const types = [];
178
- const uniqueValues = new Set();
179
- const numericValues = [];
180
- let nullCount = 0;
181
- let dateFormat;
182
- for (const value of values) {
183
- const trimmed = value?.trim() ?? "";
184
- if (trimmed === "") {
185
- nullCount++;
186
- types.push("empty");
187
- continue;
188
- }
189
- uniqueValues.add(trimmed);
190
- const type = detectValueType(trimmed);
191
- types.push(type);
192
- // Collect numeric values for statistics
193
- if (type === "integer" || type === "float") {
194
- const num = parseFloat(trimmed);
195
- if (!isNaN(num)) {
196
- numericValues.push(num);
197
- }
198
- }
199
- // Detect date format
200
- if ((type === "date" || type === "datetime") && !dateFormat) {
201
- dateFormat = detectDateFormat(trimmed);
202
- }
203
- }
204
- const { type: detectedType, confidence } = determineColumnType(types);
205
- // Get sample values (up to 5 unique non-empty)
206
- const sampleValues = Array.from(uniqueValues).slice(0, 5);
207
- // Calculate numeric statistics
208
- let minValue;
209
- let maxValue;
210
- let avgValue;
211
- if (numericValues.length > 0) {
212
- minValue = Math.min(...numericValues);
213
- maxValue = Math.max(...numericValues);
214
- avgValue =
215
- Math.round((numericValues.reduce((a, b) => a + b, 0) / numericValues.length) * 100) / 100;
216
- }
217
- // Validate column name
218
- const nameIssues = validateColumnName(columnName);
219
- const metadata = {
220
- name: columnName,
221
- index: columnIndex,
222
- detectedType,
223
- typeConfidence: confidence,
224
- nullCount,
225
- uniqueCount: uniqueValues.size,
226
- sampleValues,
227
- };
228
- if (minValue !== undefined) {
229
- metadata.minValue = minValue;
230
- }
231
- if (maxValue !== undefined) {
232
- metadata.maxValue = maxValue;
233
- }
234
- if (avgValue !== undefined) {
235
- metadata.avgValue = avgValue;
236
- }
237
- if (dateFormat) {
238
- metadata.dateFormat = dateFormat;
239
- }
240
- if (nameIssues.length > 0) {
241
- metadata.nameIssues = nameIssues;
242
- }
243
- return metadata;
244
- }
245
- /**
246
- * Generate data quality warnings based on column analysis
247
- */
248
- function generateDataQualityWarnings(columns, totalRows) {
249
- const warnings = [];
250
- for (const col of columns) {
251
- // Check for high null rate (>20%)
252
- const nullRate = totalRows > 0 ? col.nullCount / totalRows : 0;
253
- if (nullRate > 0.2) {
254
- warnings.push({
255
- column: col.name,
256
- type: "high_null_rate",
257
- message: `Column has ${Math.round(nullRate * 100)}% empty/null values (${col.nullCount} of ${totalRows} rows)`,
258
- severity: nullRate > 0.5 ? "warning" : "info",
259
- affectedRows: col.nullCount,
260
- });
261
- }
262
- // Check for invalid column names
263
- if (col.nameIssues && col.nameIssues.length > 0) {
264
- warnings.push({
265
- column: col.name,
266
- type: "invalid_name",
267
- message: `Column name issues: ${col.nameIssues.join(", ")}`,
268
- severity: col.name.trim() === "" ? "error" : "warning",
269
- });
270
- }
271
- // Check for mixed types (low confidence)
272
- if (col.detectedType === "mixed" || col.typeConfidence < 70) {
273
- warnings.push({
274
- column: col.name,
275
- type: "mixed_types",
276
- message: `Column has inconsistent data types (${col.typeConfidence}% confidence for ${col.detectedType})`,
277
- severity: "warning",
278
- });
279
- }
280
- // Check for potential duplicates (very low unique count)
281
- if (totalRows > 10 && col.uniqueCount === 1 && col.nullCount === 0) {
282
- warnings.push({
283
- column: col.name,
284
- type: "duplicates",
285
- message: `All ${totalRows} rows have the same value`,
286
- severity: "info",
287
- affectedRows: totalRows,
288
- });
289
- }
290
- // Check for all empty column
291
- if (col.detectedType === "empty") {
292
- warnings.push({
293
- column: col.name,
294
- type: "empty_values",
295
- message: "Column is entirely empty",
296
- severity: "warning",
297
- affectedRows: totalRows,
298
- });
299
- }
300
- }
301
- return warnings;
302
- }
303
- /**
304
- * Calculate overall data quality score
305
- */
306
- function calculateDataQualityScore(columns, warnings, totalRows) {
307
- if (columns.length === 0 || totalRows === 0) {
308
- return 0;
309
- }
310
- let score = 100;
311
- // Deduct for warnings
312
- for (const warning of warnings) {
313
- switch (warning.severity) {
314
- case "error":
315
- score -= 15;
316
- break;
317
- case "warning":
318
- score -= 8;
319
- break;
320
- case "info":
321
- score -= 3;
322
- break;
323
- }
324
- }
325
- // Deduct for overall null rate
326
- const totalNulls = columns.reduce((sum, col) => sum + col.nullCount, 0);
327
- const totalCells = columns.length * totalRows;
328
- const overallNullRate = totalCells > 0 ? totalNulls / totalCells : 0;
329
- score -= Math.round(overallNullRate * 30);
330
- // Deduct for low type confidence
331
- const avgConfidence = columns.reduce((sum, col) => sum + col.typeConfidence, 0) / columns.length;
332
- if (avgConfidence < 80) {
333
- score -= Math.round((80 - avgConfidence) / 2);
334
- }
335
- return Math.max(0, Math.min(100, score));
336
- }
337
- /**
338
- * Analyze all columns in parsed CSV data
339
- */
340
- function analyzeColumns(rows) {
341
- if (rows.length === 0) {
342
- return {
343
- columnMetadata: [],
344
- dataQualityWarnings: [],
345
- dataQualityScore: 0,
346
- };
347
- }
348
- const columnNames = Object.keys(rows[0]);
349
- const columnMetadata = [];
350
- for (let i = 0; i < columnNames.length; i++) {
351
- const colName = columnNames[i];
352
- const values = rows.map((row) => String(row[colName] ?? ""));
353
- columnMetadata.push(analyzeColumn(colName, i, values));
354
- }
355
- const dataQualityWarnings = generateDataQualityWarnings(columnMetadata, rows.length);
356
- const dataQualityScore = calculateDataQualityScore(columnMetadata, dataQualityWarnings, rows.length);
357
- return {
358
- columnMetadata,
359
- dataQualityWarnings,
360
- dataQualityScore,
361
- };
362
- }
363
- /**
364
- * Detect if the first row appears to be a header row
365
- *
366
- * Heuristics used:
367
- * 1. Header values should be text/string type (not numbers, dates, emails, etc.)
368
- * 2. Header values should be unique (no duplicate column names)
369
- * 3. If data rows exist, headers should have different type profile than data
370
- *
371
- * @param headerValues - The values from the first row (potential headers)
372
- * @param dataRows - Sample of data rows for comparison (optional)
373
- * @returns true if the first row appears to be headers
374
- */
375
- function detectHasHeaders(headerValues, dataRows) {
376
- if (headerValues.length === 0) {
377
- return false;
378
- }
379
- // Check 1: All header values should look like text labels, not data values
380
- let textLikeCount = 0;
381
- for (const value of headerValues) {
382
- const trimmed = value?.trim() ?? "";
383
- if (trimmed === "") {
384
- continue; // Empty headers are allowed but don't count toward text-like
385
- }
386
- const type = detectValueType(trimmed);
387
- // Headers are typically strings - not numbers, dates, emails, URLs, or booleans
388
- if (type === "string") {
389
- textLikeCount++;
390
- }
391
- }
392
- // If most header values are text-like (not numeric/date/etc.), likely headers
393
- const nonEmptyHeaders = headerValues.filter((v) => v?.trim()).length;
394
- if (nonEmptyHeaders === 0) {
395
- return false;
396
- }
397
- const textRatio = textLikeCount / nonEmptyHeaders;
398
- // Check 2: Headers should be unique
399
- const uniqueHeaders = new Set(headerValues.map((v) => v?.trim().toLowerCase()));
400
- const hasUniqueHeaders = uniqueHeaders.size === headerValues.length;
401
- // Check 3: Compare with data rows if available
402
- if (dataRows && dataRows.length > 0) {
403
- // If first data row has different type profile than headers, likely has headers
404
- const firstDataRow = Object.values(dataRows[0] || {}).map((v) => String(v ?? ""));
405
- let dataTextCount = 0;
406
- for (const value of firstDataRow) {
407
- const type = detectValueType(value?.trim() ?? "");
408
- if (type === "string") {
409
- dataTextCount++;
410
- }
411
- }
412
- const dataTextRatio = firstDataRow.length > 0 ? dataTextCount / firstDataRow.length : 0;
413
- // If headers are mostly text but data has more varied types, likely has headers
414
- if (textRatio > 0.7 && dataTextRatio < textRatio - 0.2) {
415
- return true;
416
- }
417
- }
418
- // Default: if >70% of header values are text-like and unique, assume headers
419
- return textRatio >= 0.7 && hasUniqueHeaders;
420
- }
421
- /**
422
- * Detect if first line is CSV metadata (not actual data/headers)
423
- * Common patterns:
424
- * - Excel separator line: "SEP=,"
425
- * - Lines with significantly different delimiter count than line 2
426
- * - Lines that don't match CSV structure of subsequent lines
427
- */
428
- function isMetadataLine(lines) {
429
- if (!lines[0] || lines.length < 2) {
430
- return false;
431
- }
432
- const firstLine = lines[0].trim();
433
- const secondLine = lines[1].trim();
434
- if (firstLine.match(/^sep=/i)) {
435
- return true;
436
- }
437
- const firstCommaCount = (firstLine.match(/,/g) || []).length;
438
- const secondCommaCount = (secondLine.match(/,/g) || []).length;
439
- if (firstCommaCount === 0 && secondCommaCount > 0) {
440
- return true;
441
- }
442
- if (secondCommaCount > 0 && firstCommaCount !== secondCommaCount) {
443
- return true;
444
- }
445
- return false;
446
- }
447
- /**
448
- * CSV processor for converting CSV data to LLM-optimized formats
449
- *
450
- * Supports three output formats:
451
- * - raw: Original CSV format with proper escaping (RECOMMENDED for best LLM performance)
452
- * - json: JSON array format (best for structured data processing)
453
- * - markdown: Markdown table format (best for small datasets <100 rows)
454
- *
455
- * All formats use csv-parser for reliable parsing, then convert to the target format.
456
- *
457
- * @example
458
- * ```typescript
459
- * const csvBuffer = Buffer.from('name,age\nAlice,30\nBob,25');
460
- * const result = await CSVProcessor.process(csvBuffer, {
461
- * maxRows: 1000,
462
- * formatStyle: 'raw'
463
- * });
464
- * console.log(result.content); // CSV string with proper escaping
465
- * ```
466
- */
467
- export class CSVProcessor {
468
- /**
469
- * Process CSV Buffer to LLM-friendly format
470
- * Content already loaded by FileDetector
471
- *
472
- * @param content - CSV file as Buffer
473
- * @param options - Processing options
474
- * @returns Formatted CSV data ready for LLM (JSON or Markdown)
475
- */
476
- static async process(content, options) {
477
- const { maxRows: rawMaxRows = 1000, formatStyle = "raw", includeHeaders = true, sampleDataFormat = "json", extension = null, } = options || {};
478
- const maxRows = Math.max(1, Math.min(10000, rawMaxRows));
479
- logger.debug("[CSVProcessor] Starting CSV processing", {
480
- contentSize: content.length,
481
- formatStyle,
482
- maxRows,
483
- includeHeaders,
484
- });
485
- const csvString = content.toString("utf-8");
486
- // For raw format, return original CSV with row limit (no parsing needed)
487
- // This preserves the exact original format which works best for LLMs
488
- if (formatStyle === "raw") {
489
- const lines = csvString.split("\n");
490
- const hasMetadataLine = isMetadataLine(lines);
491
- if (hasMetadataLine) {
492
- logger.debug("[CSVProcessor] Detected metadata line, skipping first line");
493
- }
494
- // Skip metadata line if present, then take header + maxRows data rows
495
- const csvLines = hasMetadataLine
496
- ? lines.slice(1) // Skip metadata line
497
- : lines;
498
- const limitedLines = csvLines.slice(0, 1 + maxRows); // header + data rows
499
- const limitedCSV = limitedLines.join("\n");
500
- const rowCount = limitedLines
501
- .slice(1)
502
- .filter((line) => line.trim() !== "").length;
503
- const originalRowCount = csvLines
504
- .slice(1)
505
- .filter((line) => line.trim() !== "").length;
506
- const wasTruncated = rowCount < originalRowCount;
507
- if (wasTruncated) {
508
- logger.warn(`[CSVProcessor] CSV data truncated: showing ${rowCount} of ${originalRowCount} rows (limit: ${maxRows})`);
509
- }
510
- logger.debug(`[CSVProcessor] raw format: ${rowCount} rows (original: ${originalRowCount}) → ${limitedCSV.length} chars`, {
511
- formatStyle: "raw",
512
- originalSize: csvString.length,
513
- limitedSize: limitedCSV.length,
514
- });
515
- logger.info("[CSVProcessor] ✅ Processed CSV file", {
516
- formatStyle: "raw",
517
- rowCount,
518
- columnCount: (limitedLines[0] || "").split(",").length,
519
- truncated: wasTruncated,
520
- });
521
- // Parse a sample for enhanced metadata analysis (raw format still benefits from column analysis)
522
- const sampleForAnalysis = await this.parseCSVString(limitedCSV, Math.min(rowCount, 500));
523
- const { columnMetadata, dataQualityWarnings, dataQualityScore } = analyzeColumns(sampleForAnalysis);
524
- // Log data quality summary
525
- if (dataQualityWarnings.length > 0) {
526
- logger.debug("[CSVProcessor] Data quality warnings detected", {
527
- warningCount: dataQualityWarnings.length,
528
- score: dataQualityScore,
529
- });
530
- }
531
- return {
532
- type: "csv",
533
- content: limitedCSV,
534
- mimeType: "text/csv",
535
- metadata: {
536
- confidence: 100,
537
- size: content.length,
538
- rowCount,
539
- totalLines: limitedLines.length,
540
- columnCount: (limitedLines[0] || "").split(",").length,
541
- extension,
542
- columnMetadata,
543
- dataQualityWarnings,
544
- dataQualityScore,
545
- hasHeaders: detectHasHeaders((limitedLines[0] || "").split(","), undefined),
546
- detectedDelimiter: ",",
547
- },
548
- };
549
- }
550
- // Parse CSV for JSON and Markdown formats only
551
- logger.debug("[CSVProcessor] Parsing CSV for structured format conversion", {
552
- formatStyle,
553
- maxRows,
554
- });
555
- const rows = await this.parseCSVString(csvString, maxRows);
556
- // Filter out empty rows (empty objects or rows with only whitespace values from blank lines)
557
- const nonEmptyRows = rows.filter((row) => {
558
- if (!row || typeof row !== "object") {
559
- return false;
560
- }
561
- const keys = Object.keys(row);
562
- if (keys.length === 0) {
563
- return false;
564
- }
565
- // Check if all values are empty or whitespace-only
566
- return !Object.values(row).every((val) => val === "" || (typeof val === "string" && val.trim() === ""));
567
- });
568
- // Extract metadata from parsed results
569
- const rowCount = nonEmptyRows.length;
570
- const columnNames = nonEmptyRows.length > 0
571
- ? Object.keys(nonEmptyRows[0])
572
- : [];
573
- const columnCount = columnNames.length;
574
- const hasEmptyColumns = columnNames.some((col) => !col || col.trim() === "");
575
- const sampleRows = nonEmptyRows.slice(0, 3);
576
- const sampleData = this.formatSampleData(sampleRows, sampleDataFormat, includeHeaders);
577
- if (hasEmptyColumns) {
578
- logger.warn("[CSVProcessor] CSV contains empty or blank column headers", {
579
- columnNames,
580
- });
581
- }
582
- if (rowCount === 0) {
583
- logger.warn("[CSVProcessor] CSV file contains no data rows");
584
- }
585
- // Perform enhanced column analysis
586
- const { columnMetadata, dataQualityWarnings, dataQualityScore } = analyzeColumns(nonEmptyRows);
587
- // Log data quality summary
588
- if (dataQualityWarnings.length > 0) {
589
- logger.debug("[CSVProcessor] Data quality warnings detected", {
590
- warningCount: dataQualityWarnings.length,
591
- score: dataQualityScore,
592
- });
593
- }
594
- // Format parsed data
595
- logger.debug(`[CSVProcessor] Converting ${rowCount} rows to ${formatStyle} format`);
596
- const formatted = this.formatForLLM(nonEmptyRows, formatStyle, includeHeaders);
597
- logger.info("[CSVProcessor] ✅ Processed CSV file", {
598
- formatStyle,
599
- rowCount,
600
- columnCount,
601
- outputLength: formatted.length,
602
- hasEmptyColumns,
603
- dataQualityScore,
604
- });
605
- return {
606
- type: "csv",
607
- content: formatted,
608
- mimeType: "text/csv",
609
- metadata: {
610
- confidence: 100,
611
- size: content.length,
612
- rowCount,
613
- columnCount,
614
- columnNames,
615
- sampleData,
616
- hasEmptyColumns,
617
- extension,
618
- columnMetadata,
619
- dataQualityWarnings,
620
- dataQualityScore,
621
- hasHeaders: detectHasHeaders(columnNames, nonEmptyRows),
622
- detectedDelimiter: ",",
623
- },
624
- };
625
- }
626
- /**
627
- * Parse CSV string into array of row objects using streaming
628
- * Memory-efficient for large files
629
- */
630
- /**
631
- * Parse CSV file from disk using streaming (memory efficient)
632
- *
633
- * @param filePath - Path to CSV file
634
- * @param maxRows - Maximum rows to parse (default: 1000)
635
- * @returns Array of row objects
636
- */
637
- static async parseCSVFile(filePath, maxRows = 1000) {
638
- const clampedMaxRows = Math.max(1, Math.min(10000, maxRows));
639
- const fs = await import("fs");
640
- logger.debug("[CSVProcessor] Starting file parsing", {
641
- filePath,
642
- maxRows: clampedMaxRows,
643
- });
644
- // Read first 2 lines to detect metadata
645
- const fileHandle = await fs.promises.open(filePath, "r");
646
- const firstLines = [];
647
- const lineReader = fileHandle.createReadStream({ encoding: "utf-8" });
648
- await new Promise((resolve) => {
649
- let buffer = "";
650
- lineReader.on("data", (chunk) => {
651
- buffer += chunk.toString();
652
- const lines = buffer.split("\n");
653
- if (lines.length >= 2) {
654
- firstLines.push(lines[0], lines[1]);
655
- lineReader.destroy();
656
- resolve();
657
- }
658
- });
659
- lineReader.on("end", () => resolve());
660
- });
661
- await fileHandle.close();
662
- const hasMetadataLine = isMetadataLine(firstLines);
663
- const skipLines = hasMetadataLine ? 1 : 0;
664
- if (hasMetadataLine) {
665
- logger.debug("[CSVProcessor] Detected metadata line in file, will skip first line");
666
- }
667
- return new Promise((resolve, reject) => {
668
- const rows = [];
669
- let count = 0;
670
- let lineCount = 0;
671
- const source = fs.createReadStream(filePath, { encoding: "utf-8" });
672
- const parser = csvParser();
673
- const abort = () => {
674
- source.destroy();
675
- parser.destroy();
676
- };
677
- source
678
- .pipe(parser)
679
- .on("data", (row) => {
680
- lineCount++;
681
- if (lineCount <= skipLines) {
682
- return;
683
- }
684
- rows.push(row);
685
- count++;
686
- if (count >= clampedMaxRows) {
687
- logger.debug(`[CSVProcessor] Reached row limit ${clampedMaxRows}, stopping parse`);
688
- abort();
689
- resolve(rows);
690
- }
691
- })
692
- .on("end", () => {
693
- logger.debug(`[CSVProcessor] File parsing complete: ${rows.length} rows parsed`);
694
- resolve(rows);
695
- })
696
- .on("error", (error) => {
697
- logger.error("[CSVProcessor] File parsing failed:", error);
698
- reject(error);
699
- });
700
- });
701
- }
702
- /**
703
- * Parse CSV string to array of row objects
704
- * Exposed for use by tools that need direct CSV parsing
705
- *
706
- * @param csvString - CSV data as string
707
- * @param maxRows - Maximum rows to parse (default: 1000)
708
- * @returns Array of row objects
709
- */
710
- static async parseCSVString(csvString, maxRows = 1000) {
711
- const clampedMaxRows = Math.max(1, Math.min(10000, maxRows));
712
- logger.debug("[CSVProcessor] Starting string parsing", {
713
- inputLength: csvString.length,
714
- maxRows: clampedMaxRows,
715
- });
716
- // Detect and skip metadata line
717
- const lines = csvString.split("\n");
718
- const hasMetadataLine = isMetadataLine(lines);
719
- const csvData = hasMetadataLine ? lines.slice(1).join("\n") : csvString;
720
- if (hasMetadataLine) {
721
- logger.debug("[CSVProcessor] Detected metadata line in string, skipping");
722
- }
723
- return new Promise((resolve, reject) => {
724
- const rows = [];
725
- let count = 0;
726
- const source = Readable.from([csvData]);
727
- const parser = csvParser();
728
- const abort = () => {
729
- source.destroy();
730
- parser.destroy();
731
- };
732
- source
733
- .pipe(parser)
734
- .on("data", (row) => {
735
- rows.push(row);
736
- count++;
737
- if (count >= clampedMaxRows) {
738
- logger.debug(`[CSVProcessor] Reached row limit ${clampedMaxRows}, stopping parse`);
739
- abort();
740
- resolve(rows);
741
- }
742
- })
743
- .on("end", () => {
744
- logger.debug(`[CSVProcessor] String parsing complete: ${rows.length} rows parsed`);
745
- resolve(rows);
746
- })
747
- .on("error", (error) => {
748
- logger.error("[CSVProcessor] Parsing failed:", error);
749
- reject(error);
750
- });
751
- });
752
- }
753
- /**
754
- * Format parsed CSV data for LLM consumption
755
- * Only used for JSON and Markdown formats (raw format handled separately)
756
- */
757
- static formatForLLM(rows, formatStyle, includeHeaders) {
758
- if (rows.length === 0) {
759
- return "CSV file is empty or contains no data.";
760
- }
761
- if (formatStyle === "json") {
762
- return JSON.stringify(rows, null, 2);
763
- }
764
- return this.toMarkdownTable(rows, includeHeaders);
765
- }
766
- /**
767
- * Format as markdown table
768
- * Best for small datasets (<100 rows)
769
- */
770
- static toMarkdownTable(rows, includeHeaders) {
771
- if (rows.length === 0) {
772
- return "CSV file is empty or contains no data.";
773
- }
774
- const headers = Object.keys(rows[0]);
775
- // Escape backslashes, pipes, and sanitize newlines to keep rows intact
776
- const escapePipe = (str) => str.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
777
- let markdown = "";
778
- if (includeHeaders) {
779
- markdown = "| " + headers.map(escapePipe).join(" | ") + " |\n";
780
- markdown += "|" + headers.map(() => " --- ").join("|") + "|\n";
781
- }
782
- rows.forEach((row) => {
783
- markdown +=
784
- "| " +
785
- headers
786
- .map((h) => escapePipe(String(row[h] || "")))
787
- .join(" | ") +
788
- " |\n";
789
- });
790
- return markdown;
791
- }
792
- /**
793
- * Format sample data according to the specified format
794
- *
795
- * @param sampleRows - Array of sample row objects
796
- * @param format - Output format for sample data
797
- * @param includeHeaders - Whether to include headers in CSV/markdown formats
798
- * @returns Formatted sample data as string or array
799
- */
800
- static formatSampleData(sampleRows, format, includeHeaders) {
801
- if (sampleRows.length === 0) {
802
- return format === "object" ? [] : "No data rows";
803
- }
804
- switch (format) {
805
- case "object":
806
- return sampleRows;
807
- case "json":
808
- return JSON.stringify(sampleRows, null, 2);
809
- case "csv":
810
- return this.toCSVString(sampleRows, includeHeaders);
811
- case "markdown":
812
- return this.toMarkdownTable(sampleRows, includeHeaders);
813
- default:
814
- return sampleRows;
815
- }
816
- }
817
- /**
818
- * Convert row objects to CSV string format
819
- *
820
- * @param rows - Array of row objects
821
- * @param includeHeaders - Whether to include header row
822
- * @returns CSV formatted string
823
- */
824
- static toCSVString(rows, includeHeaders) {
825
- if (rows.length === 0) {
826
- return "";
827
- }
828
- const headers = Object.keys(rows[0]);
829
- // Escape CSV values (wrap in quotes if contains comma, quote, or newline)
830
- const escapeCSV = (value) => {
831
- if (value.includes(",") || value.includes('"') || value.includes("\n")) {
832
- return `"${value.replace(/"/g, '""')}"`;
833
- }
834
- return value;
835
- };
836
- const lines = [];
837
- if (includeHeaders) {
838
- lines.push(headers.map(escapeCSV).join(","));
839
- }
840
- rows.forEach((row) => {
841
- const values = headers.map((h) => escapeCSV(String(row[h] ?? "")));
842
- lines.push(values.join(","));
843
- });
844
- return lines.join("\n");
845
- }
846
- }