@juspay/neurolink 9.32.0 → 9.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (467) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/anthropicOAuth.js +1 -1
  3. package/dist/cli/commands/proxy.js +18 -5
  4. package/dist/client/aiSdkAdapter.js +1 -1
  5. package/dist/client/index.js +137 -501
  6. package/dist/core/factory.js +0 -1
  7. package/dist/core/redisConversationMemoryManager.js +1 -1
  8. package/dist/features/ppt/slideGenerator.js +0 -1
  9. package/dist/features/ppt/utils.js +0 -1
  10. package/dist/lib/server/routes/claudeProxyRoutes.js +45 -9
  11. package/dist/mcp/elicitationProtocol.js +1 -1
  12. package/dist/mcp/servers/agent/directToolsServer.js +0 -1
  13. package/dist/providers/azureOpenai.js +1 -1
  14. package/dist/providers/huggingFace.js +0 -1
  15. package/dist/providers/openaiCompatible.js +0 -1
  16. package/dist/sdk/toolRegistration.js +0 -1
  17. package/dist/server/openapi/generator.js +1 -1
  18. package/dist/server/routes/claudeProxyRoutes.js +45 -9
  19. package/dist/types/configTypes.js +0 -5
  20. package/dist/types/modelTypes.js +0 -1
  21. package/dist/types/tools.js +0 -1
  22. package/dist/types/typeAliases.js +0 -1
  23. package/dist/types/utilities.js +1 -1
  24. package/dist/types/workflowTypes.js +0 -1
  25. package/dist/utils/providerRetry.js +0 -1
  26. package/dist/utils/providerUtils.js +0 -1
  27. package/package.json +2 -2
  28. package/dist/client/adapters/providerImageAdapter.js +0 -588
  29. package/dist/client/adapters/tts/googleTTSHandler.js +0 -344
  30. package/dist/client/adapters/video/directorPipeline.js +0 -516
  31. package/dist/client/adapters/video/ffmpegAdapter.js +0 -206
  32. package/dist/client/adapters/video/frameExtractor.js +0 -143
  33. package/dist/client/adapters/video/vertexVideoHandler.js +0 -763
  34. package/dist/client/adapters/video/videoAnalyzer.js +0 -238
  35. package/dist/client/adapters/video/videoMerger.js +0 -171
  36. package/dist/client/agent/directTools.js +0 -840
  37. package/dist/client/auth/AuthProviderFactory.js +0 -111
  38. package/dist/client/auth/AuthProviderRegistry.js +0 -190
  39. package/dist/client/auth/RequestContext.js +0 -78
  40. package/dist/client/auth/accountPool.js +0 -178
  41. package/dist/client/auth/anthropicOAuth.js +0 -974
  42. package/dist/client/auth/authContext.js +0 -314
  43. package/dist/client/auth/errors.js +0 -39
  44. package/dist/client/auth/index.js +0 -61
  45. package/dist/client/auth/middleware/AuthMiddleware.js +0 -519
  46. package/dist/client/auth/middleware/rateLimitByUser.js +0 -554
  47. package/dist/client/auth/providers/BaseAuthProvider.js +0 -723
  48. package/dist/client/auth/providers/CognitoProvider.js +0 -304
  49. package/dist/client/auth/providers/KeycloakProvider.js +0 -393
  50. package/dist/client/auth/providers/auth0.js +0 -274
  51. package/dist/client/auth/providers/betterAuth.js +0 -182
  52. package/dist/client/auth/providers/clerk.js +0 -317
  53. package/dist/client/auth/providers/custom.js +0 -112
  54. package/dist/client/auth/providers/firebase.js +0 -226
  55. package/dist/client/auth/providers/jwt.js +0 -212
  56. package/dist/client/auth/providers/oauth2.js +0 -303
  57. package/dist/client/auth/providers/supabase.js +0 -259
  58. package/dist/client/auth/providers/workos.js +0 -284
  59. package/dist/client/auth/serverBridge.js +0 -25
  60. package/dist/client/auth/sessionManager.js +0 -437
  61. package/dist/client/auth/tokenStore.js +0 -799
  62. package/dist/client/client/aiSdkAdapter.js +0 -487
  63. package/dist/client/client/auth.js +0 -473
  64. package/dist/client/client/errors.js +0 -552
  65. package/dist/client/client/httpClient.js +0 -837
  66. package/dist/client/client/index.js +0 -172
  67. package/dist/client/client/interceptors.js +0 -601
  68. package/dist/client/client/sseClient.js +0 -545
  69. package/dist/client/client/streamingClient.js +0 -917
  70. package/dist/client/client/wsClient.js +0 -369
  71. package/dist/client/config/configManager.js +0 -303
  72. package/dist/client/config/conversationMemory.js +0 -86
  73. package/dist/client/config/taskClassificationConfig.js +0 -148
  74. package/dist/client/constants/contextWindows.js +0 -295
  75. package/dist/client/constants/enums.js +0 -853
  76. package/dist/client/constants/index.js +0 -207
  77. package/dist/client/constants/performance.js +0 -389
  78. package/dist/client/constants/retry.js +0 -266
  79. package/dist/client/constants/timeouts.js +0 -182
  80. package/dist/client/constants/tokens.js +0 -380
  81. package/dist/client/constants/videoErrors.js +0 -46
  82. package/dist/client/context/budgetChecker.js +0 -98
  83. package/dist/client/context/contextCompactor.js +0 -205
  84. package/dist/client/context/emergencyTruncation.js +0 -88
  85. package/dist/client/context/errorDetection.js +0 -171
  86. package/dist/client/context/errors.js +0 -21
  87. package/dist/client/context/fileTokenBudget.js +0 -127
  88. package/dist/client/context/prompts/summarizationPrompt.js +0 -117
  89. package/dist/client/context/stages/fileReadDeduplicator.js +0 -66
  90. package/dist/client/context/stages/slidingWindowTruncator.js +0 -190
  91. package/dist/client/context/stages/structuredSummarizer.js +0 -99
  92. package/dist/client/context/stages/toolOutputPruner.js +0 -52
  93. package/dist/client/context/summarizationEngine.js +0 -136
  94. package/dist/client/context/toolOutputLimits.js +0 -78
  95. package/dist/client/context/toolPairRepair.js +0 -66
  96. package/dist/client/core/analytics.js +0 -88
  97. package/dist/client/core/baseProvider.js +0 -1385
  98. package/dist/client/core/constants.js +0 -140
  99. package/dist/client/core/conversationMemoryFactory.js +0 -141
  100. package/dist/client/core/conversationMemoryInitializer.js +0 -128
  101. package/dist/client/core/conversationMemoryManager.js +0 -344
  102. package/dist/client/core/dynamicModels.js +0 -358
  103. package/dist/client/core/evaluation.js +0 -309
  104. package/dist/client/core/evaluationProviders.js +0 -248
  105. package/dist/client/core/factory.js +0 -412
  106. package/dist/client/core/infrastructure/baseError.js +0 -22
  107. package/dist/client/core/infrastructure/baseFactory.js +0 -54
  108. package/dist/client/core/infrastructure/baseRegistry.js +0 -53
  109. package/dist/client/core/infrastructure/index.js +0 -5
  110. package/dist/client/core/infrastructure/retry.js +0 -20
  111. package/dist/client/core/infrastructure/typedEventEmitter.js +0 -23
  112. package/dist/client/core/modelConfiguration.js +0 -851
  113. package/dist/client/core/modules/GenerationHandler.js +0 -588
  114. package/dist/client/core/modules/MessageBuilder.js +0 -273
  115. package/dist/client/core/modules/StreamHandler.js +0 -185
  116. package/dist/client/core/modules/TelemetryHandler.js +0 -203
  117. package/dist/client/core/modules/ToolsManager.js +0 -499
  118. package/dist/client/core/modules/Utilities.js +0 -331
  119. package/dist/client/core/redisConversationMemoryManager.js +0 -1435
  120. package/dist/client/core/streamAnalytics.js +0 -131
  121. package/dist/client/evaluation/contextBuilder.js +0 -134
  122. package/dist/client/evaluation/index.js +0 -61
  123. package/dist/client/evaluation/prompts.js +0 -73
  124. package/dist/client/evaluation/ragasEvaluator.js +0 -110
  125. package/dist/client/evaluation/retryManager.js +0 -78
  126. package/dist/client/evaluation/scoring.js +0 -61
  127. package/dist/client/factories/providerFactory.js +0 -166
  128. package/dist/client/factories/providerRegistry.js +0 -166
  129. package/dist/client/features/ppt/constants.js +0 -896
  130. package/dist/client/features/ppt/contentPlanner.js +0 -529
  131. package/dist/client/features/ppt/presentationOrchestrator.js +0 -236
  132. package/dist/client/features/ppt/slideGenerator.js +0 -532
  133. package/dist/client/features/ppt/slideRenderers.js +0 -2383
  134. package/dist/client/features/ppt/slideTypeInference.js +0 -405
  135. package/dist/client/features/ppt/types.js +0 -13
  136. package/dist/client/features/ppt/utils.js +0 -443
  137. package/dist/client/files/fileReferenceRegistry.js +0 -1543
  138. package/dist/client/files/fileTools.js +0 -450
  139. package/dist/client/files/streamingReader.js +0 -321
  140. package/dist/client/files/types.js +0 -23
  141. package/dist/client/hitl/hitlErrors.js +0 -54
  142. package/dist/client/hitl/hitlManager.js +0 -460
  143. package/dist/client/mcp/agentExposure.js +0 -356
  144. package/dist/client/mcp/auth/index.js +0 -11
  145. package/dist/client/mcp/auth/oauthClientProvider.js +0 -325
  146. package/dist/client/mcp/auth/tokenStorage.js +0 -134
  147. package/dist/client/mcp/batching/index.js +0 -10
  148. package/dist/client/mcp/batching/requestBatcher.js +0 -441
  149. package/dist/client/mcp/caching/index.js +0 -10
  150. package/dist/client/mcp/caching/toolCache.js +0 -433
  151. package/dist/client/mcp/elicitation/elicitationManager.js +0 -376
  152. package/dist/client/mcp/elicitation/index.js +0 -11
  153. package/dist/client/mcp/elicitation/types.js +0 -10
  154. package/dist/client/mcp/elicitationProtocol.js +0 -375
  155. package/dist/client/mcp/enhancedToolDiscovery.js +0 -481
  156. package/dist/client/mcp/externalServerManager.js +0 -1478
  157. package/dist/client/mcp/factory.js +0 -161
  158. package/dist/client/mcp/flexibleToolValidator.js +0 -161
  159. package/dist/client/mcp/httpRateLimiter.js +0 -391
  160. package/dist/client/mcp/httpRetryHandler.js +0 -178
  161. package/dist/client/mcp/index.js +0 -74
  162. package/dist/client/mcp/mcpCircuitBreaker.js +0 -427
  163. package/dist/client/mcp/mcpClientFactory.js +0 -708
  164. package/dist/client/mcp/mcpRegistryClient.js +0 -488
  165. package/dist/client/mcp/mcpServerBase.js +0 -373
  166. package/dist/client/mcp/multiServerManager.js +0 -579
  167. package/dist/client/mcp/registry.js +0 -158
  168. package/dist/client/mcp/routing/index.js +0 -10
  169. package/dist/client/mcp/routing/toolRouter.js +0 -416
  170. package/dist/client/mcp/serverCapabilities.js +0 -502
  171. package/dist/client/mcp/servers/agent/directToolsServer.js +0 -150
  172. package/dist/client/mcp/toolAnnotations.js +0 -239
  173. package/dist/client/mcp/toolConverter.js +0 -258
  174. package/dist/client/mcp/toolDiscoveryService.js +0 -798
  175. package/dist/client/mcp/toolIntegration.js +0 -334
  176. package/dist/client/mcp/toolRegistry.js +0 -729
  177. package/dist/client/memory/hippocampusInitializer.js +0 -19
  178. package/dist/client/memory/memoryRetrievalTools.js +0 -166
  179. package/dist/client/middleware/builtin/analytics.js +0 -132
  180. package/dist/client/middleware/builtin/autoEvaluation.js +0 -203
  181. package/dist/client/middleware/builtin/guardrails.js +0 -109
  182. package/dist/client/middleware/builtin/lifecycle.js +0 -168
  183. package/dist/client/middleware/factory.js +0 -327
  184. package/dist/client/middleware/registry.js +0 -295
  185. package/dist/client/middleware/utils/guardrailsUtils.js +0 -396
  186. package/dist/client/models/anthropicModels.js +0 -527
  187. package/dist/client/neurolink.js +0 -8233
  188. package/dist/client/observability/exporterRegistry.js +0 -413
  189. package/dist/client/observability/exporters/arizeExporter.js +0 -138
  190. package/dist/client/observability/exporters/baseExporter.js +0 -190
  191. package/dist/client/observability/exporters/braintrustExporter.js +0 -154
  192. package/dist/client/observability/exporters/datadogExporter.js +0 -196
  193. package/dist/client/observability/exporters/laminarExporter.js +0 -302
  194. package/dist/client/observability/exporters/langfuseExporter.js +0 -209
  195. package/dist/client/observability/exporters/langsmithExporter.js +0 -143
  196. package/dist/client/observability/exporters/otelExporter.js +0 -164
  197. package/dist/client/observability/exporters/posthogExporter.js +0 -287
  198. package/dist/client/observability/exporters/sentryExporter.js +0 -165
  199. package/dist/client/observability/index.js +0 -31
  200. package/dist/client/observability/metricsAggregator.js +0 -556
  201. package/dist/client/observability/otelBridge.js +0 -131
  202. package/dist/client/observability/retryPolicy.js +0 -383
  203. package/dist/client/observability/sampling/samplers.js +0 -216
  204. package/dist/client/observability/spanProcessor.js +0 -303
  205. package/dist/client/observability/tokenTracker.js +0 -413
  206. package/dist/client/observability/types/exporterTypes.js +0 -5
  207. package/dist/client/observability/types/index.js +0 -4
  208. package/dist/client/observability/types/spanTypes.js +0 -92
  209. package/dist/client/observability/utils/safeMetadata.js +0 -25
  210. package/dist/client/observability/utils/spanSerializer.js +0 -292
  211. package/dist/client/processors/archive/ArchiveProcessor.js +0 -1308
  212. package/dist/client/processors/base/BaseFileProcessor.js +0 -614
  213. package/dist/client/processors/base/types.js +0 -82
  214. package/dist/client/processors/config/fileTypes.js +0 -520
  215. package/dist/client/processors/config/index.js +0 -92
  216. package/dist/client/processors/config/languageMap.js +0 -410
  217. package/dist/client/processors/config/mimeTypes.js +0 -363
  218. package/dist/client/processors/config/sizeLimits.js +0 -258
  219. package/dist/client/processors/document/ExcelProcessor.js +0 -590
  220. package/dist/client/processors/document/OpenDocumentProcessor.js +0 -212
  221. package/dist/client/processors/document/PptxProcessor.js +0 -157
  222. package/dist/client/processors/document/RtfProcessor.js +0 -361
  223. package/dist/client/processors/document/WordProcessor.js +0 -353
  224. package/dist/client/processors/errors/FileErrorCode.js +0 -255
  225. package/dist/client/processors/errors/errorHelpers.js +0 -386
  226. package/dist/client/processors/errors/errorSerializer.js +0 -507
  227. package/dist/client/processors/errors/index.js +0 -49
  228. package/dist/client/processors/markup/SvgProcessor.js +0 -240
  229. package/dist/client/processors/media/AudioProcessor.js +0 -707
  230. package/dist/client/processors/media/VideoProcessor.js +0 -1045
  231. package/dist/client/providers/amazonBedrock.js +0 -1512
  232. package/dist/client/providers/amazonSagemaker.js +0 -162
  233. package/dist/client/providers/anthropic.js +0 -831
  234. package/dist/client/providers/azureOpenai.js +0 -143
  235. package/dist/client/providers/googleAiStudio.js +0 -1200
  236. package/dist/client/providers/googleNativeGemini3.js +0 -543
  237. package/dist/client/providers/googleVertex.js +0 -2936
  238. package/dist/client/providers/huggingFace.js +0 -315
  239. package/dist/client/providers/litellm.js +0 -488
  240. package/dist/client/providers/mistral.js +0 -157
  241. package/dist/client/providers/ollama.js +0 -1579
  242. package/dist/client/providers/openAI.js +0 -627
  243. package/dist/client/providers/openRouter.js +0 -543
  244. package/dist/client/providers/openaiCompatible.js +0 -290
  245. package/dist/client/providers/providerTypeUtils.js +0 -46
  246. package/dist/client/providers/sagemaker/adaptive-semaphore.js +0 -215
  247. package/dist/client/providers/sagemaker/client.js +0 -472
  248. package/dist/client/providers/sagemaker/config.js +0 -317
  249. package/dist/client/providers/sagemaker/detection.js +0 -606
  250. package/dist/client/providers/sagemaker/error-constants.js +0 -227
  251. package/dist/client/providers/sagemaker/errors.js +0 -299
  252. package/dist/client/providers/sagemaker/language-model.js +0 -775
  253. package/dist/client/providers/sagemaker/parsers.js +0 -634
  254. package/dist/client/providers/sagemaker/streaming.js +0 -331
  255. package/dist/client/providers/sagemaker/structured-parser.js +0 -625
  256. package/dist/client/proxy/accountQuota.js +0 -162
  257. package/dist/client/proxy/claudeFormat.js +0 -595
  258. package/dist/client/proxy/modelRouter.js +0 -29
  259. package/dist/client/proxy/oauthFetch.js +0 -367
  260. package/dist/client/proxy/proxyFetch.js +0 -586
  261. package/dist/client/proxy/requestLogger.js +0 -207
  262. package/dist/client/proxy/tokenRefresh.js +0 -124
  263. package/dist/client/proxy/usageStats.js +0 -74
  264. package/dist/client/proxy/utils/noProxyUtils.js +0 -149
  265. package/dist/client/rag/ChunkerFactory.js +0 -320
  266. package/dist/client/rag/ChunkerRegistry.js +0 -421
  267. package/dist/client/rag/chunkers/BaseChunker.js +0 -143
  268. package/dist/client/rag/chunkers/CharacterChunker.js +0 -28
  269. package/dist/client/rag/chunkers/HTMLChunker.js +0 -38
  270. package/dist/client/rag/chunkers/JSONChunker.js +0 -68
  271. package/dist/client/rag/chunkers/LaTeXChunker.js +0 -63
  272. package/dist/client/rag/chunkers/MarkdownChunker.js +0 -306
  273. package/dist/client/rag/chunkers/RecursiveChunker.js +0 -139
  274. package/dist/client/rag/chunkers/SemanticMarkdownChunker.js +0 -138
  275. package/dist/client/rag/chunkers/SentenceChunker.js +0 -66
  276. package/dist/client/rag/chunkers/TokenChunker.js +0 -61
  277. package/dist/client/rag/chunkers/index.js +0 -15
  278. package/dist/client/rag/chunking/characterChunker.js +0 -142
  279. package/dist/client/rag/chunking/chunkerRegistry.js +0 -194
  280. package/dist/client/rag/chunking/htmlChunker.js +0 -247
  281. package/dist/client/rag/chunking/index.js +0 -17
  282. package/dist/client/rag/chunking/jsonChunker.js +0 -281
  283. package/dist/client/rag/chunking/latexChunker.js +0 -251
  284. package/dist/client/rag/chunking/markdownChunker.js +0 -373
  285. package/dist/client/rag/chunking/recursiveChunker.js +0 -148
  286. package/dist/client/rag/chunking/semanticChunker.js +0 -306
  287. package/dist/client/rag/chunking/sentenceChunker.js +0 -230
  288. package/dist/client/rag/chunking/tokenChunker.js +0 -183
  289. package/dist/client/rag/document/MDocument.js +0 -392
  290. package/dist/client/rag/document/index.js +0 -5
  291. package/dist/client/rag/document/loaders.js +0 -500
  292. package/dist/client/rag/errors/RAGError.js +0 -274
  293. package/dist/client/rag/errors/index.js +0 -6
  294. package/dist/client/rag/graphRag/graphRAG.js +0 -401
  295. package/dist/client/rag/graphRag/index.js +0 -4
  296. package/dist/client/rag/index.js +0 -141
  297. package/dist/client/rag/metadata/MetadataExtractorFactory.js +0 -418
  298. package/dist/client/rag/metadata/MetadataExtractorRegistry.js +0 -362
  299. package/dist/client/rag/metadata/index.js +0 -9
  300. package/dist/client/rag/metadata/metadataExtractor.js +0 -280
  301. package/dist/client/rag/pipeline/RAGPipeline.js +0 -436
  302. package/dist/client/rag/pipeline/contextAssembly.js +0 -341
  303. package/dist/client/rag/pipeline/index.js +0 -5
  304. package/dist/client/rag/ragIntegration.js +0 -321
  305. package/dist/client/rag/reranker/RerankerFactory.js +0 -430
  306. package/dist/client/rag/reranker/RerankerRegistry.js +0 -402
  307. package/dist/client/rag/reranker/index.js +0 -9
  308. package/dist/client/rag/reranker/reranker.js +0 -277
  309. package/dist/client/rag/resilience/CircuitBreaker.js +0 -431
  310. package/dist/client/rag/resilience/RetryHandler.js +0 -304
  311. package/dist/client/rag/resilience/index.js +0 -7
  312. package/dist/client/rag/retrieval/hybridSearch.js +0 -335
  313. package/dist/client/rag/retrieval/index.js +0 -5
  314. package/dist/client/rag/retrieval/vectorQueryTool.js +0 -307
  315. package/dist/client/rag/types.js +0 -8
  316. package/dist/client/sdk/toolRegistration.js +0 -377
  317. package/dist/client/server/abstract/baseServerAdapter.js +0 -575
  318. package/dist/client/server/adapters/expressAdapter.js +0 -486
  319. package/dist/client/server/adapters/fastifyAdapter.js +0 -472
  320. package/dist/client/server/adapters/honoAdapter.js +0 -632
  321. package/dist/client/server/adapters/koaAdapter.js +0 -510
  322. package/dist/client/server/errors.js +0 -486
  323. package/dist/client/server/factory/serverAdapterFactory.js +0 -160
  324. package/dist/client/server/index.js +0 -108
  325. package/dist/client/server/middleware/abortSignal.js +0 -111
  326. package/dist/client/server/middleware/auth.js +0 -388
  327. package/dist/client/server/middleware/cache.js +0 -359
  328. package/dist/client/server/middleware/common.js +0 -281
  329. package/dist/client/server/middleware/deprecation.js +0 -190
  330. package/dist/client/server/middleware/mcpBodyAttachment.js +0 -63
  331. package/dist/client/server/middleware/rateLimit.js +0 -227
  332. package/dist/client/server/middleware/validation.js +0 -388
  333. package/dist/client/server/openapi/generator.js +0 -398
  334. package/dist/client/server/openapi/index.js +0 -36
  335. package/dist/client/server/openapi/schemas.js +0 -695
  336. package/dist/client/server/openapi/templates.js +0 -374
  337. package/dist/client/server/routes/agentRoutes.js +0 -189
  338. package/dist/client/server/routes/claudeProxyRoutes.js +0 -1600
  339. package/dist/client/server/routes/healthRoutes.js +0 -187
  340. package/dist/client/server/routes/index.js +0 -57
  341. package/dist/client/server/routes/mcpRoutes.js +0 -342
  342. package/dist/client/server/routes/memoryRoutes.js +0 -350
  343. package/dist/client/server/routes/openApiRoutes.js +0 -126
  344. package/dist/client/server/routes/toolRoutes.js +0 -199
  345. package/dist/client/server/streaming/dataStream.js +0 -486
  346. package/dist/client/server/streaming/index.js +0 -11
  347. package/dist/client/server/types.js +0 -67
  348. package/dist/client/server/utils/redaction.js +0 -334
  349. package/dist/client/server/utils/validation.js +0 -243
  350. package/dist/client/server/websocket/WebSocketHandler.js +0 -383
  351. package/dist/client/server/websocket/index.js +0 -4
  352. package/dist/client/services/server/ai/observability/instrumentation.js +0 -808
  353. package/dist/client/telemetry/attributes.js +0 -100
  354. package/dist/client/telemetry/index.js +0 -26
  355. package/dist/client/telemetry/telemetryService.js +0 -308
  356. package/dist/client/telemetry/tracers.js +0 -17
  357. package/dist/client/telemetry/withSpan.js +0 -34
  358. package/dist/client/types/actionTypes.js +0 -6
  359. package/dist/client/types/analytics.js +0 -5
  360. package/dist/client/types/authTypes.js +0 -9
  361. package/dist/client/types/circuitBreakerErrors.js +0 -34
  362. package/dist/client/types/cli.js +0 -21
  363. package/dist/client/types/clientTypes.js +0 -10
  364. package/dist/client/types/common.js +0 -51
  365. package/dist/client/types/configTypes.js +0 -49
  366. package/dist/client/types/content.js +0 -19
  367. package/dist/client/types/contextTypes.js +0 -400
  368. package/dist/client/types/conversation.js +0 -47
  369. package/dist/client/types/conversationMemoryInterface.js +0 -6
  370. package/dist/client/types/domainTypes.js +0 -5
  371. package/dist/client/types/errors.js +0 -167
  372. package/dist/client/types/evaluation.js +0 -5
  373. package/dist/client/types/evaluationProviders.js +0 -5
  374. package/dist/client/types/evaluationTypes.js +0 -1
  375. package/dist/client/types/externalMcp.js +0 -6
  376. package/dist/client/types/fileReferenceTypes.js +0 -8
  377. package/dist/client/types/fileTypes.js +0 -4
  378. package/dist/client/types/generateTypes.js +0 -1
  379. package/dist/client/types/guardrails.js +0 -1
  380. package/dist/client/types/hitlTypes.js +0 -8
  381. package/dist/client/types/index.js +0 -57
  382. package/dist/client/types/mcpTypes.js +0 -5
  383. package/dist/client/types/middlewareTypes.js +0 -1
  384. package/dist/client/types/modelTypes.js +0 -30
  385. package/dist/client/types/multimodal.js +0 -135
  386. package/dist/client/types/observability.js +0 -6
  387. package/dist/client/types/pptTypes.js +0 -82
  388. package/dist/client/types/providers.js +0 -111
  389. package/dist/client/types/proxyTypes.js +0 -16
  390. package/dist/client/types/ragTypes.js +0 -7
  391. package/dist/client/types/sdkTypes.js +0 -8
  392. package/dist/client/types/serviceTypes.js +0 -5
  393. package/dist/client/types/streamTypes.js +0 -1
  394. package/dist/client/types/subscriptionTypes.js +0 -9
  395. package/dist/client/types/taskClassificationTypes.js +0 -5
  396. package/dist/client/types/tools.js +0 -24
  397. package/dist/client/types/ttsTypes.js +0 -57
  398. package/dist/client/types/typeAliases.js +0 -48
  399. package/dist/client/types/utilities.js +0 -4
  400. package/dist/client/types/workflowTypes.js +0 -30
  401. package/dist/client/utils/async/withTimeout.js +0 -98
  402. package/dist/client/utils/asyncMutex.js +0 -60
  403. package/dist/client/utils/conversationMemory.js +0 -431
  404. package/dist/client/utils/csvProcessor.js +0 -846
  405. package/dist/client/utils/errorHandling.js +0 -936
  406. package/dist/client/utils/evaluationUtils.js +0 -131
  407. package/dist/client/utils/factoryProcessing.js +0 -589
  408. package/dist/client/utils/fileDetector.js +0 -2161
  409. package/dist/client/utils/imageCache.js +0 -376
  410. package/dist/client/utils/imageProcessor.js +0 -704
  411. package/dist/client/utils/logger.js +0 -491
  412. package/dist/client/utils/mcpDefaults.js +0 -134
  413. package/dist/client/utils/messageBuilder.js +0 -1653
  414. package/dist/client/utils/modelAliasResolver.js +0 -54
  415. package/dist/client/utils/modelDetection.js +0 -80
  416. package/dist/client/utils/modelRouter.js +0 -292
  417. package/dist/client/utils/multimodalOptionsBuilder.js +0 -65
  418. package/dist/client/utils/observabilityHelpers.js +0 -47
  419. package/dist/client/utils/parameterValidation.js +0 -966
  420. package/dist/client/utils/pdfProcessor.js +0 -410
  421. package/dist/client/utils/performance.js +0 -222
  422. package/dist/client/utils/pricing.js +0 -340
  423. package/dist/client/utils/promptRedaction.js +0 -62
  424. package/dist/client/utils/providerConfig.js +0 -1009
  425. package/dist/client/utils/providerHealth.js +0 -1237
  426. package/dist/client/utils/providerRetry.js +0 -112
  427. package/dist/client/utils/providerUtils.js +0 -434
  428. package/dist/client/utils/rateLimiter.js +0 -200
  429. package/dist/client/utils/redis.js +0 -368
  430. package/dist/client/utils/retryHandler.js +0 -269
  431. package/dist/client/utils/retryability.js +0 -22
  432. package/dist/client/utils/sanitizers/svg.js +0 -481
  433. package/dist/client/utils/schemaConversion.js +0 -255
  434. package/dist/client/utils/taskClassificationUtils.js +0 -149
  435. package/dist/client/utils/taskClassifier.js +0 -94
  436. package/dist/client/utils/thinkingConfig.js +0 -104
  437. package/dist/client/utils/timeout.js +0 -359
  438. package/dist/client/utils/tokenEstimation.js +0 -142
  439. package/dist/client/utils/tokenLimits.js +0 -125
  440. package/dist/client/utils/tokenUtils.js +0 -239
  441. package/dist/client/utils/toolUtils.js +0 -75
  442. package/dist/client/utils/transformationUtils.js +0 -554
  443. package/dist/client/utils/ttsProcessor.js +0 -286
  444. package/dist/client/utils/typeUtils.js +0 -97
  445. package/dist/client/utils/videoAnalysisProcessor.js +0 -67
  446. package/dist/client/workflow/config.js +0 -398
  447. package/dist/client/workflow/core/ensembleExecutor.js +0 -407
  448. package/dist/client/workflow/core/judgeScorer.js +0 -544
  449. package/dist/client/workflow/core/responseConditioner.js +0 -225
  450. package/dist/client/workflow/core/types/conditionerTypes.js +0 -7
  451. package/dist/client/workflow/core/types/ensembleTypes.js +0 -7
  452. package/dist/client/workflow/core/types/index.js +0 -7
  453. package/dist/client/workflow/core/types/judgeTypes.js +0 -7
  454. package/dist/client/workflow/core/types/layerTypes.js +0 -7
  455. package/dist/client/workflow/core/types/registryTypes.js +0 -7
  456. package/dist/client/workflow/core/workflowRegistry.js +0 -304
  457. package/dist/client/workflow/core/workflowRunner.js +0 -586
  458. package/dist/client/workflow/index.js +0 -50
  459. package/dist/client/workflow/types.js +0 -9
  460. package/dist/client/workflow/utils/types/index.js +0 -7
  461. package/dist/client/workflow/utils/workflowMetrics.js +0 -311
  462. package/dist/client/workflow/utils/workflowValidation.js +0 -420
  463. package/dist/client/workflow/workflows/adaptiveWorkflow.js +0 -366
  464. package/dist/client/workflow/workflows/consensusWorkflow.js +0 -192
  465. package/dist/client/workflow/workflows/fallbackWorkflow.js +0 -225
  466. package/dist/client/workflow/workflows/multiJudgeWorkflow.js +0 -351
  467. /package/dist/client/{client/reactHooks.js → reactHooks.js} +0 -0
@@ -1,148 +0,0 @@
1
- /**
2
- * Recursive Chunker
3
- *
4
- * Smart text splitting using hierarchical separators.
5
- * Tries each separator in order, recursively splitting chunks that are too large.
6
- * Best for general-purpose text that has natural boundaries.
7
- */
8
- import { randomUUID } from "crypto";
9
/**
 * Recursive chunker implementation.
 *
 * Splits text on a prioritized list of separators. The first separator that
 * occurs in the text is used; pieces that are still larger than `maxSize` are
 * split again with the remaining (finer-grained) separators. When no separator
 * applies, the text is split character by character so the recursion always
 * terminates.
 */
export class RecursiveChunker {
    static #DEFAULT_MAX_SIZE = 1000;
    static #DEFAULT_OVERLAP = 200;

    strategy = "recursive";
    defaultSeparators = ["\n\n", "\n", ". ", " ", ""];

    /**
     * Split `text` into chunk records.
     *
     * @param {string} text - Text to split. Empty / missing text yields `[]`.
     * @param {object} [config]
     * @param {number} [config.maxSize=1000] - Target maximum chunk length (UTF-16 units).
     * @param {number} [config.overlap=200] - Characters of the previous chunk repeated at the start of the next.
     * @param {string[]} [config.separators] - Separators in priority order; defaults to `defaultSeparators`.
     * @param {boolean} [config.isSeparatorRegex=false] - Treat each separator as a regex source string.
     * @param {boolean} [config.trimWhitespace=true] - Trim each chunk's text; whitespace-only chunks are dropped.
     * @param {object} [config.metadata={}] - Stored verbatim under `metadata.custom` on every chunk.
     * @returns {Promise<object[]>} Chunks shaped `{ id, text, metadata }`.
     */
    async chunk(text, config) {
        const {
            maxSize = RecursiveChunker.#DEFAULT_MAX_SIZE,
            overlap = RecursiveChunker.#DEFAULT_OVERLAP,
            separators = this.defaultSeparators,
            isSeparatorRegex = false,
            trimWhitespace = true,
            metadata = {},
        } = config || {};
        const chunks = [];
        if (!text || text.length === 0) {
            return chunks;
        }
        const documentId = randomUUID();
        const pieces = this.recursiveSplit(text, separators, maxSize, overlap, isSeparatorRegex);
        let chunkIndex = 0;
        let searchFrom = 0;
        for (const piece of pieces) {
            const chunkText = trimWhitespace ? piece.trim() : piece;
            if (chunkText.length === 0) {
                continue;
            }
            const found = text.indexOf(piece, searchFrom);
            // If the piece cannot be located, fall back to the search cursor.
            const startPosition = found >= 0 ? found : searchFrom;
            chunks.push({
                id: randomUUID(),
                text: chunkText,
                metadata: {
                    documentId,
                    chunkIndex,
                    startPosition,
                    endPosition: startPosition + piece.length,
                    documentType: "text",
                    custom: metadata,
                },
            });
            chunkIndex++;
            if (found >= 0) {
                // The next chunk may begin up to `overlap` characters before the
                // end of this one, but never before this chunk's own start;
                // clamping also keeps the cursor from going negative.
                searchFrom = Math.max(found, found + piece.length - overlap);
            }
        }
        // The total is only known once every chunk has been produced.
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        return chunks;
    }

    /**
     * Recursively split `text` into pieces of at most `maxSize` characters
     * (pieces that receive an overlap prefix may exceed it).
     *
     * @param {string} text - Text to split.
     * @param {string[]} separators - Separators still available, in priority order.
     * @param {number} maxSize - Target maximum piece length.
     * @param {number} overlap - Characters of the previous piece to prepend to the next one.
     * @param {boolean} isRegex - Whether separators are regex source strings.
     * @returns {string[]} The pieces, in document order.
     */
    recursiveSplit(text, separators, maxSize, overlap, isRegex) {
        if (text.length <= maxSize) {
            return [text];
        }
        const matchIndex = separators.findIndex((sep) => sep === "" || (isRegex ? new RegExp(sep).test(text) : text.includes(sep)));
        // No usable separator left: split per character instead of recursing
        // forever on the same text with the same (or an empty) separator list.
        const separator = matchIndex === -1 ? "" : separators[matchIndex];
        const remaining = matchIndex === -1 ? [] : separators.slice(matchIndex + 1);
        const parts = this.#splitWithJoiners(text, separator, isRegex);
        const results = [];
        let current = "";
        for (const { piece, joiner } of parts) {
            const candidate = current ? current + joiner + piece : piece;
            if (candidate.length <= maxSize) {
                current = candidate;
                continue;
            }
            // The accumulated chunk is full; flush it before handling `piece`.
            if (current.length > 0) {
                results.push(current);
            }
            if (piece.length > maxSize && piece.length > 1) {
                // Still too large: retry with the finer-grained separators.
                const subPieces = this.recursiveSplit(piece, remaining, maxSize, overlap, isRegex);
                for (let i = 0; i < subPieces.length - 1; i++) {
                    results.push(subPieces[i]);
                }
                current = subPieces[subPieces.length - 1] || "";
            }
            else if (results.length > 0 && overlap > 0) {
                // Start the next chunk with the tail of the previous one.
                current = results[results.length - 1].slice(-overlap) + joiner + piece;
            }
            else {
                current = piece;
            }
        }
        if (current.length > 0) {
            results.push(current);
        }
        return results;
    }

    /**
     * Split `text` on `separator`, remembering for each piece the delimiter
     * text that preceded it. In regex mode this is the text that actually
     * matched, so pieces can be re-joined into substrings of the original.
     *
     * @param {string} text
     * @param {string} separator - Literal separator, or regex source when `isRegex`.
     * @param {boolean} isRegex
     * @returns {{ piece: string, joiner: string }[]}
     */
    #splitWithJoiners(text, separator, isRegex) {
        if (!isRegex || separator === "") {
            return text
                .split(separator)
                .map((piece, index) => ({ piece, joiner: index === 0 ? "" : separator }));
        }
        const parts = [];
        let cursor = 0;
        let joiner = "";
        for (const match of text.matchAll(new RegExp(separator, "g"))) {
            const delimiter = match[0];
            // Zero-width matches at either end of the text do not split anything.
            if (delimiter.length === 0 && (match.index === 0 || match.index >= text.length)) {
                continue;
            }
            parts.push({ piece: text.slice(cursor, match.index), joiner });
            joiner = delimiter;
            cursor = match.index + delimiter.length;
        }
        parts.push({ piece: text.slice(cursor), joiner });
        return parts;
    }

    /**
     * Validate a chunker configuration without running it.
     *
     * @param {object} [config] - Same shape as the `config` accepted by `chunk`.
     * @returns {{ valid: boolean, errors: string[], warnings: string[] }}
     *   `valid` is true when `errors` is empty; warnings never affect validity.
     */
    validateConfig(config) {
        const errors = [];
        const warnings = [];
        const recConfig = config ?? {};
        if (recConfig.maxSize !== undefined && recConfig.maxSize <= 0) {
            errors.push("maxSize must be greater than 0");
        }
        if (recConfig.overlap !== undefined && recConfig.overlap < 0) {
            errors.push("overlap must be non-negative");
        }
        if (recConfig.separators !== undefined &&
            recConfig.separators.length === 0) {
            errors.push("separators array must not be empty");
        }
        if (recConfig.isSeparatorRegex && recConfig.separators) {
            for (const sep of recConfig.separators) {
                try {
                    new RegExp(sep);
                }
                catch {
                    errors.push(`Invalid regex separator: ${sep}`);
                }
            }
        }
        // An overlap as large as the chunk size makes every overlapped chunk
        // exceed maxSize; report it without failing validation.
        const effectiveMaxSize = recConfig.maxSize ?? RecursiveChunker.#DEFAULT_MAX_SIZE;
        const effectiveOverlap = recConfig.overlap ?? RecursiveChunker.#DEFAULT_OVERLAP;
        if (effectiveMaxSize > 0 && effectiveOverlap >= effectiveMaxSize) {
            warnings.push("overlap should be smaller than maxSize");
        }
        return {
            valid: errors.length === 0,
            errors,
            warnings,
        };
    }
}
@@ -1,306 +0,0 @@
1
- /**
2
- * Semantic Chunker
3
- *
4
- * LLM-powered semantic chunking that groups related content together.
5
- * Uses embedding similarity to determine natural breakpoints.
6
- * Best for complex documents where meaning should drive segmentation.
7
- */
8
- import { randomUUID } from "crypto";
9
- import { ProviderFactory } from "../../factories/providerFactory.js";
10
- import { logger } from "../../utils/logger.js";
11
- /**
12
- * Semantic chunker implementation
13
- * Uses embedding similarity to find natural content boundaries
14
- */
15
- export class SemanticChunker {
16
- strategy = "semantic";
17
- async chunk(text, config) {
18
- const { maxSize = 1000, overlap = 0, joinThreshold = 100, modelName = "text-embedding-3-small", provider = "openai", similarityThreshold = 0.7, trimWhitespace = true, metadata = {}, } = config || {};
19
- const documentId = randomUUID();
20
- const chunks = [];
21
- if (!text || text.length === 0) {
22
- return chunks;
23
- }
24
- // First, split into initial segments (paragraphs or sentences)
25
- const segments = this.splitIntoSegments(text, joinThreshold);
26
- if (segments.length <= 1) {
27
- // Single segment, no need for semantic analysis
28
- chunks.push({
29
- id: randomUUID(),
30
- text: trimWhitespace ? text.trim() : text,
31
- metadata: {
32
- documentId,
33
- chunkIndex: 0,
34
- totalChunks: 1,
35
- startPosition: 0,
36
- endPosition: text.length,
37
- documentType: "text",
38
- custom: metadata,
39
- },
40
- });
41
- return chunks;
42
- }
43
- try {
44
- // Get embeddings for each segment
45
- const embeddings = await this.getEmbeddings(segments, provider, modelName);
46
- // Find semantic breakpoints
47
- const breakpoints = this.findSemanticBreakpoints(embeddings, similarityThreshold);
48
- // Group segments by semantic similarity
49
- const groups = this.groupSegments(segments, breakpoints, maxSize);
50
- // Create chunks from groups
51
- let chunkIndex = 0;
52
- let currentPosition = 0;
53
- for (const group of groups) {
54
- const chunkText = group.join("\n\n");
55
- const finalText = trimWhitespace ? chunkText.trim() : chunkText;
56
- if (finalText.length > 0) {
57
- chunks.push({
58
- id: randomUUID(),
59
- text: finalText,
60
- metadata: {
61
- documentId,
62
- chunkIndex,
63
- startPosition: currentPosition,
64
- endPosition: currentPosition + chunkText.length,
65
- documentType: "text",
66
- custom: {
67
- ...metadata,
68
- segmentCount: group.length,
69
- },
70
- },
71
- });
72
- chunkIndex++;
73
- }
74
- currentPosition += chunkText.length + 2; // +2 for separator
75
- }
76
- // Handle overlap if configured
77
- if (overlap > 0) {
78
- chunks.forEach((chunk, i) => {
79
- if (i > 0) {
80
- // Add overlap from previous chunk
81
- const prevText = chunks[i - 1].text;
82
- const overlapText = prevText.slice(-overlap);
83
- chunk.text = overlapText + "\n" + chunk.text;
84
- }
85
- });
86
- }
87
- }
88
- catch (error) {
89
- // Fallback to simple chunking if embeddings fail
90
- logger.warn("[SemanticChunker] Embedding failed, falling back to simple chunking", {
91
- error: error instanceof Error ? error.message : String(error),
92
- });
93
- return this.fallbackChunk(text, maxSize, overlap, documentId, metadata, trimWhitespace);
94
- }
95
- // Update total chunks count
96
- chunks.forEach((chunk) => {
97
- chunk.metadata.totalChunks = chunks.length;
98
- });
99
- return chunks;
100
- }
101
- /**
102
- * Split text into initial segments for embedding
103
- */
104
- splitIntoSegments(text, minSize) {
105
- const segments = [];
106
- // Split by double newlines (paragraphs)
107
- const paragraphs = text.split(/\n\n+/);
108
- let currentSegment = "";
109
- for (const paragraph of paragraphs) {
110
- const trimmed = paragraph.trim();
111
- if (trimmed.length === 0) {
112
- continue;
113
- }
114
- if (currentSegment.length === 0) {
115
- currentSegment = trimmed;
116
- }
117
- else if (currentSegment.length + trimmed.length < minSize) {
118
- // Join small paragraphs
119
- currentSegment += "\n\n" + trimmed;
120
- }
121
- else {
122
- // Save current and start new
123
- if (currentSegment.length > 0) {
124
- segments.push(currentSegment);
125
- }
126
- currentSegment = trimmed;
127
- }
128
- }
129
- // Don't forget the last segment
130
- if (currentSegment.length > 0) {
131
- segments.push(currentSegment);
132
- }
133
- return segments;
134
- }
135
- /**
136
- * Get embeddings for segments
137
- */
138
- async getEmbeddings(segments, provider, modelName) {
139
- const embeddingProvider = await ProviderFactory.createProvider(provider, modelName);
140
- // Check if provider has embed method
141
- if (typeof embeddingProvider.embed !==
142
- "function") {
143
- throw new Error(`Provider ${provider} does not support embeddings`);
144
- }
145
- const embeddings = [];
146
- // Process in batches to avoid rate limits
147
- const batchSize = 10;
148
- for (let i = 0; i < segments.length; i += batchSize) {
149
- const batch = segments.slice(i, i + batchSize);
150
- for (const segment of batch) {
151
- try {
152
- const embedding = await embeddingProvider.embed(segment);
153
- embeddings.push(embedding);
154
- }
155
- catch (error) {
156
- logger.warn("[SemanticChunker] Failed to embed segment", {
157
- error: error instanceof Error ? error.message : String(error),
158
- });
159
- // Use zero vector as fallback
160
- embeddings.push(new Array(1536).fill(0));
161
- }
162
- }
163
- }
164
- return embeddings;
165
- }
166
- /**
167
- * Find semantic breakpoints using cosine similarity
168
- */
169
- findSemanticBreakpoints(embeddings, threshold) {
170
- const breakpoints = [];
171
- for (let i = 1; i < embeddings.length; i++) {
172
- const similarity = this.cosineSimilarity(embeddings[i - 1], embeddings[i]);
173
- // If similarity is below threshold, it's a breakpoint
174
- if (similarity < threshold) {
175
- breakpoints.push(i);
176
- }
177
- }
178
- return breakpoints;
179
- }
180
- /**
181
- * Group segments based on breakpoints and size limits
182
- */
183
- groupSegments(segments, breakpoints, maxSize) {
184
- const groups = [];
185
- let currentGroup = [];
186
- let currentSize = 0;
187
- let breakpointIndex = 0;
188
- for (let i = 0; i < segments.length; i++) {
189
- const segment = segments[i];
190
- const segmentSize = segment.length;
191
- // Check if we're at a breakpoint or exceeding size
192
- const isBreakpoint = breakpointIndex < breakpoints.length &&
193
- breakpoints[breakpointIndex] === i;
194
- if ((currentSize + segmentSize > maxSize && currentGroup.length > 0) ||
195
- (isBreakpoint && currentGroup.length > 0)) {
196
- // Save current group
197
- groups.push(currentGroup);
198
- currentGroup = [];
199
- currentSize = 0;
200
- }
201
- if (isBreakpoint) {
202
- breakpointIndex++;
203
- }
204
- currentGroup.push(segment);
205
- currentSize += segmentSize;
206
- }
207
- // Don't forget the last group
208
- if (currentGroup.length > 0) {
209
- groups.push(currentGroup);
210
- }
211
- return groups;
212
- }
213
- /**
214
- * Calculate cosine similarity between two vectors
215
- */
216
- cosineSimilarity(a, b) {
217
- if (a.length !== b.length) {
218
- return 0;
219
- }
220
- let dotProduct = 0;
221
- let normA = 0;
222
- let normB = 0;
223
- for (let i = 0; i < a.length; i++) {
224
- dotProduct += a[i] * b[i];
225
- normA += a[i] * a[i];
226
- normB += b[i] * b[i];
227
- }
228
- const denominator = Math.sqrt(normA) * Math.sqrt(normB);
229
- return denominator === 0 ? 0 : dotProduct / denominator;
230
- }
231
- /**
232
- * Fallback to simple chunking when embeddings fail
233
- */
234
- fallbackChunk(text, maxSize, overlap, documentId, metadata, trimWhitespace) {
235
- const effectiveMaxSize = Math.max(maxSize, 1);
236
- const effectiveOverlap = Math.min(Math.max(overlap, 0), effectiveMaxSize - 1);
237
- const chunks = [];
238
- let start = 0;
239
- let chunkIndex = 0;
240
- while (start < text.length) {
241
- let end = Math.min(start + effectiveMaxSize, text.length);
242
- // Try to break at paragraph boundary
243
- if (end < text.length) {
244
- const searchStart = Math.max(start, end - 200);
245
- const searchText = text.slice(searchStart, end);
246
- const paragraphBreak = searchText.lastIndexOf("\n\n");
247
- if (paragraphBreak > 0) {
248
- end = searchStart + paragraphBreak;
249
- }
250
- }
251
- const chunkText = text.slice(start, end);
252
- const finalText = trimWhitespace ? chunkText.trim() : chunkText;
253
- if (finalText.length > 0) {
254
- chunks.push({
255
- id: randomUUID(),
256
- text: finalText,
257
- metadata: {
258
- documentId,
259
- chunkIndex,
260
- startPosition: start,
261
- endPosition: end,
262
- documentType: "text",
263
- custom: {
264
- ...metadata,
265
- fallbackChunking: true,
266
- },
267
- },
268
- });
269
- chunkIndex++;
270
- }
271
- start = Math.max(start + 1, end - effectiveOverlap);
272
- }
273
- return chunks;
274
- }
275
- validateConfig(config) {
276
- const errors = [];
277
- const warnings = [];
278
- const semConfig = config;
279
- if (semConfig.maxSize !== undefined && semConfig.maxSize <= 0) {
280
- errors.push("maxSize must be greater than 0");
281
- }
282
- if (semConfig.overlap !== undefined && semConfig.overlap < 0) {
283
- errors.push("overlap must be non-negative");
284
- }
285
- if (semConfig.overlap !== undefined &&
286
- semConfig.maxSize !== undefined &&
287
- semConfig.overlap >= semConfig.maxSize) {
288
- errors.push("overlap must be less than maxSize");
289
- }
290
- if (semConfig.similarityThreshold !== undefined) {
291
- if (semConfig.similarityThreshold < 0 ||
292
- semConfig.similarityThreshold > 1) {
293
- errors.push("similarityThreshold must be between 0 and 1");
294
- }
295
- }
296
- if (semConfig.joinThreshold !== undefined && semConfig.joinThreshold < 0) {
297
- errors.push("joinThreshold must be non-negative");
298
- }
299
- warnings.push("Semantic chunking requires an embedding provider. Ensure API credentials are configured.");
300
- return {
301
- valid: errors.length === 0,
302
- errors,
303
- warnings,
304
- };
305
- }
306
- }
@@ -1,230 +0,0 @@
1
- /**
2
- * Sentence-based Chunker
3
- *
4
- * Splits text based on sentence boundaries while respecting size limits.
5
- * Best for prose and natural language content where sentence integrity matters.
6
- */
7
- import { randomUUID } from "crypto";
8
- /**
9
- * Sentence-aware chunker implementation
10
- * Splits text by sentences while respecting size constraints
11
- */
12
- export class SentenceChunker {
13
- strategy = "sentence";
14
- defaultSentenceEnders = [".", "!", "?"];
15
- async chunk(text, config) {
16
- const { maxSize = 1000, overlap = 0, sentenceEnders = this.defaultSentenceEnders, minSentences = 1, maxSentences, trimWhitespace = true, metadata = {}, } = config || {};
17
- const chunks = [];
18
- const documentId = randomUUID();
19
- if (!text || text.length === 0) {
20
- return chunks;
21
- }
22
- // Split text into sentences
23
- const sentences = this.splitIntoSentences(text, sentenceEnders);
24
- if (sentences.length === 0) {
25
- return chunks;
26
- }
27
- let currentChunkSentences = [];
28
- let currentChunkLength = 0;
29
- let chunkIndex = 0;
30
- let startPosition = 0;
31
- let currentPosition = 0;
32
- for (let i = 0; i < sentences.length; i++) {
33
- const sentence = sentences[i];
34
- const sentenceLength = sentence.length;
35
- // Check if adding this sentence would exceed limits
36
- const wouldExceedSize = currentChunkLength + sentenceLength + 1 > maxSize;
37
- const wouldExceedSentences = maxSentences !== undefined &&
38
- currentChunkSentences.length >= maxSentences;
39
- if (currentChunkSentences.length > 0 &&
40
- (wouldExceedSize || wouldExceedSentences)) {
41
- // Save current chunk if it meets minimum requirements
42
- if (currentChunkSentences.length >= minSentences) {
43
- const chunkText = currentChunkSentences.join(" ");
44
- const finalText = trimWhitespace ? chunkText.trim() : chunkText;
45
- if (finalText.length > 0) {
46
- chunks.push({
47
- id: randomUUID(),
48
- text: finalText,
49
- metadata: {
50
- documentId,
51
- chunkIndex,
52
- startPosition,
53
- endPosition: startPosition + chunkText.length,
54
- documentType: "text",
55
- custom: metadata,
56
- },
57
- });
58
- chunkIndex++;
59
- }
60
- }
61
- // Handle overlap by keeping some sentences
62
- if (overlap > 0 && currentChunkSentences.length > 0) {
63
- // Calculate how many sentences to keep for overlap
64
- let overlapLength = 0;
65
- const overlapSentences = [];
66
- for (let j = currentChunkSentences.length - 1; j >= 0; j--) {
67
- const s = currentChunkSentences[j];
68
- if (overlapLength + s.length + 1 <= overlap) {
69
- overlapSentences.unshift(s);
70
- overlapLength += s.length + 1;
71
- }
72
- else {
73
- break;
74
- }
75
- }
76
- currentChunkSentences = overlapSentences;
77
- currentChunkLength = overlapLength;
78
- startPosition = currentPosition - overlapLength;
79
- }
80
- else {
81
- currentChunkSentences = [];
82
- currentChunkLength = 0;
83
- startPosition = currentPosition;
84
- }
85
- }
86
- // Handle sentences larger than maxSize
87
- if (sentenceLength > maxSize) {
88
- // Split the sentence itself if necessary
89
- const subChunks = this.splitLargeSentence(sentence, maxSize);
90
- for (const subChunk of subChunks) {
91
- chunks.push({
92
- id: randomUUID(),
93
- text: trimWhitespace ? subChunk.trim() : subChunk,
94
- metadata: {
95
- documentId,
96
- chunkIndex,
97
- startPosition: currentPosition,
98
- endPosition: currentPosition + subChunk.length,
99
- documentType: "text",
100
- custom: metadata,
101
- },
102
- });
103
- chunkIndex++;
104
- currentPosition += subChunk.length;
105
- }
106
- startPosition = currentPosition;
107
- }
108
- else {
109
- currentChunkSentences.push(sentence);
110
- currentChunkLength += sentenceLength + 1; // +1 for space
111
- currentPosition += sentenceLength + 1;
112
- }
113
- }
114
- // Don't forget the last chunk
115
- if (currentChunkSentences.length >= minSentences) {
116
- const chunkText = currentChunkSentences.join(" ");
117
- const finalText = trimWhitespace ? chunkText.trim() : chunkText;
118
- if (finalText.length > 0) {
119
- chunks.push({
120
- id: randomUUID(),
121
- text: finalText,
122
- metadata: {
123
- documentId,
124
- chunkIndex,
125
- startPosition,
126
- endPosition: startPosition + chunkText.length,
127
- documentType: "text",
128
- custom: metadata,
129
- },
130
- });
131
- }
132
- }
133
- // Update total chunks count
134
- chunks.forEach((chunk) => {
135
- chunk.metadata.totalChunks = chunks.length;
136
- });
137
- return chunks;
138
- }
139
- /**
140
- * Split text into sentences based on sentence enders
141
- */
142
- splitIntoSentences(text, sentenceEnders) {
143
- const sentences = [];
144
- // Build regex pattern for sentence splitting
145
- // Look for sentence enders followed by whitespace or end of string
146
- const pattern = new RegExp(`([${sentenceEnders.map((e) => "\\" + e).join("")}]+)(?=\\s|$)`, "g");
147
- let lastIndex = 0;
148
- let match;
149
- // Reset regex state
150
- pattern.lastIndex = 0;
151
- while ((match = pattern.exec(text)) !== null) {
152
- const endIndex = match.index + match[0].length;
153
- const sentence = text.slice(lastIndex, endIndex).trim();
154
- if (sentence.length > 0) {
155
- sentences.push(sentence);
156
- }
157
- lastIndex = endIndex;
158
- // Skip whitespace
159
- while (lastIndex < text.length && /\s/.test(text[lastIndex])) {
160
- lastIndex++;
161
- }
162
- }
163
- // Don't forget the last part
164
- if (lastIndex < text.length) {
165
- const remaining = text.slice(lastIndex).trim();
166
- if (remaining.length > 0) {
167
- sentences.push(remaining);
168
- }
169
- }
170
- return sentences;
171
- }
172
- /**
173
- * Split a large sentence into smaller chunks
174
- */
175
- splitLargeSentence(sentence, maxSize) {
176
- const chunks = [];
177
- const words = sentence.split(/\s+/);
178
- let currentChunk = "";
179
- for (const word of words) {
180
- if (currentChunk.length + word.length + 1 <= maxSize) {
181
- currentChunk = currentChunk ? currentChunk + " " + word : word;
182
- }
183
- else {
184
- if (currentChunk.length > 0) {
185
- chunks.push(currentChunk);
186
- }
187
- // If a single word is larger than maxSize, we have to include it anyway
188
- currentChunk = word;
189
- }
190
- }
191
- if (currentChunk.length > 0) {
192
- chunks.push(currentChunk);
193
- }
194
- return chunks;
195
- }
196
- validateConfig(config) {
197
- const errors = [];
198
- const warnings = [];
199
- const sentConfig = config;
200
- if (sentConfig.maxSize !== undefined && sentConfig.maxSize <= 0) {
201
- errors.push("maxSize must be greater than 0");
202
- }
203
- if (sentConfig.overlap !== undefined && sentConfig.overlap < 0) {
204
- errors.push("overlap must be non-negative");
205
- }
206
- if (sentConfig.overlap !== undefined &&
207
- sentConfig.maxSize !== undefined &&
208
- sentConfig.overlap >= sentConfig.maxSize) {
209
- errors.push("overlap must be less than maxSize");
210
- }
211
- if (sentConfig.minSentences !== undefined && sentConfig.minSentences < 1) {
212
- errors.push("minSentences must be at least 1");
213
- }
214
- if (sentConfig.maxSentences !== undefined &&
215
- sentConfig.minSentences !== undefined) {
216
- if (sentConfig.maxSentences < sentConfig.minSentences) {
217
- errors.push("maxSentences must be >= minSentences");
218
- }
219
- }
220
- if (sentConfig.sentenceEnders !== undefined &&
221
- sentConfig.sentenceEnders.length === 0) {
222
- warnings.push("No sentence enders specified, using defaults");
223
- }
224
- return {
225
- valid: errors.length === 0,
226
- errors,
227
- warnings,
228
- };
229
- }
230
- }