@juspay/neurolink 7.49.0 → 7.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/README.md +12 -9
  3. package/dist/adapters/providerImageAdapter.js +82 -10
  4. package/dist/agent/directTools.d.ts +10 -10
  5. package/dist/agent/directTools.js +5 -3
  6. package/dist/cli/commands/config.js +1 -0
  7. package/dist/cli/commands/mcp.js +1 -0
  8. package/dist/cli/commands/models.js +1 -0
  9. package/dist/cli/commands/ollama.js +1 -0
  10. package/dist/cli/commands/setup-anthropic.js +1 -0
  11. package/dist/cli/commands/setup-azure.js +1 -0
  12. package/dist/cli/commands/setup-bedrock.js +1 -0
  13. package/dist/cli/commands/setup-gcp.js +1 -0
  14. package/dist/cli/commands/setup-google-ai.js +1 -0
  15. package/dist/cli/commands/setup-huggingface.js +1 -0
  16. package/dist/cli/commands/setup-mistral.js +1 -0
  17. package/dist/cli/commands/setup-openai.js +1 -0
  18. package/dist/cli/commands/setup.js +1 -0
  19. package/dist/cli/errorHandler.js +1 -0
  20. package/dist/cli/factories/commandFactory.d.ts +5 -0
  21. package/dist/cli/factories/commandFactory.js +42 -6
  22. package/dist/cli/factories/ollamaCommandFactory.js +1 -0
  23. package/dist/cli/factories/sagemakerCommandFactory.js +1 -0
  24. package/dist/cli/factories/setupCommandFactory.js +1 -0
  25. package/dist/cli/index.js +14 -2
  26. package/dist/cli/loop/conversationSelector.js +1 -0
  27. package/dist/cli/loop/optionsSchema.js +1 -0
  28. package/dist/cli/loop/session.js +1 -0
  29. package/dist/cli/parser.js +1 -0
  30. package/dist/cli/utils/completeSetup.js +1 -0
  31. package/dist/cli/utils/envManager.js +1 -0
  32. package/dist/cli/utils/interactiveSetup.js +1 -0
  33. package/dist/cli/utils/ollamaUtils.js +1 -0
  34. package/dist/constants/index.js +1 -1
  35. package/dist/core/baseProvider.d.ts +14 -0
  36. package/dist/core/baseProvider.js +106 -23
  37. package/dist/index.d.ts +11 -5
  38. package/dist/index.js +11 -10
  39. package/dist/lib/adapters/providerImageAdapter.js +83 -10
  40. package/dist/lib/agent/directTools.d.ts +10 -10
  41. package/dist/lib/agent/directTools.js +6 -3
  42. package/dist/lib/config/configManager.js +1 -0
  43. package/dist/lib/config/conversationMemory.js +1 -0
  44. package/dist/lib/config/taskClassificationConfig.js +1 -0
  45. package/dist/lib/constants/index.js +2 -1
  46. package/dist/lib/constants/performance.js +1 -0
  47. package/dist/lib/constants/retry.js +1 -0
  48. package/dist/lib/constants/timeouts.js +1 -0
  49. package/dist/lib/constants/tokens.js +1 -0
  50. package/dist/lib/core/analytics.js +1 -0
  51. package/dist/lib/core/baseProvider.d.ts +14 -0
  52. package/dist/lib/core/baseProvider.js +107 -23
  53. package/dist/lib/core/constants.js +1 -0
  54. package/dist/lib/core/conversationMemoryFactory.js +1 -0
  55. package/dist/lib/core/conversationMemoryInitializer.js +1 -0
  56. package/dist/lib/core/conversationMemoryManager.js +1 -0
  57. package/dist/lib/core/dynamicModels.js +1 -0
  58. package/dist/lib/core/evaluation.js +1 -0
  59. package/dist/lib/core/evaluationProviders.js +1 -0
  60. package/dist/lib/core/factory.js +1 -0
  61. package/dist/lib/core/modelConfiguration.js +1 -0
  62. package/dist/lib/core/redisConversationMemoryManager.js +1 -0
  63. package/dist/lib/core/serviceRegistry.js +1 -0
  64. package/dist/lib/core/streamAnalytics.js +1 -0
  65. package/dist/lib/evaluation/contextBuilder.js +1 -0
  66. package/dist/lib/evaluation/index.js +1 -0
  67. package/dist/lib/evaluation/prompts.js +1 -0
  68. package/dist/lib/evaluation/ragasEvaluator.js +1 -0
  69. package/dist/lib/evaluation/retryManager.js +1 -0
  70. package/dist/lib/evaluation/scoring.js +1 -0
  71. package/dist/lib/factories/providerFactory.js +1 -0
  72. package/dist/lib/factories/providerRegistry.js +1 -0
  73. package/dist/lib/hitl/hitlErrors.js +1 -0
  74. package/dist/lib/hitl/hitlManager.js +1 -0
  75. package/dist/lib/hitl/index.js +1 -0
  76. package/dist/lib/hitl/types.js +1 -0
  77. package/dist/lib/index.d.ts +11 -5
  78. package/dist/lib/index.js +12 -10
  79. package/dist/lib/mcp/externalServerManager.js +1 -0
  80. package/dist/lib/mcp/factory.js +1 -0
  81. package/dist/lib/mcp/flexibleToolValidator.js +1 -0
  82. package/dist/lib/mcp/index.js +1 -0
  83. package/dist/lib/mcp/mcpCircuitBreaker.js +1 -0
  84. package/dist/lib/mcp/mcpClientFactory.js +2 -1
  85. package/dist/lib/mcp/registry.js +1 -0
  86. package/dist/lib/mcp/servers/agent/directToolsServer.js +2 -0
  87. package/dist/lib/mcp/servers/aiProviders/aiAnalysisTools.js +1 -0
  88. package/dist/lib/mcp/servers/aiProviders/aiCoreServer.js +1 -0
  89. package/dist/lib/mcp/servers/aiProviders/aiWorkflowTools.js +1 -0
  90. package/dist/lib/mcp/servers/utilities/utilityServer.js +1 -0
  91. package/dist/lib/mcp/toolDiscoveryService.js +1 -0
  92. package/dist/lib/mcp/toolRegistry.js +1 -0
  93. package/dist/lib/memory/mem0Initializer.js +1 -0
  94. package/dist/lib/middleware/builtin/analytics.js +1 -0
  95. package/dist/lib/middleware/builtin/autoEvaluation.js +1 -0
  96. package/dist/lib/middleware/builtin/guardrails.js +1 -0
  97. package/dist/lib/middleware/factory.js +1 -0
  98. package/dist/lib/middleware/index.js +1 -0
  99. package/dist/lib/middleware/registry.js +1 -0
  100. package/dist/lib/middleware/utils/guardrailsUtils.js +1 -0
  101. package/dist/lib/models/modelRegistry.js +1 -0
  102. package/dist/lib/models/modelResolver.js +2 -0
  103. package/dist/lib/neurolink.d.ts +41 -6
  104. package/dist/lib/neurolink.js +276 -5
  105. package/dist/lib/providers/amazonBedrock.d.ts +1 -0
  106. package/dist/lib/providers/amazonBedrock.js +166 -14
  107. package/dist/lib/providers/amazonSagemaker.js +1 -0
  108. package/dist/lib/providers/anthropic.js +8 -21
  109. package/dist/lib/providers/anthropicBaseProvider.js +1 -0
  110. package/dist/lib/providers/azureOpenai.js +6 -21
  111. package/dist/lib/providers/googleAiStudio.js +6 -21
  112. package/dist/lib/providers/googleVertex.js +9 -1
  113. package/dist/lib/providers/huggingFace.js +34 -3
  114. package/dist/lib/providers/index.js +1 -0
  115. package/dist/lib/providers/litellm.js +34 -3
  116. package/dist/lib/providers/mistral.js +32 -2
  117. package/dist/lib/providers/ollama.d.ts +37 -1
  118. package/dist/lib/providers/ollama.js +544 -58
  119. package/dist/lib/providers/openAI.js +6 -21
  120. package/dist/lib/providers/openaiCompatible.js +41 -4
  121. package/dist/lib/providers/sagemaker/adaptive-semaphore.js +1 -0
  122. package/dist/lib/providers/sagemaker/client.js +1 -0
  123. package/dist/lib/providers/sagemaker/config.js +1 -0
  124. package/dist/lib/providers/sagemaker/detection.js +1 -0
  125. package/dist/lib/providers/sagemaker/diagnostics.js +1 -0
  126. package/dist/lib/providers/sagemaker/error-constants.js +1 -0
  127. package/dist/lib/providers/sagemaker/errors.js +1 -0
  128. package/dist/lib/providers/sagemaker/index.js +1 -0
  129. package/dist/lib/providers/sagemaker/language-model.js +1 -0
  130. package/dist/lib/providers/sagemaker/parsers.js +1 -0
  131. package/dist/lib/providers/sagemaker/streaming.js +1 -0
  132. package/dist/lib/providers/sagemaker/structured-parser.js +1 -0
  133. package/dist/lib/proxy/awsProxyIntegration.js +1 -0
  134. package/dist/lib/proxy/proxyFetch.js +1 -0
  135. package/dist/lib/proxy/utils/noProxyUtils.js +1 -0
  136. package/dist/lib/sdk/toolRegistration.js +2 -0
  137. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +57 -0
  138. package/dist/lib/services/server/ai/observability/instrumentation.js +171 -0
  139. package/dist/lib/session/globalSessionState.js +38 -1
  140. package/dist/lib/telemetry/index.d.ts +1 -0
  141. package/dist/lib/telemetry/index.js +1 -0
  142. package/dist/lib/telemetry/telemetryService.d.ts +2 -0
  143. package/dist/lib/telemetry/telemetryService.js +8 -7
  144. package/dist/lib/types/analytics.js +1 -0
  145. package/dist/lib/types/cli.js +1 -0
  146. package/dist/lib/types/common.js +1 -0
  147. package/dist/lib/types/configTypes.js +1 -0
  148. package/dist/lib/types/content.d.ts +14 -1
  149. package/dist/lib/types/content.js +1 -0
  150. package/dist/lib/types/contextTypes.js +1 -0
  151. package/dist/lib/types/conversation.d.ts +2 -0
  152. package/dist/lib/types/conversation.js +1 -0
  153. package/dist/lib/types/domainTypes.js +1 -0
  154. package/dist/lib/types/errors.js +1 -0
  155. package/dist/lib/types/evaluation.js +1 -0
  156. package/dist/lib/types/evaluationProviders.js +1 -0
  157. package/dist/lib/types/evaluationTypes.js +1 -0
  158. package/dist/lib/types/externalMcp.js +1 -0
  159. package/dist/lib/types/fileTypes.d.ts +44 -0
  160. package/dist/lib/types/fileTypes.js +1 -0
  161. package/dist/lib/types/generateTypes.d.ts +1 -0
  162. package/dist/lib/types/generateTypes.js +1 -0
  163. package/dist/lib/types/guardrails.js +1 -0
  164. package/dist/lib/types/index.js +1 -0
  165. package/dist/lib/types/mcpTypes.js +1 -0
  166. package/dist/lib/types/middlewareTypes.js +1 -0
  167. package/dist/lib/types/modelTypes.d.ts +6 -6
  168. package/dist/lib/types/modelTypes.js +1 -0
  169. package/dist/lib/types/observability.d.ts +49 -0
  170. package/dist/lib/types/observability.js +7 -0
  171. package/dist/lib/types/providers.d.ts +44 -0
  172. package/dist/lib/types/providers.js +1 -0
  173. package/dist/lib/types/sdkTypes.js +1 -0
  174. package/dist/lib/types/serviceTypes.js +1 -0
  175. package/dist/lib/types/streamTypes.d.ts +1 -0
  176. package/dist/lib/types/streamTypes.js +1 -0
  177. package/dist/lib/types/taskClassificationTypes.js +1 -0
  178. package/dist/lib/types/tools.js +2 -0
  179. package/dist/lib/types/typeAliases.js +1 -0
  180. package/dist/lib/types/universalProviderOptions.js +1 -0
  181. package/dist/lib/utils/analyticsUtils.js +1 -0
  182. package/dist/lib/utils/conversationMemory.js +1 -0
  183. package/dist/lib/utils/conversationMemoryUtils.js +1 -0
  184. package/dist/lib/utils/csvProcessor.js +1 -0
  185. package/dist/lib/utils/errorHandling.js +1 -0
  186. package/dist/lib/utils/evaluationUtils.js +1 -0
  187. package/dist/lib/utils/factoryProcessing.js +1 -0
  188. package/dist/lib/utils/fileDetector.js +7 -3
  189. package/dist/lib/utils/imageProcessor.js +1 -0
  190. package/dist/lib/utils/logger.js +1 -0
  191. package/dist/lib/utils/loopUtils.js +1 -0
  192. package/dist/lib/utils/mcpDefaults.js +1 -0
  193. package/dist/lib/utils/messageBuilder.js +96 -9
  194. package/dist/lib/utils/modelRouter.js +1 -0
  195. package/dist/lib/utils/multimodalOptionsBuilder.d.ts +67 -0
  196. package/dist/lib/utils/multimodalOptionsBuilder.js +65 -0
  197. package/dist/lib/utils/optionsConversion.js +1 -0
  198. package/dist/lib/utils/optionsUtils.js +1 -0
  199. package/dist/lib/utils/parameterValidation.js +1 -0
  200. package/dist/lib/utils/pdfProcessor.d.ts +10 -0
  201. package/dist/lib/utils/pdfProcessor.js +199 -0
  202. package/dist/lib/utils/performance.js +1 -0
  203. package/dist/lib/utils/promptRedaction.js +1 -0
  204. package/dist/lib/utils/providerConfig.js +1 -0
  205. package/dist/lib/utils/providerHealth.js +1 -0
  206. package/dist/lib/utils/providerSetupMessages.js +1 -0
  207. package/dist/lib/utils/providerUtils.js +1 -0
  208. package/dist/lib/utils/redis.js +1 -0
  209. package/dist/lib/utils/retryHandler.js +1 -0
  210. package/dist/lib/utils/schemaConversion.js +1 -0
  211. package/dist/lib/utils/taskClassificationUtils.js +1 -0
  212. package/dist/lib/utils/taskClassifier.js +1 -0
  213. package/dist/lib/utils/timeout.js +1 -0
  214. package/dist/lib/utils/tokenLimits.js +1 -0
  215. package/dist/lib/utils/toolUtils.js +1 -0
  216. package/dist/lib/utils/transformationUtils.js +1 -0
  217. package/dist/lib/utils/typeUtils.js +1 -0
  218. package/dist/mcp/mcpClientFactory.js +1 -1
  219. package/dist/mcp/servers/agent/directToolsServer.js +1 -0
  220. package/dist/models/modelResolver.js +1 -0
  221. package/dist/neurolink.d.ts +41 -6
  222. package/dist/neurolink.js +275 -5
  223. package/dist/providers/amazonBedrock.d.ts +1 -0
  224. package/dist/providers/amazonBedrock.js +165 -14
  225. package/dist/providers/anthropic.js +7 -21
  226. package/dist/providers/azureOpenai.js +5 -21
  227. package/dist/providers/googleAiStudio.js +5 -21
  228. package/dist/providers/googleVertex.js +8 -1
  229. package/dist/providers/huggingFace.js +33 -3
  230. package/dist/providers/litellm.js +33 -3
  231. package/dist/providers/mistral.js +31 -2
  232. package/dist/providers/ollama.d.ts +37 -1
  233. package/dist/providers/ollama.js +543 -58
  234. package/dist/providers/openAI.js +5 -21
  235. package/dist/providers/openaiCompatible.js +40 -4
  236. package/dist/sdk/toolRegistration.js +1 -0
  237. package/dist/services/server/ai/observability/instrumentation.d.ts +57 -0
  238. package/dist/services/server/ai/observability/instrumentation.js +170 -0
  239. package/dist/session/globalSessionState.js +37 -1
  240. package/dist/telemetry/index.d.ts +1 -0
  241. package/dist/telemetry/telemetryService.d.ts +2 -0
  242. package/dist/telemetry/telemetryService.js +7 -7
  243. package/dist/types/content.d.ts +14 -1
  244. package/dist/types/conversation.d.ts +2 -0
  245. package/dist/types/fileTypes.d.ts +44 -0
  246. package/dist/types/generateTypes.d.ts +1 -0
  247. package/dist/types/observability.d.ts +49 -0
  248. package/dist/types/observability.js +6 -0
  249. package/dist/types/providers.d.ts +44 -0
  250. package/dist/types/streamTypes.d.ts +1 -0
  251. package/dist/types/tools.js +1 -0
  252. package/dist/utils/fileDetector.js +6 -3
  253. package/dist/utils/messageBuilder.js +95 -9
  254. package/dist/utils/multimodalOptionsBuilder.d.ts +67 -0
  255. package/dist/utils/multimodalOptionsBuilder.js +64 -0
  256. package/dist/utils/pdfProcessor.d.ts +10 -0
  257. package/dist/utils/pdfProcessor.js +198 -0
  258. package/package.json +12 -16
@@ -441,6 +441,19 @@ export type BedrockToolResult = {
441
441
  */
442
442
  export type BedrockContentBlock = {
443
443
  text?: string;
444
+ image?: {
445
+ format: "png" | "jpeg" | "gif" | "webp";
446
+ source: {
447
+ bytes?: Uint8Array | Buffer;
448
+ };
449
+ };
450
+ document?: {
451
+ format: "pdf" | "csv" | "doc" | "docx" | "xls" | "xlsx" | "html" | "txt" | "md";
452
+ name: string;
453
+ source: {
454
+ bytes?: Uint8Array | Buffer;
455
+ };
456
+ };
444
457
  toolUse?: BedrockToolUse;
445
458
  toolResult?: BedrockToolResult;
446
459
  };
@@ -555,6 +568,37 @@ export type ModelsResponse = {
555
568
  owned_by?: string;
556
569
  }>;
557
570
  };
571
+ /**
572
+ * Ollama tool call structure
573
+ */
574
+ export type OllamaToolCall = {
575
+ id: string;
576
+ type: "function";
577
+ function: {
578
+ name: string;
579
+ arguments: string;
580
+ };
581
+ };
582
+ /**
583
+ * Ollama tool result structure
584
+ */
585
+ export type OllamaToolResult = {
586
+ tool_call_id: string;
587
+ content: string;
588
+ };
589
+ /**
590
+ * Ollama message structure for conversation and tool execution
591
+ */
592
+ export type OllamaMessage = {
593
+ role: "system" | "user" | "assistant" | "tool";
594
+ content: string | Array<{
595
+ type: string;
596
+ text?: string;
597
+ [key: string]: unknown;
598
+ }>;
599
+ tool_calls?: OllamaToolCall[];
600
+ images?: string[];
601
+ };
558
602
  /**
559
603
  * Default model aliases for easy reference
560
604
  */
@@ -126,6 +126,7 @@ export interface StreamOptions {
126
126
  audio?: AudioInputSpec;
127
127
  images?: Array<Buffer | string>;
128
128
  csvFiles?: Array<Buffer | string>;
129
+ pdfFiles?: Array<Buffer | string>;
129
130
  files?: Array<Buffer | string>;
130
131
  content?: Array<TextContent | ImageContent>;
131
132
  };
@@ -2,6 +2,7 @@
2
2
  * Type definitions for NeuroLink tool system, including parameter schemas,
3
3
  * argument patterns, execution metadata, context, and result types.
4
4
  */
5
+ import { z } from "zod";
5
6
  /**
6
7
  * Type guard for tool result
7
8
  */
@@ -8,6 +8,7 @@ import { readFile, stat } from "fs/promises";
8
8
  import { logger } from "./logger.js";
9
9
  import { CSVProcessor } from "./csvProcessor.js";
10
10
  import { ImageProcessor } from "./imageProcessor.js";
11
+ import { PDFProcessor } from "./pdfProcessor.js";
11
12
  /**
12
13
  * Format file size in human-readable units
13
14
  */
@@ -56,7 +57,7 @@ export class FileDetector {
56
57
  const content = await this.loadContent(input, detection, options);
57
58
  // Extract CSV-specific options from FileDetectorOptions
58
59
  const csvOptions = options?.csvOptions;
59
- return await this.processFile(content, detection, csvOptions);
60
+ return await this.processFile(content, detection, csvOptions, options?.provider);
60
61
  }
61
62
  /**
62
63
  * Detect file type using multi-strategy approach
@@ -119,12 +120,14 @@ export class FileDetector {
119
120
  /**
120
121
  * Route to appropriate processor
121
122
  */
122
- static async processFile(content, detection, options) {
123
+ static async processFile(content, detection, options, provider) {
123
124
  switch (detection.type) {
124
125
  case "csv":
125
126
  return await CSVProcessor.process(content, options);
126
127
  case "image":
127
128
  return await ImageProcessor.process(content);
129
+ case "pdf":
130
+ return await PDFProcessor.process(content, { provider });
128
131
  case "text":
129
132
  return {
130
133
  type: "text",
@@ -354,7 +357,7 @@ class ExtensionStrategy {
354
357
  mimeType: this.getMimeType(ext),
355
358
  extension: ext,
356
359
  source: this.detectSource(input),
357
- metadata: { confidence: type ? 70 : 0 },
360
+ metadata: { confidence: type ? 85 : 0 },
358
361
  };
359
362
  }
360
363
  getExtension(input) {
@@ -7,6 +7,7 @@ import { CONVERSATION_INSTRUCTIONS } from "../config/conversationMemory.js";
7
7
  import { ProviderImageAdapter, MultimodalLogger, } from "../adapters/providerImageAdapter.js";
8
8
  import { logger } from "./logger.js";
9
9
  import { FileDetector } from "./fileDetector.js";
10
+ import { PDFProcessor } from "./pdfProcessor.js";
10
11
  import { request } from "undici";
11
12
  import { readFileSync, existsSync } from "fs";
12
13
  /**
@@ -32,6 +33,10 @@ function isValidContentItem(item) {
32
33
  (contentItem.mimeType === undefined ||
33
34
  typeof contentItem.mimeType === "string"));
34
35
  }
36
+ if (contentItem.type === "file") {
37
+ return (Buffer.isBuffer(contentItem.data) &&
38
+ typeof contentItem.mimeType === "string");
39
+ }
35
40
  return false;
36
41
  }
37
42
  /**
@@ -52,6 +57,15 @@ function convertContentItem(item) {
52
57
  ...(contentItem.mimeType && { mimeType: contentItem.mimeType }),
53
58
  };
54
59
  }
60
+ if (contentItem.type === "file" &&
61
+ Buffer.isBuffer(contentItem.data) &&
62
+ contentItem.mimeType) {
63
+ return {
64
+ type: "file",
65
+ data: contentItem.data,
66
+ mimeType: contentItem.mimeType,
67
+ };
68
+ }
55
69
  return null;
56
70
  }
57
71
  /**
@@ -268,7 +282,7 @@ export async function buildMessagesArray(options) {
268
282
  const filename = extractFilename(file);
269
283
  try {
270
284
  const result = await FileDetector.detectAndProcess(file, {
271
- maxSize: 10 * 1024 * 1024,
285
+ maxSize: 50 * 1024 * 1024,
272
286
  allowedTypes: ["csv"],
273
287
  csvOptions: csvOptions,
274
288
  });
@@ -310,6 +324,11 @@ export async function buildMessagesArray(options) {
310
324
  * Detects when images are present and routes through provider adapter
311
325
  */
312
326
  export async function buildMultimodalMessagesArray(options, provider, model) {
327
+ // Compute provider-specific max PDF size once for consistent validation
328
+ const pdfConfig = PDFProcessor.getProviderConfig(provider);
329
+ const maxSize = pdfConfig
330
+ ? pdfConfig.maxSizeMB * 1024 * 1024
331
+ : 10 * 1024 * 1024;
313
332
  // Process unified files array (auto-detect)
314
333
  if (options.input.files && options.input.files.length > 0) {
315
334
  logger.info(`[FileDetector] Processing ${options.input.files.length} file(s) with auto-detection`);
@@ -317,9 +336,10 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
317
336
  for (const file of options.input.files) {
318
337
  try {
319
338
  const result = await FileDetector.detectAndProcess(file, {
320
- maxSize: 10 * 1024 * 1024,
321
- allowedTypes: ["csv", "image"],
339
+ maxSize,
340
+ allowedTypes: ["csv", "image", "pdf"],
322
341
  csvOptions: options.csvOptions,
342
+ provider: provider,
323
343
  });
324
344
  if (result.type === "csv") {
325
345
  const filename = extractFilename(file);
@@ -344,6 +364,13 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
344
364
  ];
345
365
  logger.info(`[FileDetector] ✅ Image: ${result.mimeType}`);
346
366
  }
367
+ else if (result.type === "pdf") {
368
+ options.input.pdfFiles = [
369
+ ...(options.input.pdfFiles || []),
370
+ result.content,
371
+ ];
372
+ logger.info(`[FileDetector] ✅ PDF: ${extractFilename(file)}`);
373
+ }
347
374
  }
348
375
  catch (error) {
349
376
  logger.error(`[FileDetector] ❌ Failed to process file:`, error);
@@ -384,17 +411,46 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
384
411
  }
385
412
  }
386
413
  }
414
+ // Track PDF files for multimodal processing (NOT text conversion)
415
+ const pdfFiles = [];
416
+ // Process explicit PDF files array
417
+ if (options.input.pdfFiles && options.input.pdfFiles.length > 0) {
418
+ logger.info(`[PDF] Processing ${options.input.pdfFiles.length} explicit PDF file(s) for ${provider}`);
419
+ for (let i = 0; i < options.input.pdfFiles.length; i++) {
420
+ const pdfFile = options.input.pdfFiles[i];
421
+ const filename = extractFilename(pdfFile, i);
422
+ try {
423
+ const result = await FileDetector.detectAndProcess(pdfFile, {
424
+ maxSize,
425
+ allowedTypes: ["pdf"],
426
+ provider: provider,
427
+ });
428
+ if (Buffer.isBuffer(result.content)) {
429
+ pdfFiles.push({ buffer: result.content, filename });
430
+ logger.info(`[PDF] ✅ Queued for multimodal: ${filename}`);
431
+ }
432
+ }
433
+ catch (error) {
434
+ logger.error(`[PDF] ❌ Failed to process ${filename}:`, error);
435
+ throw error;
436
+ }
437
+ }
438
+ }
387
439
  // Check if this is a multimodal request
388
440
  const hasImages = (options.input.images && options.input.images.length > 0) ||
389
441
  (options.input.content &&
390
442
  options.input.content.some((c) => c.type === "image"));
391
- // If no images, use standard message building and convert to MultimodalChatMessage[]
392
- if (!hasImages) {
393
- // Clear csvFiles and files arrays to prevent duplication
443
+ const hasPDFs = pdfFiles.length > 0;
444
+ // If no images or PDFs, use standard message building and convert to MultimodalChatMessage[]
445
+ if (!hasImages && !hasPDFs) {
446
+ // Clear csvFiles, pdfFiles, and files arrays to prevent duplication
394
447
  // (already processed and added to options.input.text above)
395
448
  if (options.input.csvFiles) {
396
449
  options.input.csvFiles = [];
397
450
  }
451
+ if (options.input.pdfFiles) {
452
+ options.input.pdfFiles = [];
453
+ }
398
454
  if (options.input.files) {
399
455
  options.input.files = [];
400
456
  }
@@ -441,9 +497,10 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
441
497
  // Advanced content format - convert to provider-specific format
442
498
  userContent = await convertContentToProviderFormat(options.input.content, provider, model);
443
499
  }
444
- else if (options.input.images && options.input.images.length > 0) {
445
- // Simple images format - convert to provider-specific format
446
- userContent = await convertSimpleImagesToProviderFormat(options.input.text, options.input.images, provider, model);
500
+ else if ((options.input.images && options.input.images.length > 0) ||
501
+ pdfFiles.length > 0) {
502
+ // Simple images/PDFs format - convert to provider-specific format
503
+ userContent = await convertMultimodalToProviderFormat(options.input.text, options.input.images || [], pdfFiles, provider, model);
447
504
  }
448
505
  else {
449
506
  // Text-only fallback
@@ -664,6 +721,35 @@ async function convertSimpleImagesToProviderFormat(text, images, provider, _mode
664
721
  });
665
722
  return content;
666
723
  }
724
+ /**
725
+ * Convert multimodal content (images + PDFs) to provider format
726
+ */
727
+ async function convertMultimodalToProviderFormat(text, images, pdfFiles, provider, model) {
728
+ const content = [
729
+ { type: "text", text },
730
+ ];
731
+ // Add images if present
732
+ if (images.length > 0) {
733
+ const imageContent = await convertSimpleImagesToProviderFormat("", images, provider, model);
734
+ if (Array.isArray(imageContent)) {
735
+ imageContent.forEach((item) => {
736
+ if (item.type !== "text") {
737
+ content.push(item);
738
+ }
739
+ });
740
+ }
741
+ }
742
+ // Add PDFs using Vercel AI SDK standard format (works for all providers)
743
+ content.push(...pdfFiles.map((pdf) => {
744
+ logger.info(`[PDF] ✅ Added to content (Vercel AI SDK format): ${pdf.filename}`);
745
+ return {
746
+ type: "file",
747
+ data: pdf.buffer,
748
+ mimeType: "application/pdf",
749
+ };
750
+ }));
751
+ return content;
752
+ }
667
753
  /**
668
754
  * Extract filename from file input
669
755
  */
@@ -0,0 +1,67 @@
1
+ import type { StreamOptions } from "../types/streamTypes.js";
2
+ /**
3
+ * Builds a normalized multimodal options payload for streaming providers.
4
+ *
5
+ * This utility extracts and normalizes multimodal input fields from StreamOptions
6
+ * into a consistent format that can be consumed by buildMultimodalMessagesArray.
7
+ *
8
+ * @param {StreamOptions} options - Stream options containing:
9
+ * - input.text: Main text prompt
10
+ * - input.images: Image files (Buffer | string paths/URLs)
11
+ * - input.content: Advanced multimodal content array
12
+ * - input.files: Auto-detected file types
13
+ * - input.csvFiles: CSV files for tabular data
14
+ * - input.pdfFiles: PDF documents (Buffer | string paths)
15
+ * - csvOptions: CSV parsing options
16
+ * - systemPrompt: System-level instructions
17
+ * - conversationMessages: Chat history
18
+ * - temperature: Model temperature (0-1)
19
+ * - maxTokens: Maximum output tokens
20
+ * - enableAnalytics: Enable analytics tracking
21
+ * - enableEvaluation: Enable response evaluation
22
+ * - context: Additional context data
23
+ * @param {string} providerName - Provider identifier (e.g., "vertex", "openai", "anthropic")
24
+ * @param {string} modelName - Model identifier (e.g., "gemini-2.5-flash", "gpt-4o")
25
+ * @returns {object} Normalized options object with:
26
+ * - input: { text, images, content, files, csvFiles, pdfFiles }
27
+ * - csvOptions: CSV processing options
28
+ * - systemPrompt: System prompt string
29
+ * - conversationHistory: Message history array
30
+ * - provider: Provider name
31
+ * - model: Model name
32
+ * - temperature: Temperature value
33
+ * - maxTokens: Token limit
34
+ * - enableAnalytics: Analytics flag
35
+ * - enableEvaluation: Evaluation flag
36
+ * - context: Context data
37
+ *
38
+ * @example
39
+ * ```typescript
40
+ * const opts = buildMultimodalOptions(streamOptions, "vertex", "gemini-2.5-flash");
41
+ * const messages = await buildMultimodalMessagesArray(opts, "vertex", "gemini-2.5-flash");
42
+ * ```
43
+ */
44
+ export declare function buildMultimodalOptions(options: StreamOptions, providerName: string, modelName: string): {
45
+ input: {
46
+ text: string;
47
+ images: (string | Buffer<ArrayBufferLike>)[] | undefined;
48
+ content: (import("../types/content.js").TextContent | import("../types/content.js").ImageContent)[] | undefined;
49
+ files: (string | Buffer<ArrayBufferLike>)[] | undefined;
50
+ csvFiles: (string | Buffer<ArrayBufferLike>)[] | undefined;
51
+ pdfFiles: (string | Buffer<ArrayBufferLike>)[] | undefined;
52
+ };
53
+ csvOptions: {
54
+ maxRows?: number;
55
+ formatStyle?: "raw" | "markdown" | "json";
56
+ includeHeaders?: boolean;
57
+ } | undefined;
58
+ systemPrompt: string | undefined;
59
+ conversationHistory: import("../types/conversation.js").ChatMessage[] | undefined;
60
+ provider: string;
61
+ model: string;
62
+ temperature: number | undefined;
63
+ maxTokens: number | undefined;
64
+ enableAnalytics: boolean | undefined;
65
+ enableEvaluation: boolean | undefined;
66
+ context: import("../types/common.js").UnknownRecord | undefined;
67
+ };
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Builds a normalized multimodal options payload for streaming providers.
3
+ *
4
+ * This utility extracts and normalizes multimodal input fields from StreamOptions
5
+ * into a consistent format that can be consumed by buildMultimodalMessagesArray.
6
+ *
7
+ * @param {StreamOptions} options - Stream options containing:
8
+ * - input.text: Main text prompt
9
+ * - input.images: Image files (Buffer | string paths/URLs)
10
+ * - input.content: Advanced multimodal content array
11
+ * - input.files: Auto-detected file types
12
+ * - input.csvFiles: CSV files for tabular data
13
+ * - input.pdfFiles: PDF documents (Buffer | string paths)
14
+ * - csvOptions: CSV parsing options
15
+ * - systemPrompt: System-level instructions
16
+ * - conversationMessages: Chat history
17
+ * - temperature: Model temperature (0-1)
18
+ * - maxTokens: Maximum output tokens
19
+ * - enableAnalytics: Enable analytics tracking
20
+ * - enableEvaluation: Enable response evaluation
21
+ * - context: Additional context data
22
+ * @param {string} providerName - Provider identifier (e.g., "vertex", "openai", "anthropic")
23
+ * @param {string} modelName - Model identifier (e.g., "gemini-2.5-flash", "gpt-4o")
24
+ * @returns {object} Normalized options object with:
25
+ * - input: { text, images, content, files, csvFiles, pdfFiles }
26
+ * - csvOptions: CSV processing options
27
+ * - systemPrompt: System prompt string
28
+ * - conversationHistory: Message history array
29
+ * - provider: Provider name
30
+ * - model: Model name
31
+ * - temperature: Temperature value
32
+ * - maxTokens: Token limit
33
+ * - enableAnalytics: Analytics flag
34
+ * - enableEvaluation: Evaluation flag
35
+ * - context: Context data
36
+ *
37
+ * @example
38
+ * ```typescript
39
+ * const opts = buildMultimodalOptions(streamOptions, "vertex", "gemini-2.5-flash");
40
+ * const messages = await buildMultimodalMessagesArray(opts, "vertex", "gemini-2.5-flash");
41
+ * ```
42
+ */
43
+ export function buildMultimodalOptions(options, providerName, modelName) {
44
+ return {
45
+ input: {
46
+ text: options.input?.text || "",
47
+ images: options.input?.images,
48
+ content: options.input?.content,
49
+ files: options.input?.files,
50
+ csvFiles: options.input?.csvFiles,
51
+ pdfFiles: options.input?.pdfFiles,
52
+ },
53
+ csvOptions: options.csvOptions,
54
+ systemPrompt: options.systemPrompt,
55
+ conversationHistory: options.conversationMessages,
56
+ provider: providerName,
57
+ model: modelName,
58
+ temperature: options.temperature,
59
+ maxTokens: options.maxTokens,
60
+ enableAnalytics: options.enableAnalytics,
61
+ enableEvaluation: options.enableEvaluation,
62
+ context: options.context,
63
+ };
64
+ }
@@ -0,0 +1,10 @@
1
+ import type { FileProcessingResult, PDFProviderConfig, PDFProcessorOptions } from "../types/fileTypes.js";
2
+ export declare class PDFProcessor {
3
+ private static readonly PDF_SIGNATURE;
4
+ static process(content: Buffer, options?: PDFProcessorOptions): Promise<FileProcessingResult>;
5
+ static supportsNativePDF(provider: string): boolean;
6
+ static getProviderConfig(provider: string): PDFProviderConfig | null;
7
+ private static isValidPDF;
8
+ private static extractBasicMetadata;
9
+ static estimateTokens(pageCount: number, mode?: "text-only" | "visual"): number;
10
+ }
@@ -0,0 +1,198 @@
1
+ import { logger } from "./logger.js";
2
// Per-provider PDF limits, keyed by lower-case provider name (aliases included).
// All entries share maxPages = 100 and supportsNative = true; they differ only
// in size limit, API type and (for bedrock) the citations setting, so the table
// is built through a small factory that returns a fresh object per provider.
const PDF_PROVIDER_CONFIGS = (() => {
    const entry = (maxSizeMB, apiType, requiresCitations = false) => ({
        maxSizeMB,
        maxPages: 100,
        supportsNative: true,
        requiresCitations,
        apiType,
    });
    return {
        // 5 MB, "document" API
        anthropic: entry(5, "document"),
        bedrock: entry(5, "document", "auto"),
        "google-vertex": entry(5, "document"),
        vertex: entry(5, "document"),
        // 2000 MB, "files-api"
        "google-ai-studio": entry(2000, "files-api"),
        gemini: entry(2000, "files-api"),
        "google-ai": entry(2000, "files-api"),
        // 10 MB, "files-api"
        openai: entry(10, "files-api"),
        azure: entry(10, "files-api"),
        "azure-openai": entry(10, "files-api"),
        litellm: entry(10, "files-api"),
        "openai-compatible": entry(10, "files-api"),
        mistral: entry(10, "files-api"),
        "hugging-face": entry(10, "files-api"),
        huggingface: entry(10, "files-api"),
    };
})();
109
/**
 * Static-only helper that validates PDF buffers against the per-provider
 * limits in PDF_PROVIDER_CONFIGS and packages them as a file-processing result.
 */
export class PDFProcessor {
    // PDF magic bytes: %PDF-
    static PDF_SIGNATURE = Buffer.from("%PDF-", "ascii");
    /**
     * Single lookup path shared by process(), supportsNativePDF() and
     * getProviderConfig() so they always agree.
     *
     * The name is lower-cased (the table keys are lower-case) and only own
     * properties are honoured, so names such as "constructor" or "toString"
     * cannot resolve to members inherited from Object.prototype.
     *
     * @param {unknown} provider - Provider name in any letter case.
     * @returns {object | null} The matching config entry, or null.
     */
    static #lookupConfig(provider) {
        if (typeof provider !== "string") {
            return null;
        }
        const key = provider.toLowerCase();
        return Object.hasOwn(PDF_PROVIDER_CONFIGS, key)
            ? PDF_PROVIDER_CONFIGS[key]
            : null;
    }
    /**
     * Validates a PDF buffer for the given provider.
     *
     * @param {Buffer} content - Raw PDF bytes.
     * @param {object} [options] - `provider` (defaults to "unknown") and,
     *   for bedrock, `bedrockApiMode`.
     * @returns {Promise<object>} `{ type: "pdf", content, mimeType, metadata }`
     *   where metadata holds confidence, size, version, estimatedPages,
     *   filename, provider and apiType.
     * @throws {Error} If the buffer does not start with `%PDF-`, the provider
     *   has no native PDF support, or the size exceeds the provider limit.
     */
    static async process(content, options) {
        const provider = (options?.provider || "unknown").toLowerCase();
        const config = PDFProcessor.#lookupConfig(provider);
        if (!this.isValidPDF(content)) {
            throw new Error("Invalid PDF file format. File must start with %PDF- header.");
        }
        if (!config || !config.supportsNative) {
            const supportedProviders = Object.keys(PDF_PROVIDER_CONFIGS)
                .filter((p) => PDF_PROVIDER_CONFIGS[p].supportsNative)
                .join(", ");
            throw new Error([
                `PDF files are not currently supported with ${provider} provider.`,
                `Supported providers: ${supportedProviders}`,
                `Current provider: ${provider}`,
                "",
                "Options:",
                "1. Switch to a supported provider (--provider openai or --provider vertex)",
                "2. Convert your PDF to text manually",
            ].join("\n"));
        }
        const sizeMB = content.length / (1024 * 1024);
        if (sizeMB > config.maxSizeMB) {
            throw new Error(`PDF size ${sizeMB.toFixed(2)}MB exceeds ${config.maxSizeMB}MB limit for ${provider}`);
        }
        const metadata = this.extractBasicMetadata(content);
        // The page count is only an estimate from the head of the file, so
        // exceeding the limit produces a warning rather than a failure.
        if (metadata.estimatedPages && metadata.estimatedPages > config.maxPages) {
            logger.warn(`[PDF] PDF appears to have ${metadata.estimatedPages}+ pages. ` +
                `${provider} supports up to ${config.maxPages} pages.`);
        }
        if (provider === "bedrock" && options?.bedrockApiMode === "converse") {
            logger.info("[PDF] Using Bedrock Converse API. " +
                "Visual PDF analysis requires citations enabled. " +
                "Text-only mode: ~1,000 tokens/3 pages. " +
                "Visual mode: ~7,000 tokens/3 pages.");
        }
        logger.info("[PDF] ✅ Validated PDF file", {
            provider,
            size: `${sizeMB.toFixed(2)}MB`,
            version: metadata.version,
            estimatedPages: metadata.estimatedPages,
            apiType: config.apiType,
        });
        return {
            type: "pdf",
            content,
            mimeType: "application/pdf",
            metadata: {
                confidence: 100,
                size: content.length,
                ...metadata,
                provider,
                apiType: config.apiType,
            },
        };
    }
    /**
     * Reports whether a provider accepts PDFs natively.
     *
     * @param {string} provider - Provider name; matched case-insensitively,
     *   the same way process() resolves it.
     * @returns {boolean} True only when a config entry exists and is flagged
     *   `supportsNative`.
     */
    static supportsNativePDF(provider) {
        return PDFProcessor.#lookupConfig(provider)?.supportsNative || false;
    }
    /**
     * Returns the PDF configuration registered for a provider.
     *
     * @param {string} provider - Provider name; matched case-insensitively.
     * @returns {object | null} The config entry, or null when the provider
     *   is not registered.
     */
    static getProviderConfig(provider) {
        return PDFProcessor.#lookupConfig(provider);
    }
    /**
     * Checks that the buffer starts with the `%PDF-` signature.
     *
     * @param {Buffer} buffer - Candidate file contents.
     * @returns {boolean} False for non-Buffer input, for buffers shorter
     *   than the signature, or when the leading bytes differ.
     */
    static isValidPDF(buffer) {
        // Non-Buffer input (null, strings, ...) is reported as invalid rather
        // than crashing on `.length` / `.equals`.
        if (!Buffer.isBuffer(buffer) || buffer.length < 5) {
            return false;
        }
        return buffer.subarray(0, 5).equals(this.PDF_SIGNATURE);
    }
    /**
     * Best-effort metadata scan over at most the first 10,000 bytes.
     *
     * @param {Buffer} buffer - PDF contents.
     * @returns {{version: string, estimatedPages: number | null, filename: undefined}}
     *   `version` is "unknown" when no `%PDF-x.y` header is found;
     *   `estimatedPages` is null when no page objects appear in the scanned
     *   range.
     */
    static extractBasicMetadata(buffer) {
        const headerSize = Math.min(10000, buffer.length);
        const header = buffer.toString("utf-8", 0, headerSize);
        const versionMatch = header.match(/%PDF-(\d\.\d)/);
        const version = versionMatch ? versionMatch[1] : "unknown";
        // `[^s]` keeps `/Type /Pages` (the page-tree node) out of the count.
        const pageMatches = header.match(/\/Type\s*\/Page[^s]/g);
        const estimatedPages = pageMatches ? pageMatches.length : null;
        return {
            version,
            estimatedPages,
            filename: undefined,
        };
    }
    /**
     * Rough token estimate for a PDF of `pageCount` pages.
     *
     * @param {number} pageCount - Number of pages.
     * @param {"text-only" | "visual"} [mode="visual"] - "text-only" uses
     *   1,000 tokens per 3 pages; any other value uses 7,000 per 3 pages.
     * @returns {number} Estimate, rounded up.
     */
    static estimateTokens(pageCount, mode = "visual") {
        const tokensPerThreePages = mode === "text-only" ? 1000 : 7000;
        return Math.ceil((pageCount / 3) * tokensPerThreePages);
    }
}