@juspay/neurolink 8.18.0 → 8.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. package/CHANGELOG.md +12 -0
  2. package/dist/adapters/providerImageAdapter.d.ts +12 -0
  3. package/dist/adapters/providerImageAdapter.js +30 -3
  4. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  5. package/dist/config/conversationMemory.d.ts +2 -1
  6. package/dist/config/conversationMemory.js +15 -7
  7. package/dist/core/baseProvider.d.ts +15 -0
  8. package/dist/core/baseProvider.js +79 -1
  9. package/dist/core/modules/GenerationHandler.d.ts +5 -0
  10. package/dist/core/modules/GenerationHandler.js +56 -9
  11. package/dist/factories/providerRegistry.js +18 -0
  12. package/dist/lib/adapters/providerImageAdapter.d.ts +12 -0
  13. package/dist/lib/adapters/providerImageAdapter.js +30 -3
  14. package/dist/lib/config/conversationMemory.d.ts +2 -1
  15. package/dist/lib/config/conversationMemory.js +15 -7
  16. package/dist/lib/core/baseProvider.d.ts +15 -0
  17. package/dist/lib/core/baseProvider.js +79 -1
  18. package/dist/lib/core/modules/GenerationHandler.d.ts +5 -0
  19. package/dist/lib/core/modules/GenerationHandler.js +56 -9
  20. package/dist/lib/factories/providerRegistry.js +18 -0
  21. package/dist/lib/mcp/servers/agent/directToolsServer.js +5 -0
  22. package/dist/lib/mcp/toolRegistry.js +5 -0
  23. package/dist/lib/neurolink.js +6 -1
  24. package/dist/lib/types/generateTypes.d.ts +31 -0
  25. package/dist/lib/types/ttsTypes.d.ts +29 -0
  26. package/dist/lib/utils/fileDetector.d.ts +25 -0
  27. package/dist/lib/utils/fileDetector.js +433 -10
  28. package/dist/lib/utils/messageBuilder.js +6 -2
  29. package/dist/lib/utils/ttsProcessor.d.ts +41 -14
  30. package/dist/lib/utils/ttsProcessor.js +10 -26
  31. package/dist/mcp/servers/agent/directToolsServer.js +5 -0
  32. package/dist/mcp/toolRegistry.js +5 -0
  33. package/dist/neurolink.js +6 -1
  34. package/dist/types/generateTypes.d.ts +31 -0
  35. package/dist/types/ttsTypes.d.ts +29 -0
  36. package/dist/utils/fileDetector.d.ts +25 -0
  37. package/dist/utils/fileDetector.js +433 -10
  38. package/dist/utils/messageBuilder.js +6 -2
  39. package/dist/utils/ttsProcessor.d.ts +41 -14
  40. package/dist/utils/ttsProcessor.js +10 -26
  41. package/package.json +1 -1
@@ -13,6 +13,7 @@ import { GenerationHandler } from "./modules/GenerationHandler.js";
13
13
  import { TelemetryHandler } from "./modules/TelemetryHandler.js";
14
14
  import { Utilities } from "./modules/Utilities.js";
15
15
  import { ToolsManager } from "./modules/ToolsManager.js";
16
+ import { TTSProcessor } from "../utils/ttsProcessor.js";
16
17
  /**
17
18
  * Abstract base class for all AI providers
18
19
  * Tools are integrated as first-class citizens - always available by default
@@ -298,6 +299,21 @@ export class BaseProvider {
298
299
  /**
299
300
  * Text generation method - implements AIProvider interface
300
301
  * Tools are always available unless explicitly disabled
302
+ *
303
+ * Supports Text-to-Speech (TTS) audio generation in two modes:
304
+ * 1. Direct synthesis (default): TTS synthesizes the input text without AI generation
305
+ * 2. AI response synthesis: TTS synthesizes the AI-generated response after generation
306
+ *
307
+ * When TTS is enabled with useAiResponse=false (default), the method returns early with
308
+ * only the audio result, skipping AI generation entirely for optimal performance.
309
+ *
310
+ * When TTS is enabled with useAiResponse=true, the method performs full AI generation
311
+ * and then synthesizes the AI response to audio.
312
+ *
313
+ * @param optionsOrPrompt - Generation options or prompt string
314
+ * @param _analysisSchema - Optional analysis schema (not used)
315
+ * @returns Enhanced result with optional audio field containing TTSResult
316
+ *
301
317
  * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
302
318
  * for consistency and better performance
303
319
  */
@@ -306,6 +322,30 @@ export class BaseProvider {
306
322
  this.validateOptions(options);
307
323
  const startTime = Date.now();
308
324
  try {
325
+ // ===== TTS MODE 1: Direct Input Synthesis (useAiResponse=false) =====
326
+ // Synthesize input text directly without AI generation
327
+ // This is optimal for simple read-aloud scenarios
328
+ if (options.tts?.enabled && !options.tts?.useAiResponse) {
329
+ const textToSynthesize = options.prompt ?? options.input?.text ?? "";
330
+ // Build base result structure - common to both paths
331
+ const baseResult = {
332
+ content: textToSynthesize,
333
+ provider: options.provider ?? this.providerName,
334
+ model: this.modelName,
335
+ usage: { input: 0, output: 0, total: 0 },
336
+ };
337
+ try {
338
+ const ttsResult = await TTSProcessor.synthesize(textToSynthesize, options.provider ?? this.providerName, options.tts);
339
+ baseResult.audio = ttsResult;
340
+ }
341
+ catch (ttsError) {
342
+ logger.error(`TTS synthesis failed in Mode 1 (direct input synthesis):`, ttsError);
343
+ // baseResult remains without audio - graceful degradation
344
+ }
345
+ // Call enhanceResult for consistency - enables analytics/evaluation for TTS-only requests
346
+ return await this.enhanceResult(baseResult, options, startTime);
347
+ }
348
+ // ===== Normal AI Generation Flow =====
309
349
  const { tools, model } = await this.prepareGenerationContext(options);
310
350
  const messages = await this.buildMessages(options);
311
351
  const generateResult = await this.executeGeneration(model, messages, tools, options);
@@ -314,7 +354,44 @@ export class BaseProvider {
314
354
  const responseTime = Date.now() - startTime;
315
355
  await this.recordPerformanceMetrics(generateResult.usage, responseTime);
316
356
  const { toolsUsed, toolExecutions } = this.extractToolInformation(generateResult);
317
- const enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
357
+ let enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
358
+ // ===== TTS MODE 2: AI Response Synthesis (useAiResponse=true) =====
359
+ // Synthesize AI-generated response after generation completes
360
+ if (options.tts?.enabled && options.tts?.useAiResponse) {
361
+ const aiResponse = enhancedResult.content;
362
+ const provider = options.provider ?? this.providerName;
363
+ // Validate AI response and provider before synthesis
364
+ if (aiResponse && provider) {
365
+ try {
366
+ const ttsResult = await TTSProcessor.synthesize(aiResponse, provider, options.tts);
367
+ // Add audio to enhanced result (TTSProcessor already includes latency in metadata)
368
+ enhancedResult = {
369
+ ...enhancedResult,
370
+ audio: ttsResult,
371
+ };
372
+ }
373
+ catch (ttsError) {
374
+ // Log TTS error but continue with text-only result
375
+ logger.error(`TTS synthesis failed in Mode 2 (AI response synthesis):`, ttsError);
376
+ // enhancedResult remains unchanged (no audio field added)
377
+ }
378
+ }
379
+ else {
380
+ logger.warn(`TTS synthesis skipped despite being enabled`, {
381
+ provider: this.providerName,
382
+ hasAiResponse: !!aiResponse,
383
+ aiResponseLength: aiResponse?.length ?? 0,
384
+ hasProvider: !!provider,
385
+ ttsConfig: {
386
+ enabled: options.tts?.enabled,
387
+ useAiResponse: options.tts?.useAiResponse,
388
+ },
389
+ reason: !aiResponse
390
+ ? "AI response is empty or undefined"
391
+ : "Provider is missing",
392
+ });
393
+ }
394
+ }
318
395
  return await this.enhanceResult(enhancedResult, options, startTime);
319
396
  }
320
397
  catch (error) {
@@ -361,6 +438,7 @@ export class BaseProvider {
361
438
  enhancedWithTools: !!(result.toolsUsed && result.toolsUsed.length > 0),
362
439
  analytics: result.analytics,
363
440
  evaluation: result.evaluation,
441
+ audio: result.audio,
364
442
  };
365
443
  }
366
444
  /**
@@ -29,6 +29,11 @@ export declare class GenerationHandler {
29
29
  functionId?: string;
30
30
  metadata?: Record<string, string | number | boolean>;
31
31
  } | undefined, handleToolStorageFn: (toolCalls: unknown[], toolResults: unknown[], options: TextGenerationOptions, timestamp: Date) => Promise<void>);
32
+ /**
33
+ * Helper method to call generateText with optional structured output
34
+ * @private
35
+ */
36
+ private callGenerateText;
32
37
  /**
33
38
  * Execute the generation with AI SDK
34
39
  */
@@ -12,7 +12,7 @@
12
12
  *
13
13
  * @module core/modules/GenerationHandler
14
14
  */
15
- import { generateText, Output } from "ai";
15
+ import { generateText, Output, NoObjectGeneratedError } from "ai";
16
16
  import { logger } from "../../utils/logger.js";
17
17
  import { DEFAULT_MAX_STEPS } from "../constants.js";
18
18
  /**
@@ -32,11 +32,12 @@ export class GenerationHandler {
32
32
  this.handleToolStorageFn = handleToolStorageFn;
33
33
  }
34
34
  /**
35
- * Execute the generation with AI SDK
35
+ * Helper method to call generateText with optional structured output
36
+ * @private
36
37
  */
37
- async executeGeneration(model, messages, tools, options) {
38
- const shouldUseTools = !options.disableTools && this.supportsToolsFn();
39
- const useStructuredOutput = !!options.schema &&
38
+ async callGenerateText(model, messages, tools, options, shouldUseTools, includeStructuredOutput) {
39
+ const useStructuredOutput = includeStructuredOutput &&
40
+ !!options.schema &&
40
41
  (options.output?.format === "json" ||
41
42
  options.output?.format === "structured");
42
43
  return await generateText({
@@ -64,6 +65,34 @@ export class GenerationHandler {
64
65
  },
65
66
  });
66
67
  }
68
+ /**
69
+ * Execute the generation with AI SDK
70
+ */
71
+ async executeGeneration(model, messages, tools, options) {
72
+ const shouldUseTools = !options.disableTools && this.supportsToolsFn();
73
+ const useStructuredOutput = !!options.schema &&
74
+ (options.output?.format === "json" ||
75
+ options.output?.format === "structured");
76
+ try {
77
+ return await this.callGenerateText(model, messages, tools, options, shouldUseTools, true);
78
+ }
79
+ catch (error) {
80
+ // If NoObjectGeneratedError is thrown when using schema + tools together,
81
+ // fall back to generating without experimental_output and extract JSON manually
82
+ if (error instanceof NoObjectGeneratedError && useStructuredOutput) {
83
+ logger.debug("[GenerationHandler] NoObjectGeneratedError caught - falling back to manual JSON extraction", {
84
+ provider: this.providerName,
85
+ model: this.modelName,
86
+ error: error.message,
87
+ });
88
+ // Retry without experimental_output - the formatEnhancedResult method
89
+ // will extract JSON from the text response
90
+ return await this.callGenerateText(model, messages, tools, options, shouldUseTools, false);
91
+ }
92
+ // Re-throw other errors
93
+ throw error;
94
+ }
95
+ }
67
96
  /**
68
97
  * Log generation completion information
69
98
  */
@@ -164,11 +193,29 @@ export class GenerationHandler {
164
193
  options.output?.format === "structured");
165
194
  let content;
166
195
  if (useStructuredOutput) {
167
- if (generateResult.experimental_output !== undefined) {
168
- content = JSON.stringify(generateResult.experimental_output);
196
+ try {
197
+ const experimentalOutput = generateResult.experimental_output;
198
+ if (experimentalOutput !== undefined) {
199
+ content = JSON.stringify(experimentalOutput);
200
+ }
201
+ else {
202
+ // Fall back to text parsing
203
+ const rawText = generateResult.text || "";
204
+ const strippedText = rawText
205
+ .replace(/^```(?:json)?\s*\n?/i, "")
206
+ .replace(/\n?```\s*$/i, "")
207
+ .trim();
208
+ content = strippedText;
209
+ }
169
210
  }
170
- else {
171
- logger.debug("[GenerationHandler] experimental_output not available, falling back to text parsing");
211
+ catch (outputError) {
212
+ // experimental_output is a getter that can throw NoObjectGeneratedError
213
+ // Fall back to text parsing when structured output fails
214
+ logger.debug("[GenerationHandler] experimental_output threw, falling back to text parsing", {
215
+ error: outputError instanceof Error
216
+ ? outputError.message
217
+ : String(outputError),
218
+ });
172
219
  const rawText = generateResult.text || "";
173
220
  const strippedText = rawText
174
221
  .replace(/^```(?:json)?\s*\n?/i, "")
@@ -89,6 +89,24 @@ export class ProviderRegistry {
89
89
  }, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
90
90
  logger.debug("All providers registered successfully");
91
91
  this.registered = true;
92
+ // ===== TTS HANDLER REGISTRATION =====
93
+ try {
94
+ // Create handler instance and register explicitly
95
+ const { GoogleTTSHandler } = await import("../adapters/tts/googleTTSHandler.js");
96
+ const { TTSProcessor } = await import("../utils/ttsProcessor.js");
97
+ const googleHandler = new GoogleTTSHandler();
98
+ TTSProcessor.registerHandler("google-ai", googleHandler);
99
+ TTSProcessor.registerHandler("vertex", googleHandler);
100
+ logger.debug("TTS handlers registered successfully", {
101
+ providers: ["google-ai", "vertex"],
102
+ });
103
+ }
104
+ catch (ttsError) {
105
+ logger.warn("Failed to register TTS handlers - TTS functionality will be unavailable", {
106
+ error: ttsError instanceof Error ? ttsError.message : String(ttsError),
107
+ });
108
+ // Don't throw - TTS is optional functionality
109
+ }
92
110
  }
93
111
  catch (error) {
94
112
  logger.error("Failed to register providers:", error);
@@ -23,6 +23,11 @@ export const directToolsServer = createMCPServer({
23
23
  */
24
24
  if (!shouldDisableBuiltinTools()) {
25
25
  Object.entries(directAgentTools).forEach(([toolName, toolDef]) => {
26
+ // Skip undefined tools
27
+ if (!toolDef) {
28
+ logger.warn(`Skipping undefined tool during direct tools server registration: ${toolName}`);
29
+ return;
30
+ }
26
31
  // The toolDef is a Vercel AI SDK Tool object
27
32
  // Extract properties from the Tool object
28
33
  const toolSpec = toolDef._spec || toolDef;
@@ -47,6 +47,11 @@ export class MCPToolRegistry extends MCPRegistry {
47
47
  registerDirectTools() {
48
48
  registryLogger.debug("Auto-registering direct tools...");
49
49
  for (const [toolName, toolDef] of Object.entries(directAgentTools)) {
50
+ // Skip undefined tools
51
+ if (!toolDef) {
52
+ registryLogger.warn(`Skipping undefined tool during registration: ${toolName}`);
53
+ continue;
54
+ }
50
55
  const toolId = `direct.${toolName}`;
51
56
  const toolInfo = {
52
57
  name: toolName,
@@ -1287,6 +1287,7 @@ Current user's request: ${currentInput}`;
1287
1287
  toolUsageContext: options.toolUsageContext,
1288
1288
  input: options.input, // This includes text, images, and content arrays
1289
1289
  region: options.region,
1290
+ tts: options.tts,
1290
1291
  };
1291
1292
  // Apply factory enhancement using centralized utilities
1292
1293
  const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
@@ -1360,6 +1361,7 @@ Current user's request: ${currentInput}`;
1360
1361
  factoryResult.domainType,
1361
1362
  }
1362
1363
  : undefined,
1364
+ audio: textResult.audio,
1363
1365
  };
1364
1366
  if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1365
1367
  options.context?.userId &&
@@ -1497,7 +1499,8 @@ Current user's request: ${currentInput}`;
1497
1499
  * Attempt MCP generation with retry logic
1498
1500
  */
1499
1501
  async attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag) {
1500
- if (!options.disableTools) {
1502
+ if (!options.disableTools &&
1503
+ !(options.tts?.enabled && !options.tts?.useAiResponse)) {
1501
1504
  return await this.performMCPGenerationRetries(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
1502
1505
  }
1503
1506
  return null;
@@ -1658,6 +1661,7 @@ Current user's request: ${currentInput}`;
1658
1661
  toolExecutions: transformedToolExecutions,
1659
1662
  enhancedWithTools: Boolean(hasToolExecutions), // Mark as enhanced if tools were actually used
1660
1663
  availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
1664
+ audio: result.audio,
1661
1665
  // Include analytics and evaluation from BaseProvider
1662
1666
  analytics: result.analytics,
1663
1667
  evaluation: result.evaluation,
@@ -1750,6 +1754,7 @@ Current user's request: ${currentInput}`;
1750
1754
  enhancedWithTools: false,
1751
1755
  analytics: result.analytics,
1752
1756
  evaluation: result.evaluation,
1757
+ audio: result.audio,
1753
1758
  };
1754
1759
  }
1755
1760
  catch (error) {
@@ -300,6 +300,36 @@ export type TextGenerationOptions = {
300
300
  timeout?: number | string;
301
301
  disableTools?: boolean;
302
302
  maxSteps?: number;
303
+ /**
304
+ * Text-to-Speech (TTS) configuration
305
+ *
306
+ * Enable audio generation from text. Behavior depends on useAiResponse flag:
307
+ * - When useAiResponse is false/undefined (default): TTS synthesizes the input text directly
308
+ * - When useAiResponse is true: TTS synthesizes the AI-generated response
309
+ *
310
+ * @example Using input text (default)
311
+ * ```typescript
312
+ * const neurolink = new NeuroLink();
313
+ * const result = await neurolink.generate({
314
+ * input: { text: "Hello world" },
315
+ * provider: "google-ai",
316
+ * tts: { enabled: true, voice: "en-US-Neural2-C" }
317
+ * });
318
+ * // TTS synthesizes "Hello world" directly, no AI generation
319
+ * ```
320
+ *
321
+ * @example Using AI response
322
+ * ```typescript
323
+ * const neurolink = new NeuroLink();
324
+ * const result = await neurolink.generate({
325
+ * input: { text: "Tell me a joke" },
326
+ * provider: "google-ai",
327
+ * tts: { enabled: true, useAiResponse: true, voice: "en-US-Neural2-C" }
328
+ * });
329
+ * // AI generates the joke, then TTS synthesizes the AI's response
330
+ * ```
331
+ */
332
+ tts?: TTSOptions;
303
333
  enableEvaluation?: boolean;
304
334
  enableAnalytics?: boolean;
305
335
  context?: Record<string, JsonValue>;
@@ -346,6 +376,7 @@ export type TextGenerationResult = {
346
376
  }>;
347
377
  analytics?: AnalyticsData;
348
378
  evaluation?: EvaluationData;
379
+ audio?: TTSResult;
349
380
  };
350
381
  /**
351
382
  * Enhanced result type with optional analytics/evaluation
@@ -19,6 +19,35 @@ export type TTSQuality = "standard" | "hd";
19
19
  export type TTSOptions = {
20
20
  /** Enable TTS output */
21
21
  enabled?: boolean;
22
+ /**
23
+ * Use the AI-generated response for TTS instead of the input text
24
+ *
25
+ * When false or undefined (default): TTS will synthesize the input text/prompt directly without calling AI generation
26
+ * When true: TTS will synthesize the AI-generated response after generation completes
27
+ *
28
+ * @default false
29
+ *
30
+ * @example Using input text directly (default)
31
+ * ```typescript
32
+ * const result = await neurolink.generate({
33
+ * input: { text: "Hello world" },
34
+ * provider: "google-ai",
35
+ * tts: { enabled: true } // or useAiResponse: false
36
+ * });
37
+ * // TTS synthesizes "Hello world" directly, no AI generation
38
+ * ```
39
+ *
40
+ * @example Using AI response
41
+ * ```typescript
42
+ * const result = await neurolink.generate({
43
+ * input: { text: "Tell me a joke" },
44
+ * provider: "google-ai",
45
+ * tts: { enabled: true, useAiResponse: true }
46
+ * });
47
+ * // AI generates the joke, then TTS synthesizes the AI's response
48
+ * ```
49
+ */
50
+ useAiResponse?: boolean;
22
51
  /** Voice identifier (e.g., "en-US-Neural2-C") */
23
52
  voice?: string;
24
53
  /** Audio format (default: mp3) */
@@ -31,6 +31,31 @@ export declare class FileDetector {
31
31
  * @returns Processed file result with type and content
32
32
  */
33
33
  static detectAndProcess(input: FileInput, options?: FileDetectorOptions): Promise<FileProcessingResult>;
34
+ /**
35
+ * Try fallback parsing for a specific file type
36
+ * Used when file detection returns "unknown" but we want to try parsing anyway
37
+ */
38
+ private static tryFallbackParsing;
39
+ /**
40
+ * Check if content is valid text (UTF-8, mostly printable)
41
+ */
42
+ private static isValidText;
43
+ /**
44
+ * Guess the MIME type for text content based on content patterns
45
+ */
46
+ private static guessTextMimeType;
47
+ /**
48
+ * Strict YAML detection for guessTextMimeType
49
+ * Similar to ContentHeuristicStrategy but requires at least 2 indicators
50
+ * to avoid false positives from simple key: value patterns
51
+ */
52
+ private static looksLikeYAMLStrict;
53
+ /**
54
+ * Strict XML detection for guessTextMimeType
55
+ * Ensures content has proper XML declaration or valid tag structure with closing tags
56
+ * Prevents false positives from arbitrary content starting with <
57
+ */
58
+ private static looksLikeXMLStrict;
34
59
  /**
35
60
  * Detect file type using multi-strategy approach
36
61
  * Stops at first strategy with confidence >= threshold (default: 80%)