@juspay/neurolink 8.18.0 → 8.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [8.19.0](https://github.com/juspay/neurolink/compare/v8.18.0...v8.19.0) (2025-12-18)
2
+
3
+ ### Features
4
+
5
+ - **(tts):** Integrate TTS into BaseProvider.generate() ([ffae0b5](https://github.com/juspay/neurolink/commit/ffae0b5be9c4a2ef249876bdeee265004adf28a3))
6
+
1
7
  ## [8.18.0](https://github.com/juspay/neurolink/compare/v8.17.0...v8.18.0) (2025-12-16)
2
8
 
3
9
  ### Features
@@ -5,4 +5,4 @@ import type { OptionSchema } from "../../lib/types/cli.js";
5
5
  * This object provides metadata for validation and help text in the CLI loop.
6
6
  * It is derived from the main TextGenerationOptions interface to ensure consistency.
7
7
  */
8
- export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria" | "region" | "csvOptions">, OptionSchema>;
8
+ export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria" | "region" | "csvOptions" | "tts">, OptionSchema>;
@@ -85,6 +85,21 @@ export declare abstract class BaseProvider implements AIProvider {
85
85
  /**
86
86
  * Text generation method - implements AIProvider interface
87
87
  * Tools are always available unless explicitly disabled
88
+ *
89
+ * Supports Text-to-Speech (TTS) audio generation in two modes:
90
+ * 1. Direct synthesis (default): TTS synthesizes the input text without AI generation
91
+ * 2. AI response synthesis: TTS synthesizes the AI-generated response after generation
92
+ *
93
+ * When TTS is enabled with useAiResponse=false (default), the method returns early with
94
+ * the input text as content plus the audio result, skipping AI generation entirely for optimal performance.
95
+ *
96
+ * When TTS is enabled with useAiResponse=true, the method performs full AI generation
97
+ * and then synthesizes the AI response to audio.
98
+ *
99
+ * @param optionsOrPrompt - Generation options or prompt string
100
+ * @param _analysisSchema - Optional analysis schema (not used)
101
+ * @returns Enhanced result with optional audio field containing TTSResult
102
+ *
88
103
  * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
89
104
  * for consistency and better performance
90
105
  */
@@ -13,6 +13,7 @@ import { GenerationHandler } from "./modules/GenerationHandler.js";
13
13
  import { TelemetryHandler } from "./modules/TelemetryHandler.js";
14
14
  import { Utilities } from "./modules/Utilities.js";
15
15
  import { ToolsManager } from "./modules/ToolsManager.js";
16
+ import { TTSProcessor } from "../utils/ttsProcessor.js";
16
17
  /**
17
18
  * Abstract base class for all AI providers
18
19
  * Tools are integrated as first-class citizens - always available by default
@@ -298,6 +299,21 @@ export class BaseProvider {
298
299
  /**
299
300
  * Text generation method - implements AIProvider interface
300
301
  * Tools are always available unless explicitly disabled
302
+ *
303
+ * Supports Text-to-Speech (TTS) audio generation in two modes:
304
+ * 1. Direct synthesis (default): TTS synthesizes the input text without AI generation
305
+ * 2. AI response synthesis: TTS synthesizes the AI-generated response after generation
306
+ *
307
+ * When TTS is enabled with useAiResponse=false (default), the method returns early with
308
+ * the input text as content plus the audio result, skipping AI generation entirely for optimal performance.
309
+ *
310
+ * When TTS is enabled with useAiResponse=true, the method performs full AI generation
311
+ * and then synthesizes the AI response to audio.
312
+ *
313
+ * @param optionsOrPrompt - Generation options or prompt string
314
+ * @param _analysisSchema - Optional analysis schema (not used)
315
+ * @returns Enhanced result with optional audio field containing TTSResult
316
+ *
301
317
  * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
302
318
  * for consistency and better performance
303
319
  */
@@ -306,6 +322,27 @@ export class BaseProvider {
306
322
  this.validateOptions(options);
307
323
  const startTime = Date.now();
308
324
  try {
325
+ // ===== TTS MODE 1: Direct Input Synthesis (useAiResponse=false) =====
326
+ // Synthesize input text directly without AI generation
327
+ // This is optimal for simple read-aloud scenarios
328
+ if (options.tts?.enabled && !options.tts?.useAiResponse) {
329
+ const textToSynthesize = options.prompt ?? options.input?.text ?? "";
330
+ const ttsResult = await TTSProcessor.synthesize(textToSynthesize, options.provider ?? this.providerName, options.tts);
331
+ const baseResult = {
332
+ content: textToSynthesize,
333
+ audio: ttsResult,
334
+ provider: options.provider ?? this.providerName,
335
+ model: this.modelName,
336
+ usage: {
337
+ input: 0,
338
+ output: 0,
339
+ total: 0,
340
+ },
341
+ };
342
+ // Call enhanceResult for consistency - enables analytics/evaluation for TTS-only requests
343
+ return await this.enhanceResult(baseResult, options, startTime);
344
+ }
345
+ // ===== Normal AI Generation Flow =====
309
346
  const { tools, model } = await this.prepareGenerationContext(options);
310
347
  const messages = await this.buildMessages(options);
311
348
  const generateResult = await this.executeGeneration(model, messages, tools, options);
@@ -314,7 +351,37 @@ export class BaseProvider {
314
351
  const responseTime = Date.now() - startTime;
315
352
  await this.recordPerformanceMetrics(generateResult.usage, responseTime);
316
353
  const { toolsUsed, toolExecutions } = this.extractToolInformation(generateResult);
317
- const enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
354
+ let enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
355
+ // ===== TTS MODE 2: AI Response Synthesis (useAiResponse=true) =====
356
+ // Synthesize AI-generated response after generation completes
357
+ if (options.tts?.enabled && options.tts?.useAiResponse) {
358
+ const aiResponse = enhancedResult.content;
359
+ const provider = options.provider ?? this.providerName;
360
+ // Validate AI response and provider before synthesis
361
+ if (aiResponse && provider) {
362
+ const ttsResult = await TTSProcessor.synthesize(aiResponse, provider, options.tts);
363
+ // Add audio to enhanced result (TTSProcessor already includes latency in metadata)
364
+ enhancedResult = {
365
+ ...enhancedResult,
366
+ audio: ttsResult,
367
+ };
368
+ }
369
+ else {
370
+ logger.warn(`TTS synthesis skipped despite being enabled`, {
371
+ provider: this.providerName,
372
+ hasAiResponse: !!aiResponse,
373
+ aiResponseLength: aiResponse?.length ?? 0,
374
+ hasProvider: !!provider,
375
+ ttsConfig: {
376
+ enabled: options.tts?.enabled,
377
+ useAiResponse: options.tts?.useAiResponse,
378
+ },
379
+ reason: !aiResponse
380
+ ? "AI response is empty or undefined"
381
+ : "Provider is missing",
382
+ });
383
+ }
384
+ }
318
385
  return await this.enhanceResult(enhancedResult, options, startTime);
319
386
  }
320
387
  catch (error) {
@@ -361,6 +428,7 @@ export class BaseProvider {
361
428
  enhancedWithTools: !!(result.toolsUsed && result.toolsUsed.length > 0),
362
429
  analytics: result.analytics,
363
430
  evaluation: result.evaluation,
431
+ audio: result.audio,
364
432
  };
365
433
  }
366
434
  /**
@@ -89,6 +89,24 @@ export class ProviderRegistry {
89
89
  }, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
90
90
  logger.debug("All providers registered successfully");
91
91
  this.registered = true;
92
+ // ===== TTS HANDLER REGISTRATION =====
93
+ try {
94
+ // Create handler instance and register explicitly
95
+ const { GoogleTTSHandler } = await import("../adapters/tts/googleTTSHandler.js");
96
+ const { TTSProcessor } = await import("../utils/ttsProcessor.js");
97
+ const googleHandler = new GoogleTTSHandler();
98
+ TTSProcessor.registerHandler("google-ai", googleHandler);
99
+ TTSProcessor.registerHandler("vertex", googleHandler);
100
+ logger.debug("TTS handlers registered successfully", {
101
+ providers: ["google-ai", "vertex"],
102
+ });
103
+ }
104
+ catch (ttsError) {
105
+ logger.warn("Failed to register TTS handlers - TTS functionality will be unavailable", {
106
+ error: ttsError instanceof Error ? ttsError.message : String(ttsError),
107
+ });
108
+ // Don't throw - TTS is optional functionality
109
+ }
92
110
  }
93
111
  catch (error) {
94
112
  logger.error("Failed to register providers:", error);
@@ -85,6 +85,21 @@ export declare abstract class BaseProvider implements AIProvider {
85
85
  /**
86
86
  * Text generation method - implements AIProvider interface
87
87
  * Tools are always available unless explicitly disabled
88
+ *
89
+ * Supports Text-to-Speech (TTS) audio generation in two modes:
90
+ * 1. Direct synthesis (default): TTS synthesizes the input text without AI generation
91
+ * 2. AI response synthesis: TTS synthesizes the AI-generated response after generation
92
+ *
93
+ * When TTS is enabled with useAiResponse=false (default), the method returns early with
94
+ * the input text as content plus the audio result, skipping AI generation entirely for optimal performance.
95
+ *
96
+ * When TTS is enabled with useAiResponse=true, the method performs full AI generation
97
+ * and then synthesizes the AI response to audio.
98
+ *
99
+ * @param optionsOrPrompt - Generation options or prompt string
100
+ * @param _analysisSchema - Optional analysis schema (not used)
101
+ * @returns Enhanced result with optional audio field containing TTSResult
102
+ *
88
103
  * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
89
104
  * for consistency and better performance
90
105
  */
@@ -13,6 +13,7 @@ import { GenerationHandler } from "./modules/GenerationHandler.js";
13
13
  import { TelemetryHandler } from "./modules/TelemetryHandler.js";
14
14
  import { Utilities } from "./modules/Utilities.js";
15
15
  import { ToolsManager } from "./modules/ToolsManager.js";
16
+ import { TTSProcessor } from "../utils/ttsProcessor.js";
16
17
  /**
17
18
  * Abstract base class for all AI providers
18
19
  * Tools are integrated as first-class citizens - always available by default
@@ -298,6 +299,21 @@ export class BaseProvider {
298
299
  /**
299
300
  * Text generation method - implements AIProvider interface
300
301
  * Tools are always available unless explicitly disabled
302
+ *
303
+ * Supports Text-to-Speech (TTS) audio generation in two modes:
304
+ * 1. Direct synthesis (default): TTS synthesizes the input text without AI generation
305
+ * 2. AI response synthesis: TTS synthesizes the AI-generated response after generation
306
+ *
307
+ * When TTS is enabled with useAiResponse=false (default), the method returns early with
308
+ * the input text as content plus the audio result, skipping AI generation entirely for optimal performance.
309
+ *
310
+ * When TTS is enabled with useAiResponse=true, the method performs full AI generation
311
+ * and then synthesizes the AI response to audio.
312
+ *
313
+ * @param optionsOrPrompt - Generation options or prompt string
314
+ * @param _analysisSchema - Optional analysis schema (not used)
315
+ * @returns Enhanced result with optional audio field containing TTSResult
316
+ *
301
317
  * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
302
318
  * for consistency and better performance
303
319
  */
@@ -306,6 +322,27 @@ export class BaseProvider {
306
322
  this.validateOptions(options);
307
323
  const startTime = Date.now();
308
324
  try {
325
+ // ===== TTS MODE 1: Direct Input Synthesis (useAiResponse=false) =====
326
+ // Synthesize input text directly without AI generation
327
+ // This is optimal for simple read-aloud scenarios
328
+ if (options.tts?.enabled && !options.tts?.useAiResponse) {
329
+ const textToSynthesize = options.prompt ?? options.input?.text ?? "";
330
+ const ttsResult = await TTSProcessor.synthesize(textToSynthesize, options.provider ?? this.providerName, options.tts);
331
+ const baseResult = {
332
+ content: textToSynthesize,
333
+ audio: ttsResult,
334
+ provider: options.provider ?? this.providerName,
335
+ model: this.modelName,
336
+ usage: {
337
+ input: 0,
338
+ output: 0,
339
+ total: 0,
340
+ },
341
+ };
342
+ // Call enhanceResult for consistency - enables analytics/evaluation for TTS-only requests
343
+ return await this.enhanceResult(baseResult, options, startTime);
344
+ }
345
+ // ===== Normal AI Generation Flow =====
309
346
  const { tools, model } = await this.prepareGenerationContext(options);
310
347
  const messages = await this.buildMessages(options);
311
348
  const generateResult = await this.executeGeneration(model, messages, tools, options);
@@ -314,7 +351,37 @@ export class BaseProvider {
314
351
  const responseTime = Date.now() - startTime;
315
352
  await this.recordPerformanceMetrics(generateResult.usage, responseTime);
316
353
  const { toolsUsed, toolExecutions } = this.extractToolInformation(generateResult);
317
- const enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
354
+ let enhancedResult = this.formatEnhancedResult(generateResult, tools, toolsUsed, toolExecutions, options);
355
+ // ===== TTS MODE 2: AI Response Synthesis (useAiResponse=true) =====
356
+ // Synthesize AI-generated response after generation completes
357
+ if (options.tts?.enabled && options.tts?.useAiResponse) {
358
+ const aiResponse = enhancedResult.content;
359
+ const provider = options.provider ?? this.providerName;
360
+ // Validate AI response and provider before synthesis
361
+ if (aiResponse && provider) {
362
+ const ttsResult = await TTSProcessor.synthesize(aiResponse, provider, options.tts);
363
+ // Add audio to enhanced result (TTSProcessor already includes latency in metadata)
364
+ enhancedResult = {
365
+ ...enhancedResult,
366
+ audio: ttsResult,
367
+ };
368
+ }
369
+ else {
370
+ logger.warn(`TTS synthesis skipped despite being enabled`, {
371
+ provider: this.providerName,
372
+ hasAiResponse: !!aiResponse,
373
+ aiResponseLength: aiResponse?.length ?? 0,
374
+ hasProvider: !!provider,
375
+ ttsConfig: {
376
+ enabled: options.tts?.enabled,
377
+ useAiResponse: options.tts?.useAiResponse,
378
+ },
379
+ reason: !aiResponse
380
+ ? "AI response is empty or undefined"
381
+ : "Provider is missing",
382
+ });
383
+ }
384
+ }
318
385
  return await this.enhanceResult(enhancedResult, options, startTime);
319
386
  }
320
387
  catch (error) {
@@ -361,6 +428,7 @@ export class BaseProvider {
361
428
  enhancedWithTools: !!(result.toolsUsed && result.toolsUsed.length > 0),
362
429
  analytics: result.analytics,
363
430
  evaluation: result.evaluation,
431
+ audio: result.audio,
364
432
  };
365
433
  }
366
434
  /**
@@ -89,6 +89,24 @@ export class ProviderRegistry {
89
89
  }, process.env.SAGEMAKER_MODEL || "sagemaker-model", ["sagemaker", "aws-sagemaker"]);
90
90
  logger.debug("All providers registered successfully");
91
91
  this.registered = true;
92
+ // ===== TTS HANDLER REGISTRATION =====
93
+ try {
94
+ // Create handler instance and register explicitly
95
+ const { GoogleTTSHandler } = await import("../adapters/tts/googleTTSHandler.js");
96
+ const { TTSProcessor } = await import("../utils/ttsProcessor.js");
97
+ const googleHandler = new GoogleTTSHandler();
98
+ TTSProcessor.registerHandler("google-ai", googleHandler);
99
+ TTSProcessor.registerHandler("vertex", googleHandler);
100
+ logger.debug("TTS handlers registered successfully", {
101
+ providers: ["google-ai", "vertex"],
102
+ });
103
+ }
104
+ catch (ttsError) {
105
+ logger.warn("Failed to register TTS handlers - TTS functionality will be unavailable", {
106
+ error: ttsError instanceof Error ? ttsError.message : String(ttsError),
107
+ });
108
+ // Don't throw - TTS is optional functionality
109
+ }
92
110
  }
93
111
  catch (error) {
94
112
  logger.error("Failed to register providers:", error);
@@ -1287,6 +1287,7 @@ Current user's request: ${currentInput}`;
1287
1287
  toolUsageContext: options.toolUsageContext,
1288
1288
  input: options.input, // This includes text, images, and content arrays
1289
1289
  region: options.region,
1290
+ tts: options.tts,
1290
1291
  };
1291
1292
  // Apply factory enhancement using centralized utilities
1292
1293
  const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
@@ -1360,6 +1361,7 @@ Current user's request: ${currentInput}`;
1360
1361
  factoryResult.domainType,
1361
1362
  }
1362
1363
  : undefined,
1364
+ audio: textResult.audio,
1363
1365
  };
1364
1366
  if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1365
1367
  options.context?.userId &&
@@ -1497,7 +1499,8 @@ Current user's request: ${currentInput}`;
1497
1499
  * Attempt MCP generation with retry logic
1498
1500
  */
1499
1501
  async attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag) {
1500
- if (!options.disableTools) {
1502
+ if (!options.disableTools &&
1503
+ !(options.tts?.enabled && !options.tts?.useAiResponse)) {
1501
1504
  return await this.performMCPGenerationRetries(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
1502
1505
  }
1503
1506
  return null;
@@ -1658,6 +1661,7 @@ Current user's request: ${currentInput}`;
1658
1661
  toolExecutions: transformedToolExecutions,
1659
1662
  enhancedWithTools: Boolean(hasToolExecutions), // Mark as enhanced if tools were actually used
1660
1663
  availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
1664
+ audio: result.audio,
1661
1665
  // Include analytics and evaluation from BaseProvider
1662
1666
  analytics: result.analytics,
1663
1667
  evaluation: result.evaluation,
@@ -1750,6 +1754,7 @@ Current user's request: ${currentInput}`;
1750
1754
  enhancedWithTools: false,
1751
1755
  analytics: result.analytics,
1752
1756
  evaluation: result.evaluation,
1757
+ audio: result.audio,
1753
1758
  };
1754
1759
  }
1755
1760
  catch (error) {
@@ -300,6 +300,36 @@ export type TextGenerationOptions = {
300
300
  timeout?: number | string;
301
301
  disableTools?: boolean;
302
302
  maxSteps?: number;
303
+ /**
304
+ * Text-to-Speech (TTS) configuration
305
+ *
306
+ * Enable audio generation from text. Behavior depends on useAiResponse flag:
307
+ * - When useAiResponse is false/undefined (default): TTS synthesizes the input text directly
308
+ * - When useAiResponse is true: TTS synthesizes the AI-generated response
309
+ *
310
+ * @example Using input text (default)
311
+ * ```typescript
312
+ * const neurolink = new NeuroLink();
313
+ * const result = await neurolink.generate({
314
+ * input: { text: "Hello world" },
315
+ * provider: "google-ai",
316
+ * tts: { enabled: true, voice: "en-US-Neural2-C" }
317
+ * });
318
+ * // TTS synthesizes "Hello world" directly, no AI generation
319
+ * ```
320
+ *
321
+ * @example Using AI response
322
+ * ```typescript
323
+ * const neurolink = new NeuroLink();
324
+ * const result = await neurolink.generate({
325
+ * input: { text: "Tell me a joke" },
326
+ * provider: "google-ai",
327
+ * tts: { enabled: true, useAiResponse: true, voice: "en-US-Neural2-C" }
328
+ * });
329
+ * // AI generates the joke, then TTS synthesizes the AI's response
330
+ * ```
331
+ */
332
+ tts?: TTSOptions;
303
333
  enableEvaluation?: boolean;
304
334
  enableAnalytics?: boolean;
305
335
  context?: Record<string, JsonValue>;
@@ -346,6 +376,7 @@ export type TextGenerationResult = {
346
376
  }>;
347
377
  analytics?: AnalyticsData;
348
378
  evaluation?: EvaluationData;
379
+ audio?: TTSResult;
349
380
  };
350
381
  /**
351
382
  * Enhanced result type with optional analytics/evaluation
@@ -19,6 +19,35 @@ export type TTSQuality = "standard" | "hd";
19
19
  export type TTSOptions = {
20
20
  /** Enable TTS output */
21
21
  enabled?: boolean;
22
+ /**
23
+ * Use the AI-generated response for TTS instead of the input text
24
+ *
25
+ * When false or undefined (default): TTS will synthesize the input text/prompt directly without calling AI generation
26
+ * When true: TTS will synthesize the AI-generated response after generation completes
27
+ *
28
+ * @default false
29
+ *
30
+ * @example Using input text directly (default)
31
+ * ```typescript
32
+ * const result = await neurolink.generate({
33
+ * input: { text: "Hello world" },
34
+ * provider: "google-ai",
35
+ * tts: { enabled: true } // or useAiResponse: false
36
+ * });
37
+ * // TTS synthesizes "Hello world" directly, no AI generation
38
+ * ```
39
+ *
40
+ * @example Using AI response
41
+ * ```typescript
42
+ * const result = await neurolink.generate({
43
+ * input: { text: "Tell me a joke" },
44
+ * provider: "google-ai",
45
+ * tts: { enabled: true, useAiResponse: true }
46
+ * });
47
+ * // AI generates the joke, then TTS synthesizes the AI's response
48
+ * ```
49
+ */
50
+ useAiResponse?: boolean;
22
51
  /** Voice identifier (e.g., "en-US-Neural2-C") */
23
52
  voice?: string;
24
53
  /** Audio format (default: mp3) */
@@ -39,14 +39,46 @@ export declare class TTSError extends NeuroLinkError {
39
39
  *
40
40
  * Each provider (Google AI, OpenAI, etc.) implements this interface
41
41
  * to provide TTS generation capabilities using their respective APIs.
42
+ *
43
+ * **Timeout Handling:**
44
+ * Implementations MUST handle their own timeouts for the `synthesize()` method.
45
+ * Recommended timeout: 30 seconds. Implementations should use `withTimeout()` utility
46
+ * or provider-specific timeout mechanisms (e.g., Google Cloud client timeout).
47
+ *
48
+ * **Error Handling:**
49
+ * Implementations should throw TTSError for all failures, including timeouts.
50
+ * Use appropriate error codes from TTS_ERROR_CODES.
51
+ *
52
+ * @example
53
+ * ```typescript
54
+ * class MyTTSHandler implements TTSHandler {
55
+ * async synthesize(text: string, options: TTSOptions): Promise<TTSResult> {
56
+ * // REQUIRED: Implement timeout handling
57
+ * return await withTimeout(
58
+ * this.actualSynthesis(text, options),
59
+ * 30000, // 30 second timeout
60
+ * 'TTS synthesis timed out'
61
+ * );
62
+ * }
63
+ *
64
+ * isConfigured(): boolean {
65
+ * return !!process.env.MY_TTS_API_KEY;
66
+ * }
67
+ * }
68
+ * ```
42
69
  */
43
70
  export interface TTSHandler {
44
71
  /**
45
72
  * Generate audio from text using provider-specific TTS API
46
73
  *
47
- * @param text - Text to convert to speech
48
- * @param options - TTS configuration options
74
+ * **IMPORTANT: Timeout Responsibility**
75
+ * Implementations MUST enforce their own timeouts (recommended: 30 seconds).
76
+ * Use the `withTimeout()` utility or provider-specific timeout mechanisms.
77
+ *
78
+ * @param text - Text to convert to speech (pre-validated, non-empty, within length limits)
79
+ * @param options - TTS configuration options (voice, format, speed, etc.)
49
80
  * @returns Audio buffer with metadata
81
+ * @throws {TTSError} On synthesis failure, timeout, or configuration issues
50
82
  */
51
83
  synthesize(text: string, options: TTSOptions): Promise<TTSResult>;
52
84
  /**
@@ -105,16 +137,6 @@ export declare class TTSProcessor {
105
137
  * @private
106
138
  */
107
139
  private static readonly DEFAULT_MAX_TEXT_LENGTH;
108
- /**
109
- * Default timeout for TTS synthesis operations (milliseconds)
110
- *
111
- * This timeout prevents indefinite hangs in provider API calls and serves as
112
- * a safety net for all TTS operations. Individual handlers may implement
113
- * shorter provider-specific timeouts.
114
- *
115
- * @private
116
- */
117
- private static readonly DEFAULT_SYNTHESIS_TIMEOUT_MS;
118
140
  /**
119
141
  * Register a TTS handler for a specific provider
120
142
  *
@@ -164,14 +186,19 @@ export declare class TTSProcessor {
164
186
  * 1. Validates input text (not empty, within length limits)
165
187
  * 2. Looks up the provider handler
166
188
  * 3. Verifies provider configuration
167
- * 4. Delegates synthesis to the provider
189
+ * 4. Delegates synthesis to the provider (timeout handled by provider)
168
190
  * 5. Enriches result with metadata
169
191
  *
192
+ * **Timeout Handling:**
193
+ * Timeouts are enforced by individual provider implementations (see TTSHandler interface).
194
+ * Providers typically use 30-second timeouts via `withTimeout()` utility or
195
+ * provider-specific timeout mechanisms (e.g., Google Cloud client timeout).
196
+ *
170
197
  * @param text - Text to convert to speech
171
198
  * @param provider - Provider identifier
172
199
  * @param options - TTS configuration options
173
200
  * @returns Audio result with buffer and metadata
174
- * @throws TTSError if validation fails, provider not supported/configured, or synthesis times out
201
+ * @throws TTSError if validation fails or provider not supported/configured
175
202
  *
176
203
  * @example
177
204
  * ```typescript
@@ -8,7 +8,7 @@
8
8
  */
9
9
  import { logger } from "./logger.js";
10
10
  import { ErrorCategory, ErrorSeverity } from "../constants/enums.js";
11
- import { NeuroLinkError, withTimeout } from "./errorHandling.js";
11
+ import { NeuroLinkError } from "./errorHandling.js";
12
12
  /**
13
13
  * TTS-specific error codes
14
14
  */
@@ -72,16 +72,6 @@ export class TTSProcessor {
72
72
  * @private
73
73
  */
74
74
  static DEFAULT_MAX_TEXT_LENGTH = 3000;
75
- /**
76
- * Default timeout for TTS synthesis operations (milliseconds)
77
- *
78
- * This timeout prevents indefinite hangs in provider API calls and serves as
79
- * a safety net for all TTS operations. Individual handlers may implement
80
- * shorter provider-specific timeouts.
81
- *
82
- * @private
83
- */
84
- static DEFAULT_SYNTHESIS_TIMEOUT_MS = 60000;
85
75
  /**
86
76
  * Register a TTS handler for a specific provider
87
77
  *
@@ -158,14 +148,19 @@ export class TTSProcessor {
158
148
  * 1. Validates input text (not empty, within length limits)
159
149
  * 2. Looks up the provider handler
160
150
  * 3. Verifies provider configuration
161
- * 4. Delegates synthesis to the provider
151
+ * 4. Delegates synthesis to the provider (timeout handled by provider)
162
152
  * 5. Enriches result with metadata
163
153
  *
154
+ * **Timeout Handling:**
155
+ * Timeouts are enforced by individual provider implementations (see TTSHandler interface).
156
+ * Providers typically use 30-second timeouts via `withTimeout()` utility or
157
+ * provider-specific timeout mechanisms (e.g., Google Cloud client timeout).
158
+ *
164
159
  * @param text - Text to convert to speech
165
160
  * @param provider - Provider identifier
166
161
  * @param options - TTS configuration options
167
162
  * @returns Audio result with buffer and metadata
168
- * @throws TTSError if validation fails, provider not supported/configured, or synthesis times out
163
+ * @throws TTSError if validation fails or provider not supported/configured
169
164
  *
170
165
  * @example
171
166
  * ```typescript
@@ -238,19 +233,8 @@ export class TTSProcessor {
238
233
  }
239
234
  try {
240
235
  logger.debug(`[TTSProcessor] Starting synthesis with provider: ${provider}`);
241
- // 5. Call handler.synthesize() with timeout protection (60 second safety net)
242
- const result = await withTimeout(handler.synthesize(trimmedText, options), this.DEFAULT_SYNTHESIS_TIMEOUT_MS, new TTSError({
243
- code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
244
- message: `TTS synthesis timeout for provider "${provider}" after ${this.DEFAULT_SYNTHESIS_TIMEOUT_MS}ms`,
245
- category: ErrorCategory.EXECUTION,
246
- severity: ErrorSeverity.HIGH,
247
- retriable: true,
248
- context: {
249
- provider,
250
- timeoutMs: this.DEFAULT_SYNTHESIS_TIMEOUT_MS,
251
- textLength: trimmedText.length,
252
- },
253
- }));
236
+ // 5. Call handler.synthesize() - providers handle their own timeouts
237
+ const result = await handler.synthesize(trimmedText, options);
254
238
  // 6. Post-processing: add metadata
255
239
  const enrichedResult = {
256
240
  ...result,
package/dist/neurolink.js CHANGED
@@ -1287,6 +1287,7 @@ Current user's request: ${currentInput}`;
1287
1287
  toolUsageContext: options.toolUsageContext,
1288
1288
  input: options.input, // This includes text, images, and content arrays
1289
1289
  region: options.region,
1290
+ tts: options.tts,
1290
1291
  };
1291
1292
  // Apply factory enhancement using centralized utilities
1292
1293
  const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
@@ -1360,6 +1361,7 @@ Current user's request: ${currentInput}`;
1360
1361
  factoryResult.domainType,
1361
1362
  }
1362
1363
  : undefined,
1364
+ audio: textResult.audio,
1363
1365
  };
1364
1366
  if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1365
1367
  options.context?.userId &&
@@ -1497,7 +1499,8 @@ Current user's request: ${currentInput}`;
1497
1499
  * Attempt MCP generation with retry logic
1498
1500
  */
1499
1501
  async attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag) {
1500
- if (!options.disableTools) {
1502
+ if (!options.disableTools &&
1503
+ !(options.tts?.enabled && !options.tts?.useAiResponse)) {
1501
1504
  return await this.performMCPGenerationRetries(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
1502
1505
  }
1503
1506
  return null;
@@ -1658,6 +1661,7 @@ Current user's request: ${currentInput}`;
1658
1661
  toolExecutions: transformedToolExecutions,
1659
1662
  enhancedWithTools: Boolean(hasToolExecutions), // Mark as enhanced if tools were actually used
1660
1663
  availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
1664
+ audio: result.audio,
1661
1665
  // Include analytics and evaluation from BaseProvider
1662
1666
  analytics: result.analytics,
1663
1667
  evaluation: result.evaluation,
@@ -1750,6 +1754,7 @@ Current user's request: ${currentInput}`;
1750
1754
  enhancedWithTools: false,
1751
1755
  analytics: result.analytics,
1752
1756
  evaluation: result.evaluation,
1757
+ audio: result.audio,
1753
1758
  };
1754
1759
  }
1755
1760
  catch (error) {
@@ -300,6 +300,36 @@ export type TextGenerationOptions = {
300
300
  timeout?: number | string;
301
301
  disableTools?: boolean;
302
302
  maxSteps?: number;
303
+ /**
304
+ * Text-to-Speech (TTS) configuration
305
+ *
306
+ * Enable audio generation from text. Behavior depends on useAiResponse flag:
307
+ * - When useAiResponse is false/undefined (default): TTS synthesizes the input text directly
308
+ * - When useAiResponse is true: TTS synthesizes the AI-generated response
309
+ *
310
+ * @example Using input text (default)
311
+ * ```typescript
312
+ * const neurolink = new NeuroLink();
313
+ * const result = await neurolink.generate({
314
+ * input: { text: "Hello world" },
315
+ * provider: "google-ai",
316
+ * tts: { enabled: true, voice: "en-US-Neural2-C" }
317
+ * });
318
+ * // TTS synthesizes "Hello world" directly, no AI generation
319
+ * ```
320
+ *
321
+ * @example Using AI response
322
+ * ```typescript
323
+ * const neurolink = new NeuroLink();
324
+ * const result = await neurolink.generate({
325
+ * input: { text: "Tell me a joke" },
326
+ * provider: "google-ai",
327
+ * tts: { enabled: true, useAiResponse: true, voice: "en-US-Neural2-C" }
328
+ * });
329
+ * // AI generates the joke, then TTS synthesizes the AI's response
330
+ * ```
331
+ */
332
+ tts?: TTSOptions;
303
333
  enableEvaluation?: boolean;
304
334
  enableAnalytics?: boolean;
305
335
  context?: Record<string, JsonValue>;
@@ -346,6 +376,7 @@ export type TextGenerationResult = {
346
376
  }>;
347
377
  analytics?: AnalyticsData;
348
378
  evaluation?: EvaluationData;
379
+ audio?: TTSResult;
349
380
  };
350
381
  /**
351
382
  * Enhanced result type with optional analytics/evaluation
@@ -19,6 +19,35 @@ export type TTSQuality = "standard" | "hd";
19
19
  export type TTSOptions = {
20
20
  /** Enable TTS output */
21
21
  enabled?: boolean;
22
+ /**
23
+ * Use the AI-generated response for TTS instead of the input text
24
+ *
25
+ * When false or undefined (default): TTS will synthesize the input text/prompt directly without calling AI generation
26
+ * When true: TTS will synthesize the AI-generated response after generation completes
27
+ *
28
+ * @default false
29
+ *
30
+ * @example Using input text directly (default)
31
+ * ```typescript
32
+ * const result = await neurolink.generate({
33
+ * input: { text: "Hello world" },
34
+ * provider: "google-ai",
35
+ * tts: { enabled: true } // or useAiResponse: false
36
+ * });
37
+ * // TTS synthesizes "Hello world" directly, no AI generation
38
+ * ```
39
+ *
40
+ * @example Using AI response
41
+ * ```typescript
42
+ * const result = await neurolink.generate({
43
+ * input: { text: "Tell me a joke" },
44
+ * provider: "google-ai",
45
+ * tts: { enabled: true, useAiResponse: true }
46
+ * });
47
+ * // AI generates the joke, then TTS synthesizes the AI's response
48
+ * ```
49
+ */
50
+ useAiResponse?: boolean;
22
51
  /** Voice identifier (e.g., "en-US-Neural2-C") */
23
52
  voice?: string;
24
53
  /** Audio format (default: mp3) */
@@ -39,14 +39,46 @@ export declare class TTSError extends NeuroLinkError {
39
39
  *
40
40
  * Each provider (Google AI, OpenAI, etc.) implements this interface
41
41
  * to provide TTS generation capabilities using their respective APIs.
42
+ *
43
+ * **Timeout Handling:**
44
+ * Implementations MUST handle their own timeouts for the `synthesize()` method.
45
+ * Recommended timeout: 30 seconds. Implementations should use `withTimeout()` utility
46
+ * or provider-specific timeout mechanisms (e.g., Google Cloud client timeout).
47
+ *
48
+ * **Error Handling:**
49
+ * Implementations should throw TTSError for all failures, including timeouts.
50
+ * Use appropriate error codes from TTS_ERROR_CODES.
51
+ *
52
+ * @example
53
+ * ```typescript
54
+ * class MyTTSHandler implements TTSHandler {
55
+ * async synthesize(text: string, options: TTSOptions): Promise<TTSResult> {
56
+ * // REQUIRED: Implement timeout handling
57
+ * return await withTimeout(
58
+ * this.actualSynthesis(text, options),
59
+ * 30000, // 30 second timeout
60
+ * 'TTS synthesis timed out'
61
+ * );
62
+ * }
63
+ *
64
+ * isConfigured(): boolean {
65
+ * return !!process.env.MY_TTS_API_KEY;
66
+ * }
67
+ * }
68
+ * ```
42
69
  */
43
70
  export interface TTSHandler {
44
71
  /**
45
72
  * Generate audio from text using provider-specific TTS API
46
73
  *
47
- * @param text - Text to convert to speech
48
- * @param options - TTS configuration options
74
+ * **IMPORTANT: Timeout Responsibility**
75
+ * Implementations MUST enforce their own timeouts (recommended: 30 seconds).
76
+ * Use the `withTimeout()` utility or provider-specific timeout mechanisms.
77
+ *
78
+ * @param text - Text to convert to speech (pre-validated, non-empty, within length limits)
79
+ * @param options - TTS configuration options (voice, format, speed, etc.)
49
80
  * @returns Audio buffer with metadata
81
+ * @throws {TTSError} On synthesis failure, timeout, or configuration issues
50
82
  */
51
83
  synthesize(text: string, options: TTSOptions): Promise<TTSResult>;
52
84
  /**
@@ -105,16 +137,6 @@ export declare class TTSProcessor {
105
137
  * @private
106
138
  */
107
139
  private static readonly DEFAULT_MAX_TEXT_LENGTH;
108
- /**
109
- * Default timeout for TTS synthesis operations (milliseconds)
110
- *
111
- * This timeout prevents indefinite hangs in provider API calls and serves as
112
- * a safety net for all TTS operations. Individual handlers may implement
113
- * shorter provider-specific timeouts.
114
- *
115
- * @private
116
- */
117
- private static readonly DEFAULT_SYNTHESIS_TIMEOUT_MS;
118
140
  /**
119
141
  * Register a TTS handler for a specific provider
120
142
  *
@@ -164,14 +186,19 @@ export declare class TTSProcessor {
164
186
  * 1. Validates input text (not empty, within length limits)
165
187
  * 2. Looks up the provider handler
166
188
  * 3. Verifies provider configuration
167
- * 4. Delegates synthesis to the provider
189
+ * 4. Delegates synthesis to the provider (timeout handled by provider)
168
190
  * 5. Enriches result with metadata
169
191
  *
192
+ * **Timeout Handling:**
193
+ * Timeouts are enforced by individual provider implementations (see TTSHandler interface).
194
+ * Providers typically use 30-second timeouts via `withTimeout()` utility or
195
+ * provider-specific timeout mechanisms (e.g., Google Cloud client timeout).
196
+ *
170
197
  * @param text - Text to convert to speech
171
198
  * @param provider - Provider identifier
172
199
  * @param options - TTS configuration options
173
200
  * @returns Audio result with buffer and metadata
174
- * @throws TTSError if validation fails, provider not supported/configured, or synthesis times out
201
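+ * @throws TTSError if validation fails or the provider is not supported/configured; errors raised by the provider's synthesize() (including provider-enforced timeouts) may also surface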
175
202
  *
176
203
  * @example
177
204
  * ```typescript
@@ -8,7 +8,7 @@
8
8
  */
9
9
  import { logger } from "./logger.js";
10
10
  import { ErrorCategory, ErrorSeverity } from "../constants/enums.js";
11
- import { NeuroLinkError, withTimeout } from "./errorHandling.js";
11
+ import { NeuroLinkError } from "./errorHandling.js";
12
12
  /**
13
13
  * TTS-specific error codes
14
14
  */
@@ -72,16 +72,6 @@ export class TTSProcessor {
72
72
  * @private
73
73
  */
74
74
  static DEFAULT_MAX_TEXT_LENGTH = 3000;
75
- /**
76
- * Default timeout for TTS synthesis operations (milliseconds)
77
- *
78
- * This timeout prevents indefinite hangs in provider API calls and serves as
79
- * a safety net for all TTS operations. Individual handlers may implement
80
- * shorter provider-specific timeouts.
81
- *
82
- * @private
83
- */
84
- static DEFAULT_SYNTHESIS_TIMEOUT_MS = 60000;
85
75
  /**
86
76
  * Register a TTS handler for a specific provider
87
77
  *
@@ -158,14 +148,19 @@ export class TTSProcessor {
158
148
  * 1. Validates input text (not empty, within length limits)
159
149
  * 2. Looks up the provider handler
160
150
  * 3. Verifies provider configuration
161
- * 4. Delegates synthesis to the provider
151
+ * 4. Delegates synthesis to the provider (timeout handled by provider)
162
152
  * 5. Enriches result with metadata
163
153
  *
154
+ * **Timeout Handling:**
155
+ * Timeouts are enforced by individual provider implementations (see TTSHandler interface).
156
+ * Providers typically use 30-second timeouts via `withTimeout()` utility or
157
+ * provider-specific timeout mechanisms (e.g., Google Cloud client timeout).
158
+ *
164
159
  * @param text - Text to convert to speech
165
160
  * @param provider - Provider identifier
166
161
  * @param options - TTS configuration options
167
162
  * @returns Audio result with buffer and metadata
168
- * @throws TTSError if validation fails, provider not supported/configured, or synthesis times out
163
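+ * @throws TTSError if validation fails or the provider is not supported/configured; errors raised by the provider's synthesize() (including provider-enforced timeouts) may also surface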
169
164
  *
170
165
  * @example
171
166
  * ```typescript
@@ -238,19 +233,8 @@ export class TTSProcessor {
238
233
  }
239
234
  try {
240
235
  logger.debug(`[TTSProcessor] Starting synthesis with provider: ${provider}`);
241
- // 5. Call handler.synthesize() with timeout protection (60 second safety net)
242
- const result = await withTimeout(handler.synthesize(trimmedText, options), this.DEFAULT_SYNTHESIS_TIMEOUT_MS, new TTSError({
243
- code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
244
- message: `TTS synthesis timeout for provider "${provider}" after ${this.DEFAULT_SYNTHESIS_TIMEOUT_MS}ms`,
245
- category: ErrorCategory.EXECUTION,
246
- severity: ErrorSeverity.HIGH,
247
- retriable: true,
248
- context: {
249
- provider,
250
- timeoutMs: this.DEFAULT_SYNTHESIS_TIMEOUT_MS,
251
- textLength: trimmedText.length,
252
- },
253
- }));
236
+ // 5. Call handler.synthesize() - providers handle their own timeouts
237
+ const result = await handler.synthesize(trimmedText, options);
254
238
  // 6. Post-processing: add metadata
255
239
  const enrichedResult = {
256
240
  ...result,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "8.18.0",
3
+ "version": "8.19.0",
4
4
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
5
5
  "author": {
6
6
  "name": "Juspay Technologies",