@juspay/neurolink 9.61.1 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +382 -364
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/mcp/toolRegistry.js +7 -1
  20. package/dist/lib/neurolink.d.ts +19 -0
  21. package/dist/lib/neurolink.js +252 -14
  22. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  23. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  24. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  25. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  26. package/dist/lib/server/voice/tokenCompare.js +23 -0
  27. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  29. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  30. package/dist/lib/types/generate.d.ts +47 -0
  31. package/dist/lib/types/hitl.d.ts +3 -0
  32. package/dist/lib/types/index.d.ts +1 -1
  33. package/dist/lib/types/index.js +1 -1
  34. package/dist/lib/types/realtime.d.ts +243 -0
  35. package/dist/lib/types/realtime.js +70 -0
  36. package/dist/lib/types/server.d.ts +68 -0
  37. package/dist/lib/types/span.d.ts +2 -0
  38. package/dist/lib/types/span.js +2 -0
  39. package/dist/lib/types/stream.d.ts +36 -14
  40. package/dist/lib/types/stt.d.ts +585 -0
  41. package/dist/lib/types/stt.js +90 -0
  42. package/dist/lib/types/tools.d.ts +2 -0
  43. package/dist/lib/types/tts.d.ts +23 -11
  44. package/dist/lib/types/tts.js +7 -0
  45. package/dist/lib/types/voice.d.ts +272 -0
  46. package/dist/lib/types/voice.js +137 -0
  47. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  48. package/dist/lib/utils/audioFormatDetector.js +34 -0
  49. package/dist/lib/utils/errorHandling.js +4 -0
  50. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  51. package/dist/lib/utils/sttProcessor.js +295 -0
  52. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  53. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  54. package/dist/lib/voice/audio-utils.d.ts +135 -0
  55. package/dist/lib/voice/audio-utils.js +435 -0
  56. package/dist/lib/voice/errors.d.ts +123 -0
  57. package/dist/lib/voice/errors.js +386 -0
  58. package/dist/lib/voice/index.d.ts +26 -0
  59. package/dist/lib/voice/index.js +55 -0
  60. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  61. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  62. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  63. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  64. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  65. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  66. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  67. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  68. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  69. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  70. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  71. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  72. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  73. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  74. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  75. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  76. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  77. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  78. package/dist/lib/voice/stream-handler.d.ts +166 -0
  79. package/dist/lib/voice/stream-handler.js +514 -0
  80. package/dist/mcp/toolRegistry.js +7 -1
  81. package/dist/neurolink.d.ts +19 -0
  82. package/dist/neurolink.js +252 -14
  83. package/dist/observability/exporters/laminarExporter.js +1 -0
  84. package/dist/observability/exporters/posthogExporter.js +1 -0
  85. package/dist/observability/utils/spanSerializer.js +1 -0
  86. package/dist/server/voice/tokenCompare.d.ts +14 -0
  87. package/dist/server/voice/tokenCompare.js +22 -0
  88. package/dist/server/voice/voiceServerApp.js +62 -3
  89. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  90. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  91. package/dist/types/generate.d.ts +47 -0
  92. package/dist/types/hitl.d.ts +3 -0
  93. package/dist/types/index.d.ts +1 -1
  94. package/dist/types/index.js +1 -1
  95. package/dist/types/realtime.d.ts +243 -0
  96. package/dist/types/realtime.js +69 -0
  97. package/dist/types/server.d.ts +68 -0
  98. package/dist/types/span.d.ts +2 -0
  99. package/dist/types/span.js +2 -0
  100. package/dist/types/stream.d.ts +36 -14
  101. package/dist/types/stt.d.ts +585 -0
  102. package/dist/types/stt.js +89 -0
  103. package/dist/types/tools.d.ts +2 -0
  104. package/dist/types/tts.d.ts +23 -11
  105. package/dist/types/tts.js +7 -0
  106. package/dist/types/voice.d.ts +272 -0
  107. package/dist/types/voice.js +136 -0
  108. package/dist/utils/audioFormatDetector.d.ts +15 -0
  109. package/dist/utils/audioFormatDetector.js +33 -0
  110. package/dist/utils/errorHandling.js +4 -0
  111. package/dist/utils/sttProcessor.d.ts +115 -0
  112. package/dist/utils/sttProcessor.js +294 -0
  113. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  114. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  115. package/dist/voice/audio-utils.d.ts +135 -0
  116. package/dist/voice/audio-utils.js +434 -0
  117. package/dist/voice/errors.d.ts +123 -0
  118. package/dist/voice/errors.js +385 -0
  119. package/dist/voice/index.d.ts +26 -0
  120. package/dist/voice/index.js +54 -0
  121. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  122. package/dist/voice/providers/AzureSTT.js +344 -0
  123. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  124. package/dist/voice/providers/AzureTTS.js +348 -0
  125. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  126. package/dist/voice/providers/DeepgramSTT.js +549 -0
  127. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  128. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  129. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  130. package/dist/voice/providers/GeminiLive.js +371 -0
  131. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  132. package/dist/voice/providers/GoogleSTT.js +453 -0
  133. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  134. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  135. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  136. package/dist/voice/providers/OpenAISTT.js +285 -0
  137. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  138. package/dist/voice/providers/OpenAITTS.js +270 -0
  139. package/dist/voice/stream-handler.d.ts +166 -0
  140. package/dist/voice/stream-handler.js +513 -0
  141. package/package.json +5 -2
@@ -0,0 +1,294 @@
1
+ /**
2
+ * Speech-to-Text (STT) Processing Utility
3
+ *
4
+ * Central orchestrator for all STT operations across providers.
5
+ * Manages provider-specific STT handlers and audio transcription.
6
+ *
7
+ * @module utils/sttProcessor
8
+ */
9
+ import { logger } from "./logger.js";
10
+ import { STT_ERROR_CODES } from "../types/index.js";
11
+ import { ErrorCategory, ErrorSeverity } from "../constants/enums.js";
12
+ import { STTError } from "../voice/errors.js";
13
+ import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../observability/index.js";
14
+ /**
15
+ * STT processor class for orchestrating speech-to-text operations
16
+ *
17
+ * Follows the same pattern as TTSProcessor, CSVProcessor, ImageProcessor, and PDFProcessor.
18
+ * Provides a unified interface for STT transcription across multiple providers.
19
+ *
20
+ * @example
21
+ * ```typescript
22
+ * // Register a handler
23
+ * STTProcessor.registerHandler('whisper', whisperHandler);
24
+ *
25
+ * // Check if provider is supported
26
+ * if (STTProcessor.supports('whisper')) {
27
+ * // Provider is registered
28
+ * }
29
+ * ```
30
+ */
31
+ export class STTProcessor {
32
+ /**
33
+ * Handler registry mapping provider names to STT handlers
34
+ * Uses Map for O(1) lookups and better type safety
35
+ *
36
+ * @private
37
+ */
38
+ static handlers = new Map();
39
+ /**
40
+ * Default maximum audio duration for STT transcription (in seconds)
41
+ *
42
+ * Providers can override this value by specifying the `maxAudioDuration` property
43
+ * in their respective `STTHandler` implementation. If not specified, this default
44
+ * value will be used (5 minutes).
45
+ *
46
+ * @private
47
+ */
48
+ static DEFAULT_MAX_AUDIO_DURATION = 300;
49
+ /**
50
+ * Register an STT handler for a specific provider
51
+ *
52
+ * Allows providers to register their STT implementation at runtime.
53
+ *
54
+ * @param providerName - Provider identifier (e.g., 'whisper', 'deepgram')
55
+ * @param handler - STT handler implementation
56
+ *
57
+ * @example
58
+ * ```typescript
59
+ * const whisperHandler: STTHandler = {
60
+ * transcribe: async (audio, options) => { ... },
61
+ * getSupportedFormats: () => ["mp3", "wav"],
62
+ * isConfigured: () => true
63
+ * };
64
+ *
65
+ * STTProcessor.registerHandler('whisper', whisperHandler);
66
+ * ```
67
+ */
68
+ static registerHandler(providerName, handler) {
69
+ if (!providerName) {
70
+ throw new Error("Provider name is required");
71
+ }
72
+ if (!handler) {
73
+ throw new Error("Handler is required");
74
+ }
75
+ const normalizedName = providerName.toLowerCase();
76
+ if (this.handlers.has(normalizedName)) {
77
+ logger.warn(`[STTProcessor] Overwriting existing handler for provider: ${normalizedName}`);
78
+ }
79
+ this.handlers.set(normalizedName, handler);
80
+ logger.debug(`[STTProcessor] Registered STT handler for provider: ${normalizedName}`);
81
+ }
82
+ /**
83
+ * Get a registered STT handler by provider name
84
+ *
85
+ * @private
86
+ * @param providerName - Provider identifier
87
+ * @returns Handler instance or undefined if not registered
88
+ */
89
+ static getHandler(providerName) {
90
+ const normalizedName = providerName.toLowerCase();
91
+ return this.handlers.get(normalizedName);
92
+ }
93
+ /**
94
+ * Check if a provider is supported (has a registered STT handler)
95
+ *
96
+ * @param providerName - Provider identifier
97
+ * @returns True if handler is registered
98
+ *
99
+ * @example
100
+ * ```typescript
101
+ * if (STTProcessor.supports('whisper')) {
102
+ * console.log('Whisper STT is supported');
103
+ * }
104
+ * ```
105
+ */
106
+ static supports(providerName) {
107
+ if (!providerName) {
108
+ logger.error("[STTProcessor] Provider name is required for supports check");
109
+ return false;
110
+ }
111
+ const normalizedName = providerName.toLowerCase();
112
+ const isSupported = this.handlers.has(normalizedName);
113
+ if (!isSupported) {
114
+ logger.debug(`[STTProcessor] Provider ${providerName} is not supported`);
115
+ }
116
+ return isSupported;
117
+ }
118
+ /**
119
+ * Transcribe audio to text using a registered STT provider
120
+ *
121
+ * Orchestrates the speech-to-text transcription process:
122
+ * 1. Validates audio input (non-empty)
123
+ * 2. Looks up the provider handler
124
+ * 3. Verifies provider configuration
125
+ * 4. Delegates transcription to the provider
126
+ * 5. Enriches result with provider metadata
127
+ *
128
+ * @param audio - Audio data as Buffer or ArrayBuffer
129
+ * @param provider - Provider identifier
130
+ * @param options - STT configuration options
131
+ * @returns Transcription result with text and metadata
132
+ * @throws STTError if validation fails or provider not supported/configured
133
+ *
134
+ * @example
135
+ * ```typescript
136
+ * const result = await STTProcessor.transcribe(audioBuffer, "whisper", {
137
+ * language: "en-US",
138
+ * punctuation: true,
139
+ * });
140
+ *
141
+ * console.log(`Transcription: ${result.text}`);
142
+ * console.log(`Confidence: ${result.confidence}`);
143
+ * ```
144
+ */
145
+ static async transcribe(audio, provider, options) {
146
+ // Create span early so preflight failures are captured
147
+ const span = SpanSerializer.createSpan(SpanType.STT, "stt.transcribe", {
148
+ "stt.operation": "transcribe",
149
+ "stt.provider": provider,
150
+ "stt.language": options.language,
151
+ "stt.format": options.format,
152
+ });
153
+ try {
154
+ // 1. Audio validation: reject empty + oversized audio
155
+ const byteLength = audio instanceof ArrayBuffer ? audio.byteLength : audio.length;
156
+ if (!byteLength || byteLength === 0) {
157
+ logger.error("[STTProcessor] Audio data is required for transcription");
158
+ throw new STTError({
159
+ code: STT_ERROR_CODES.AUDIO_EMPTY,
160
+ message: "Audio data is required for STT transcription",
161
+ severity: ErrorSeverity.LOW,
162
+ retriable: false,
163
+ context: { provider },
164
+ });
165
+ }
166
+ // NEW13: enforce a size upper bound so a multi-GB Buffer can't OOM the
167
+ // process. Default 25 MB matches Whisper's documented limit; callers
168
+ // can override via `options.maxAudioBytes`. Permanent errors at the
169
+ // provider level (e.g. Whisper rejecting >25MB) become this clean
170
+ // STTError instead of a memory crash or vendor 413.
171
+ const maxAudioBytes = options.maxAudioBytes ?? 25_000_000;
172
+ if (byteLength > maxAudioBytes) {
173
+ logger.error(`[STTProcessor] Audio buffer ${byteLength} bytes exceeds limit ${maxAudioBytes}`);
174
+ throw new STTError({
175
+ code: STT_ERROR_CODES.AUDIO_TOO_LONG,
176
+ message: `Audio buffer ${byteLength} bytes exceeds maximum ${maxAudioBytes} bytes for STT transcription. Increase maxAudioBytes in options or chunk the audio.`,
177
+ severity: ErrorSeverity.HIGH,
178
+ retriable: false,
179
+ context: { provider, byteLength, maxAudioBytes },
180
+ });
181
+ }
182
+ // 2. Handler lookup and error if provider not supported
183
+ const handler = this.getHandler(provider);
184
+ if (!handler) {
185
+ logger.error(`[STTProcessor] Provider "${provider}" is not registered`);
186
+ throw new STTError({
187
+ code: STT_ERROR_CODES.PROVIDER_NOT_SUPPORTED,
188
+ message: `STT provider "${provider}" is not supported. Use STTProcessor.registerHandler() to register it.`,
189
+ severity: ErrorSeverity.HIGH,
190
+ retriable: false,
191
+ context: {
192
+ provider,
193
+ availableProviders: Array.from(this.handlers.keys()),
194
+ },
195
+ });
196
+ }
197
+ // 3. Format compatibility check — fail fast when the caller passes
198
+ // an audio format the provider explicitly does not decode (e.g. MP3 to
199
+ // azure-stt). Without this, providers like Azure return a Success
200
+ // response with empty text, which then cascades into a confusing
201
+ // "prompt must be at least 1 character long" failure on the downstream
202
+ // LLM call. We only validate when both `options.format` and
203
+ // `handler.getSupportedFormats()` are present so we never block providers
204
+ // that prefer to do their own detection.
205
+ if (options.format && typeof handler.getSupportedFormats === "function") {
206
+ const supported = handler.getSupportedFormats();
207
+ if (Array.isArray(supported) &&
208
+ supported.length > 0 &&
209
+ !supported.includes(options.format)) {
210
+ logger.error(`[STTProcessor] Provider "${provider}" does not support audio format "${options.format}"`);
211
+ throw new STTError({
212
+ code: STT_ERROR_CODES.INVALID_AUDIO_FORMAT,
213
+ message: `STT provider "${provider}" does not support audio format "${options.format}". Supported formats: ${supported.join(", ")}.`,
214
+ severity: ErrorSeverity.HIGH,
215
+ retriable: false,
216
+ context: {
217
+ provider,
218
+ requestedFormat: options.format,
219
+ supportedFormats: supported,
220
+ },
221
+ });
222
+ }
223
+ }
224
+ // 4. Configuration check
225
+ if (!handler.isConfigured()) {
226
+ logger.warn(`[STTProcessor] Provider "${provider}" is not properly configured`);
227
+ throw new STTError({
228
+ code: STT_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
229
+ message: `STT provider "${provider}" is not configured. Please set the required API keys.`,
230
+ category: ErrorCategory.CONFIGURATION,
231
+ severity: ErrorSeverity.HIGH,
232
+ retriable: false,
233
+ context: { provider },
234
+ });
235
+ }
236
+ logger.debug(`[STTProcessor] Starting transcription with provider: ${provider}`);
237
+ // 5. Call handler.transcribe() - providers handle their own timeouts
238
+ const result = await handler.transcribe(audio, options);
239
+ // 6. Post-processing: enrich result with provider metadata
240
+ const enrichedResult = {
241
+ ...result,
242
+ metadata: {
243
+ ...result.metadata,
244
+ provider,
245
+ latency: result.metadata?.latency ?? 0,
246
+ },
247
+ };
248
+ // Don't log transcript content at INFO — voice transcriptions can carry
249
+ // PII / health / financial data, and INFO is typically persisted in
250
+ // production log aggregation (CloudWatch, Datadog, etc.). GDPR / CCPA
251
+ // concern. Length and provider are safe to record.
252
+ logger.debug(`[STTProcessor] Transcription completed for provider "${provider}" (${result.text.length} chars)`);
253
+ // 7. Record successful span
254
+ const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
255
+ getMetricsAggregator().recordSpan(endedSpan);
256
+ // 8. Return STTResult with text, confidence, metadata
257
+ return enrichedResult;
258
+ }
259
+ catch (err) {
260
+ // Record error span
261
+ const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR, err instanceof Error ? err.message : String(err));
262
+ getMetricsAggregator().recordSpan(endedSpan);
263
+ // Re-throw STTError as-is
264
+ if (err instanceof STTError) {
265
+ throw err;
266
+ }
267
+ // Wrap other errors in STTError
268
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
269
+ logger.error(`[STTProcessor] Transcription failed for provider "${provider}": ${errorMessage}`);
270
+ throw new STTError({
271
+ code: STT_ERROR_CODES.TRANSCRIPTION_FAILED,
272
+ message: `STT transcription failed for provider "${provider}": ${errorMessage}`,
273
+ category: ErrorCategory.EXECUTION,
274
+ severity: ErrorSeverity.HIGH,
275
+ retriable: true,
276
+ context: {
277
+ provider,
278
+ audioByteLength: audio instanceof ArrayBuffer ? audio.byteLength : audio.length,
279
+ // Sanitize: strip free-text user-supplied fields (e.g. WhisperSTTOptions.prompt)
280
+ // from the error context so error-monitoring pipelines (Sentry, Datadog APM)
281
+ // don't ingest user audio prompt text.
282
+ options: {
283
+ format: options.format,
284
+ language: options.language,
285
+ wordTimestamps: options.wordTimestamps,
286
+ maxAudioBytes: options.maxAudioBytes,
287
+ speakerDiarization: options.speakerDiarization,
288
+ },
289
+ },
290
+ originalError: err instanceof Error ? err : undefined,
291
+ });
292
+ }
293
+ }
294
+ }
@@ -0,0 +1,183 @@
1
+ /**
2
+ * Realtime Voice API Infrastructure
3
+ *
4
+ * Base handler and processor for realtime voice communication.
5
+ * Supports bidirectional audio streaming with providers like OpenAI and Gemini.
6
+ *
7
+ * @module voice/RealtimeVoiceAPI
8
+ */
9
+ import type { TTSAudioFormat, RealtimeAudioChunk, RealtimeConfig, RealtimeEventHandlers, RealtimeHandler, RealtimeSession, RealtimeSessionState } from "../types/index.js";
10
/**
 * Static entry point for realtime voice sessions.
 *
 * Exposes one set of operations (connect, send audio/text, trigger or cancel
 * a response, disconnect) that are addressed by provider name and served by
 * whichever handler was registered under that name.
 *
 * @example
 * ```typescript
 * // Register a handler (typically done in providerRegistry.ts on startup)
 * RealtimeProcessor.registerHandler('openai-realtime', openaiHandler);
 *
 * // Connect to a session — the first arg is the registered handler key,
 * // and `config.provider` must match the same key.
 * const session = await RealtimeProcessor.connect('openai-realtime', {
 *   provider: 'openai-realtime',
 *   voice: 'alloy',
 *   systemPrompt: 'You are a helpful assistant.'
 * });
 *
 * // Send audio
 * await RealtimeProcessor.sendAudio('openai-realtime', audioBuffer);
 *
 * // Disconnect
 * await RealtimeProcessor.disconnect('openai-realtime');
 * ```
 */
export declare class RealtimeProcessor {
  /**
   * Registry of realtime handlers, looked up by provider name.
   */
  private static readonly handlers;
  /**
   * Currently active sessions, tracked per provider.
   */
  private static readonly sessions;
  /**
   * Register the realtime handler that serves a given provider.
   *
   * @param providerName - Key the handler is registered under (e.g., 'openai-realtime')
   * @param handler - Realtime handler implementation
   */
  static registerHandler(providerName: string, handler: RealtimeHandler): void;
  /**
   * Look up the handler registered for a provider name.
   */
  private static getHandler;
  /**
   * Report whether a provider is supported.
   *
   * @param providerName - Provider identifier
   */
  static supports(providerName: string): boolean;
  /**
   * List the names of all registered providers.
   */
  static getProviders(): string[];
  /**
   * Open a realtime session with a provider.
   *
   * @param provider - Provider identifier
   * @param config - Session configuration
   * @param handlers - Optional event handlers for the session
   * @returns Promise resolving to the session information
   */
  static connect(provider: string, config: RealtimeConfig, handlers?: RealtimeEventHandlers): Promise<RealtimeSession>;
  /**
   * Close the realtime session of a provider.
   *
   * @param provider - Provider identifier
   */
  static disconnect(provider: string): Promise<void>;
  /**
   * Send audio into a provider's realtime session.
   *
   * @param provider - Provider identifier
   * @param audio - Audio data, either a raw Buffer or a RealtimeAudioChunk
   */
  static sendAudio(provider: string, audio: Buffer | RealtimeAudioChunk): Promise<void>;
  /**
   * Send text into a provider's realtime session.
   *
   * @param provider - Provider identifier
   * @param text - Text to send
   */
  static sendText(provider: string, text: string): Promise<void>;
  /**
   * Ask the model to respond now (for manual turn detection).
   *
   * @param provider - Provider identifier
   */
  static triggerResponse(provider: string): Promise<void>;
  /**
   * Cancel the response currently in progress.
   *
   * @param provider - Provider identifier
   */
  static cancelResponse(provider: string): Promise<void>;
  /**
   * Get the current session of a provider.
   *
   * @param provider - Provider identifier
   * @returns The session, or null
   */
  static getSession(provider: string): RealtimeSession | null;
  /**
   * Report whether a provider has an active session.
   *
   * @param provider - Provider identifier
   */
  static isConnected(provider: string): boolean;
  /**
   * Get the audio formats a provider supports.
   *
   * @param provider - Provider identifier
   */
  static getSupportedFormats(provider: string): TTSAudioFormat[];
  /**
   * Remove all handlers and sessions (intended for tests).
   */
  static clearHandlers(): void;
}
128
/**
 * Abstract base class for realtime handlers.
 *
 * Declares the provider-specific operations a subclass must implement and
 * supplies shared session/event plumbing that provider handlers can inherit.
 */
export declare abstract class BaseRealtimeHandler implements RealtimeHandler {
  /** Provider identifier; typed as the `provider` field of RealtimeConfig. */
  abstract readonly name: RealtimeConfig["provider"];
  /** Current session, or null. */
  protected session: RealtimeSession | null;
  /** Event handlers supplied by the caller, or null. */
  protected eventHandlers: RealtimeEventHandlers | null;
  /** Current session state. */
  protected state: RealtimeSessionState;
  /** Open a session using the given configuration. */
  abstract connect(config: RealtimeConfig): Promise<RealtimeSession>;
  /** Close the session. */
  abstract disconnect(): Promise<void>;
  /** Send audio, either a raw Buffer or a RealtimeAudioChunk. */
  abstract sendAudio(audio: Buffer | RealtimeAudioChunk): Promise<void>;
  /** Report whether the provider is configured. */
  abstract isConfigured(): boolean;
  /** List the audio formats the provider supports. */
  abstract getSupportedFormats(): TTSAudioFormat[];
  /** Report whether the handler is connected. */
  isConnected(): boolean;
  /** Return the current session, or null. */
  getSession(): RealtimeSession | null;
  /**
   * Attach event handlers.
   * NOTE(review): presumably stored in `eventHandlers`; the implementation
   * is not visible in this declaration file.
   */
  on(handlers: RealtimeEventHandlers): void;
  /**
   * Detach event handlers.
   * NOTE(review): presumably the counterpart of `on()`; confirm against the
   * implementation.
   */
  off(): void;
  /**
   * Emit a state-change event.
   */
  protected emitStateChange(newState: RealtimeSessionState): void;
  /**
   * Emit an audio event.
   */
  protected emitAudio(chunk: RealtimeAudioChunk): void;
  /**
   * Emit a transcript event.
   */
  protected emitTranscript(text: string, isFinal: boolean): void;
  /**
   * Emit a text event.
   */
  protected emitText(text: string, isFinal: boolean): void;
  /**
   * Emit a function-call event; the returned promise resolves to an
   * untyped (`unknown`) value.
   */
  protected emitFunctionCall(name: string, args: Record<string, unknown>): Promise<unknown>;
  /**
   * Emit an error event.
   */
  protected emitError(error: Error): void;
  /**
   * Emit a turn-start event.
   */
  protected emitTurnStart(): void;
  /**
   * Emit a turn-end event.
   */
  protected emitTurnEnd(): void;
  /**
   * Build a session object from an id and a configuration.
   */
  protected createSession(id: string, config: RealtimeConfig): RealtimeSession;
}