@lobu/gateway 3.0.9 → 3.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. package/dist/api/platform.d.ts.map +1 -1
  2. package/dist/api/platform.js +7 -26
  3. package/dist/api/platform.js.map +1 -1
  4. package/dist/auth/mcp/proxy.d.ts +14 -0
  5. package/dist/auth/mcp/proxy.d.ts.map +1 -1
  6. package/dist/auth/mcp/proxy.js +149 -13
  7. package/dist/auth/mcp/proxy.js.map +1 -1
  8. package/dist/cli/gateway.d.ts.map +1 -1
  9. package/dist/cli/gateway.js +29 -0
  10. package/dist/cli/gateway.js.map +1 -1
  11. package/dist/connections/chat-instance-manager.d.ts.map +1 -1
  12. package/dist/connections/chat-instance-manager.js +2 -1
  13. package/dist/connections/chat-instance-manager.js.map +1 -1
  14. package/dist/connections/interaction-bridge.d.ts +9 -2
  15. package/dist/connections/interaction-bridge.d.ts.map +1 -1
  16. package/dist/connections/interaction-bridge.js +121 -261
  17. package/dist/connections/interaction-bridge.js.map +1 -1
  18. package/dist/interactions.d.ts +9 -43
  19. package/dist/interactions.d.ts.map +1 -1
  20. package/dist/interactions.js +10 -52
  21. package/dist/interactions.js.map +1 -1
  22. package/dist/routes/public/agent.d.ts +4 -0
  23. package/dist/routes/public/agent.d.ts.map +1 -1
  24. package/dist/routes/public/agent.js +21 -0
  25. package/dist/routes/public/agent.js.map +1 -1
  26. package/dist/services/core-services.d.ts.map +1 -1
  27. package/dist/services/core-services.js +4 -0
  28. package/dist/services/core-services.js.map +1 -1
  29. package/package.json +9 -9
  30. package/src/__tests__/agent-config-routes.test.ts +0 -254
  31. package/src/__tests__/agent-history-routes.test.ts +0 -72
  32. package/src/__tests__/agent-routes.test.ts +0 -68
  33. package/src/__tests__/agent-schedules-routes.test.ts +0 -59
  34. package/src/__tests__/agent-settings-store.test.ts +0 -323
  35. package/src/__tests__/bedrock-model-catalog.test.ts +0 -40
  36. package/src/__tests__/bedrock-openai-service.test.ts +0 -157
  37. package/src/__tests__/bedrock-provider-module.test.ts +0 -56
  38. package/src/__tests__/chat-instance-manager-slack.test.ts +0 -204
  39. package/src/__tests__/chat-response-bridge.test.ts +0 -131
  40. package/src/__tests__/config-memory-plugins.test.ts +0 -92
  41. package/src/__tests__/config-request-store.test.ts +0 -127
  42. package/src/__tests__/connection-routes.test.ts +0 -144
  43. package/src/__tests__/core-services-store-selection.test.ts +0 -92
  44. package/src/__tests__/docker-deployment.test.ts +0 -1211
  45. package/src/__tests__/embedded-deployment.test.ts +0 -342
  46. package/src/__tests__/grant-store.test.ts +0 -148
  47. package/src/__tests__/http-proxy.test.ts +0 -281
  48. package/src/__tests__/instruction-service.test.ts +0 -37
  49. package/src/__tests__/link-buttons.test.ts +0 -112
  50. package/src/__tests__/lobu.test.ts +0 -32
  51. package/src/__tests__/mcp-config-service.test.ts +0 -347
  52. package/src/__tests__/mcp-proxy.test.ts +0 -694
  53. package/src/__tests__/message-handler-bridge.test.ts +0 -17
  54. package/src/__tests__/model-selection.test.ts +0 -172
  55. package/src/__tests__/oauth-templates.test.ts +0 -39
  56. package/src/__tests__/platform-adapter-slack-send.test.ts +0 -114
  57. package/src/__tests__/platform-helpers-model-resolution.test.ts +0 -253
  58. package/src/__tests__/provider-inheritance.test.ts +0 -212
  59. package/src/__tests__/routes/cli-auth.test.ts +0 -337
  60. package/src/__tests__/routes/interactions.test.ts +0 -121
  61. package/src/__tests__/secret-proxy.test.ts +0 -85
  62. package/src/__tests__/session-manager.test.ts +0 -572
  63. package/src/__tests__/setup.ts +0 -133
  64. package/src/__tests__/skill-and-mcp-registry.test.ts +0 -203
  65. package/src/__tests__/slack-routes.test.ts +0 -161
  66. package/src/__tests__/system-config-resolver.test.ts +0 -75
  67. package/src/__tests__/system-message-limiter.test.ts +0 -89
  68. package/src/__tests__/system-skills-service.test.ts +0 -362
  69. package/src/__tests__/transcription-service.test.ts +0 -222
  70. package/src/__tests__/utils/rate-limiter.test.ts +0 -102
  71. package/src/__tests__/worker-connection-manager.test.ts +0 -497
  72. package/src/__tests__/worker-job-router.test.ts +0 -722
  73. package/src/api/index.ts +0 -1
  74. package/src/api/platform.ts +0 -292
  75. package/src/api/response-renderer.ts +0 -157
  76. package/src/auth/agent-metadata-store.ts +0 -168
  77. package/src/auth/api-auth-middleware.ts +0 -69
  78. package/src/auth/api-key-provider-module.ts +0 -213
  79. package/src/auth/base-provider-module.ts +0 -201
  80. package/src/auth/bedrock/provider-module.ts +0 -110
  81. package/src/auth/chatgpt/chatgpt-oauth-module.ts +0 -185
  82. package/src/auth/chatgpt/device-code-client.ts +0 -218
  83. package/src/auth/chatgpt/index.ts +0 -1
  84. package/src/auth/claude/oauth-module.ts +0 -280
  85. package/src/auth/cli/token-service.ts +0 -249
  86. package/src/auth/external/client.ts +0 -560
  87. package/src/auth/external/device-code-client.ts +0 -235
  88. package/src/auth/mcp/config-service.ts +0 -420
  89. package/src/auth/mcp/proxy.ts +0 -1086
  90. package/src/auth/mcp/string-substitution.ts +0 -17
  91. package/src/auth/mcp/tool-cache.ts +0 -90
  92. package/src/auth/oauth/base-client.ts +0 -267
  93. package/src/auth/oauth/client.ts +0 -153
  94. package/src/auth/oauth/credentials.ts +0 -7
  95. package/src/auth/oauth/providers.ts +0 -69
  96. package/src/auth/oauth/state-store.ts +0 -150
  97. package/src/auth/oauth-templates.ts +0 -179
  98. package/src/auth/provider-catalog.ts +0 -220
  99. package/src/auth/provider-model-options.ts +0 -41
  100. package/src/auth/settings/agent-settings-store.ts +0 -565
  101. package/src/auth/settings/auth-profiles-manager.ts +0 -216
  102. package/src/auth/settings/index.ts +0 -12
  103. package/src/auth/settings/model-preference-store.ts +0 -52
  104. package/src/auth/settings/model-selection.ts +0 -135
  105. package/src/auth/settings/resolved-settings-view.ts +0 -298
  106. package/src/auth/settings/template-utils.ts +0 -44
  107. package/src/auth/settings/token-service.ts +0 -88
  108. package/src/auth/system-env-store.ts +0 -98
  109. package/src/auth/user-agents-store.ts +0 -68
  110. package/src/channels/binding-service.ts +0 -214
  111. package/src/channels/index.ts +0 -4
  112. package/src/cli/gateway.ts +0 -1312
  113. package/src/cli/index.ts +0 -74
  114. package/src/commands/built-in-commands.ts +0 -80
  115. package/src/commands/command-dispatcher.ts +0 -94
  116. package/src/commands/command-reply-adapters.ts +0 -27
  117. package/src/config/file-loader.ts +0 -618
  118. package/src/config/index.ts +0 -588
  119. package/src/config/network-allowlist.ts +0 -71
  120. package/src/connections/chat-instance-manager.ts +0 -1284
  121. package/src/connections/chat-response-bridge.ts +0 -618
  122. package/src/connections/index.ts +0 -7
  123. package/src/connections/interaction-bridge.ts +0 -831
  124. package/src/connections/message-handler-bridge.ts +0 -440
  125. package/src/connections/platform-auth-methods.ts +0 -15
  126. package/src/connections/types.ts +0 -84
  127. package/src/gateway/connection-manager.ts +0 -291
  128. package/src/gateway/index.ts +0 -698
  129. package/src/gateway/job-router.ts +0 -201
  130. package/src/gateway-main.ts +0 -200
  131. package/src/index.ts +0 -41
  132. package/src/infrastructure/queue/index.ts +0 -12
  133. package/src/infrastructure/queue/queue-producer.ts +0 -148
  134. package/src/infrastructure/queue/redis-queue.ts +0 -361
  135. package/src/infrastructure/queue/types.ts +0 -133
  136. package/src/infrastructure/redis/system-message-limiter.ts +0 -94
  137. package/src/interactions/config-request-store.ts +0 -198
  138. package/src/interactions.ts +0 -363
  139. package/src/lobu.ts +0 -311
  140. package/src/metrics/prometheus.ts +0 -159
  141. package/src/modules/module-system.ts +0 -179
  142. package/src/orchestration/base-deployment-manager.ts +0 -900
  143. package/src/orchestration/deployment-utils.ts +0 -98
  144. package/src/orchestration/impl/docker-deployment.ts +0 -620
  145. package/src/orchestration/impl/embedded-deployment.ts +0 -268
  146. package/src/orchestration/impl/index.ts +0 -8
  147. package/src/orchestration/impl/k8s/deployment.ts +0 -1061
  148. package/src/orchestration/impl/k8s/helpers.ts +0 -610
  149. package/src/orchestration/impl/k8s/index.ts +0 -1
  150. package/src/orchestration/index.ts +0 -333
  151. package/src/orchestration/message-consumer.ts +0 -584
  152. package/src/orchestration/scheduled-wakeup.ts +0 -704
  153. package/src/permissions/approval-policy.ts +0 -36
  154. package/src/permissions/grant-store.ts +0 -219
  155. package/src/platform/file-handler.ts +0 -66
  156. package/src/platform/link-buttons.ts +0 -57
  157. package/src/platform/renderer-utils.ts +0 -44
  158. package/src/platform/response-renderer.ts +0 -84
  159. package/src/platform/unified-thread-consumer.ts +0 -194
  160. package/src/platform.ts +0 -318
  161. package/src/proxy/http-proxy.ts +0 -752
  162. package/src/proxy/proxy-manager.ts +0 -81
  163. package/src/proxy/secret-proxy.ts +0 -402
  164. package/src/proxy/token-refresh-job.ts +0 -143
  165. package/src/routes/internal/audio.ts +0 -141
  166. package/src/routes/internal/device-auth.ts +0 -652
  167. package/src/routes/internal/files.ts +0 -226
  168. package/src/routes/internal/history.ts +0 -69
  169. package/src/routes/internal/images.ts +0 -127
  170. package/src/routes/internal/interactions.ts +0 -84
  171. package/src/routes/internal/middleware.ts +0 -23
  172. package/src/routes/internal/schedule.ts +0 -226
  173. package/src/routes/internal/types.ts +0 -22
  174. package/src/routes/openapi-auto.ts +0 -239
  175. package/src/routes/public/agent-access.ts +0 -23
  176. package/src/routes/public/agent-config.ts +0 -675
  177. package/src/routes/public/agent-history.ts +0 -422
  178. package/src/routes/public/agent-schedules.ts +0 -296
  179. package/src/routes/public/agent.ts +0 -1086
  180. package/src/routes/public/agents.ts +0 -373
  181. package/src/routes/public/channels.ts +0 -191
  182. package/src/routes/public/cli-auth.ts +0 -896
  183. package/src/routes/public/connections.ts +0 -574
  184. package/src/routes/public/landing.ts +0 -16
  185. package/src/routes/public/oauth.ts +0 -147
  186. package/src/routes/public/settings-auth.ts +0 -104
  187. package/src/routes/public/slack.ts +0 -173
  188. package/src/routes/shared/agent-ownership.ts +0 -101
  189. package/src/routes/shared/token-verifier.ts +0 -34
  190. package/src/services/bedrock-model-catalog.ts +0 -217
  191. package/src/services/bedrock-openai-service.ts +0 -658
  192. package/src/services/core-services.ts +0 -1072
  193. package/src/services/image-generation-service.ts +0 -257
  194. package/src/services/instruction-service.ts +0 -318
  195. package/src/services/mcp-registry.ts +0 -94
  196. package/src/services/platform-helpers.ts +0 -287
  197. package/src/services/session-manager.ts +0 -262
  198. package/src/services/settings-resolver.ts +0 -74
  199. package/src/services/system-config-resolver.ts +0 -89
  200. package/src/services/system-skills-service.ts +0 -229
  201. package/src/services/transcription-service.ts +0 -684
  202. package/src/session.ts +0 -110
  203. package/src/spaces/index.ts +0 -1
  204. package/src/spaces/space-resolver.ts +0 -17
  205. package/src/stores/in-memory-agent-store.ts +0 -403
  206. package/src/stores/redis-agent-store.ts +0 -279
  207. package/src/utils/public-url.ts +0 -44
  208. package/src/utils/rate-limiter.ts +0 -94
  209. package/tsconfig.json +0 -33
  210. package/tsconfig.tsbuildinfo +0 -1
@@ -1,684 +0,0 @@
1
- /**
2
- * Multi-Provider Audio Service
3
- *
4
- * Supports speech-to-text and text-to-speech via auth profiles (installed providers):
5
- * - OpenAI (chatgpt auth profile) - Whisper for STT, TTS API for speech
6
- * - Google Gemini (gemini auth profile) - Audio input/output
7
- * - ElevenLabs (elevenlabs auth profile) - STT and high-quality TTS
8
- *
9
- * STT selection: built-ins (chatgpt/openai, gemini, elevenlabs) plus optional
10
- * config-driven STT providers declared in system-skills provider config.
11
- * TTS selection stays built-in only (openai → gemini → elevenlabs).
12
- */
13
-
14
- import type { ProviderConfigEntry } from "@lobu/core";
15
- import { createLogger } from "@lobu/core";
16
- import type { AuthProfilesManager } from "../auth/settings/auth-profiles-manager";
17
-
18
- const logger = createLogger("transcription-service");
19
-
20
/** Built-in audio back-ends the service knows how to call. */
export type TranscriptionProvider = "openai" | "gemini" | "elevenlabs";

/**
 * Everything needed to call one provider on behalf of an agent.
 *
 * `openaiCompat` is only set for config-driven, OpenAI-compatible STT
 * providers; it overrides the default OpenAI endpoint and model.
 */
interface TranscriptionConfig {
  /** Auth-profile provider id the credential was looked up under. */
  profileProviderId: string;
  /** Human-readable provider label used in error messages. */
  displayName: string;
  /** Wire protocol / back-end used to perform the request. */
  provider: TranscriptionProvider;
  /** Credential taken from the agent's auth profile. */
  apiKey: string;
  /** Endpoint/model override for OpenAI-compatible STT providers. */
  openaiCompat?: {
    endpointUrl: string;
    model: string;
  };
}

/** Successful speech-to-text outcome. */
export interface TranscriptionSuccess {
  text: string;
  provider: TranscriptionProvider;
}

/** Failed speech-to-text outcome. */
export interface TranscriptionError {
  error: string;
  availableProviders: Array<TranscriptionProvider>;
}

/** Outcome of a transcription request: either text or an error description. */
export type TranscriptionResult = TranscriptionSuccess | TranscriptionError;

/** Successful text-to-speech outcome. */
export interface SynthesisSuccess {
  audioBuffer: Buffer;
  mimeType: string;
  provider: TranscriptionProvider;
}

/** Failed text-to-speech outcome. */
export interface SynthesisError {
  error: string;
  availableProviders: Array<TranscriptionProvider>;
}

/** Outcome of a synthesis request: either audio or an error description. */
export type SynthesisResult = SynthesisSuccess | SynthesisError;

/** Voice options for TTS. */
export interface VoiceOptions {
  /** Provider-specific voice ID. */
  voice?: string;
  /** Speech speed (0.5-2.0, default 1.0). */
  speed?: number;
}
63
-
64
// Single source of truth tying an auth profile's providerId to the TTS
// provider it unlocks, plus the label shown to users. Array order is the
// provider preference order.
const TTS_CAPABLE_PROVIDERS: Array<{
  profileProviderId: string;
  ttsProvider: TranscriptionProvider;
  displayName: string;
}> = [
  { profileProviderId: "chatgpt", ttsProvider: "openai", displayName: "OpenAI" },
  {
    profileProviderId: "gemini",
    ttsProvider: "gemini",
    displayName: "Google Gemini",
  },
  {
    profileProviderId: "elevenlabs",
    ttsProvider: "elevenlabs",
    displayName: "ElevenLabs",
  },
];

/**
 * Look up the human-readable label for a provider.
 *
 * @param provider - Provider identifier to describe.
 * @returns The label from the provider table, or the identifier itself when
 *   the table has no matching entry.
 */
function displayName(provider: TranscriptionProvider): string {
  for (const entry of TTS_CAPABLE_PROVIDERS) {
    if (entry.ttsProvider === provider) {
      return entry.displayName;
    }
  }
  return provider;
}
93
-
94
/**
 * Speech-to-text (STT) and text-to-speech (TTS) service backed by the auth
 * profiles installed for an agent.
 *
 * STT candidates are the built-in providers (in table order) followed by
 * OpenAI-compatible providers declared through the optional provider-config
 * source; they are tried one after another until one succeeds. TTS uses the
 * first built-in provider the agent has a profile for and never a
 * config-driven one.
 */
export class TranscriptionService {
  /** Extension used when a MIME type is not recognised. */
  private static readonly DEFAULT_EXTENSION = "ogg";

  /** Lower-case MIME essence → upload file extension. */
  private static readonly MIME_TO_EXTENSION: ReadonlyMap<string, string> =
    new Map([
      ["audio/ogg", "ogg"],
      ["audio/opus", "opus"],
      ["audio/mpeg", "mp3"],
      ["audio/mp3", "mp3"],
      ["audio/wav", "wav"],
      ["audio/x-wav", "wav"],
      ["audio/wave", "wav"],
      ["audio/webm", "webm"],
      ["audio/m4a", "m4a"],
      ["audio/x-m4a", "m4a"],
      ["audio/mp4", "m4a"],
      ["audio/flac", "flac"],
    ]);

  private providerConfigSource?:
    | (() => Promise<Record<string, ProviderConfigEntry>>)
    | undefined;

  /**
   * @param authProfilesManager - Used to look up the best credential per
   *   agent and provider.
   * @param providerConfigSource - Optional loader for config-driven STT
   *   provider declarations.
   */
  constructor(
    private readonly authProfilesManager: AuthProfilesManager,
    providerConfigSource?: () => Promise<Record<string, ProviderConfigEntry>>
  ) {
    this.providerConfigSource = providerConfigSource;
  }

  /** Replace the loader used to discover config-driven STT providers. */
  setProviderConfigSource(
    source: () => Promise<Record<string, ProviderConfigEntry>>
  ): void {
    this.providerConfigSource = source;
  }

  /**
   * Transcribe an audio buffer to text.
   *
   * Every configured provider is tried in order; the first success wins.
   * Failures are never thrown: they are collected and returned as a
   * `TranscriptionError`.
   *
   * @param audioBuffer - Raw audio bytes.
   * @param agentId - Agent whose auth profiles supply the credentials.
   * @param mimeType - MIME type of `audioBuffer`.
   */
  async transcribe(
    audioBuffer: Buffer,
    agentId: string,
    mimeType = "audio/ogg"
  ): Promise<TranscriptionResult> {
    const configs = await this.getTranscriptionConfigs(agentId);

    if (configs.length === 0) {
      return this.noProviderError(
        "No transcription provider configured",
        agentId
      );
    }

    const attemptErrors: string[] = [];
    for (const config of configs) {
      logger.info("Transcribing audio", {
        agentId,
        provider: config.provider,
        profileProviderId: config.profileProviderId,
        bufferSize: audioBuffer.length,
        mimeType,
      });

      try {
        const text = await this.transcribeWithProvider(
          audioBuffer,
          config,
          mimeType
        );
        logger.info("Transcription successful", {
          agentId,
          provider: config.provider,
          profileProviderId: config.profileProviderId,
          textLength: text.length,
        });
        return { text, provider: config.provider };
      } catch (error) {
        const errorMessage =
          error instanceof Error ? error.message : String(error);
        logger.error("Transcription failed", {
          agentId,
          provider: config.provider,
          profileProviderId: config.profileProviderId,
          error: errorMessage,
        });
        attemptErrors.push(`${config.displayName}: ${errorMessage}`);
      }
    }

    return {
      error: `Transcription failed with all configured providers: ${attemptErrors.join(" | ")}`,
      availableProviders: [...new Set(configs.map((c) => c.provider))],
    };
  }

  /**
   * Get the preferred transcription config for an agent.
   *
   * Built-in providers with a valid profile come first
   * (openai → gemini → elevenlabs), followed by config-driven STT providers.
   *
   * @returns The first candidate, or `null` when the agent has none.
   */
  async getConfig(agentId: string): Promise<TranscriptionConfig | null> {
    const configs = await this.getTranscriptionConfigs(agentId);
    return configs[0] ?? null;
  }

  /** Built-in providers the agent has a profile for, in table order. */
  private async getSynthesisConfigs(
    agentId: string
  ): Promise<TranscriptionConfig[]> {
    const configs: TranscriptionConfig[] = [];
    for (const { profileProviderId, ttsProvider } of TTS_CAPABLE_PROVIDERS) {
      const profile = await this.authProfilesManager.getBestProfile(
        agentId,
        profileProviderId
      );
      if (profile) {
        configs.push({
          profileProviderId,
          displayName: displayName(ttsProvider),
          provider: ttsProvider,
          apiKey: profile.credential,
        });
      }
    }
    return configs;
  }

  /**
   * Built-in configs followed by config-driven STT providers the agent has a
   * profile for. A profile provider id is never listed twice.
   */
  private async getTranscriptionConfigs(
    agentId: string
  ): Promise<TranscriptionConfig[]> {
    const configs = await this.getSynthesisConfigs(agentId);
    const providerIds = new Set(configs.map((c) => c.profileProviderId));
    const configDriven = await this.getConfigDrivenSttCandidates();

    for (const candidate of configDriven) {
      if (providerIds.has(candidate.profileProviderId)) continue;

      const profile = await this.authProfilesManager.getBestProfile(
        agentId,
        candidate.profileProviderId
      );
      if (!profile) continue;

      configs.push({
        profileProviderId: candidate.profileProviderId,
        displayName: candidate.displayName,
        provider: candidate.provider,
        apiKey: profile.credential,
        openaiCompat: candidate.openaiCompat,
      });
      providerIds.add(candidate.profileProviderId);
    }

    return configs;
  }

  /**
   * Turn provider-config entries into STT candidates (without credentials).
   *
   * Only OpenAI-compatible entries are supported; others are logged and
   * skipped. A failure to load the provider configs is logged and treated as
   * "no config-driven providers" so built-in STT keeps working.
   */
  private async getConfigDrivenSttCandidates(): Promise<
    Array<Omit<TranscriptionConfig, "apiKey">>
  > {
    if (!this.providerConfigSource) return [];

    let providerConfigs: Record<string, ProviderConfigEntry>;
    try {
      providerConfigs = await this.providerConfigSource();
    } catch (error) {
      logger.warn("Failed to load provider configs for STT", {
        error: error instanceof Error ? error.message : String(error),
      });
      return [];
    }

    const candidates: Array<Omit<TranscriptionConfig, "apiKey">> = [];
    for (const [providerId, entry] of Object.entries(providerConfigs)) {
      const stt = entry.stt;
      const compat = stt?.sdkCompat || entry.sdkCompat;
      // An explicit `stt` section is enabled unless it opts out; without one,
      // any OpenAI-compatible provider is assumed to offer transcription.
      const sttEnabled = stt ? stt.enabled !== false : compat === "openai";
      if (!sttEnabled) continue;

      if (compat !== "openai") {
        logger.warn("Unsupported config-driven STT compatibility", {
          providerId,
          compat,
        });
        continue;
      }

      const endpoint = this.resolveEndpointUrl(
        stt?.transcriptionPath,
        stt?.baseUrl || entry.upstreamBaseUrl
      );
      if (!endpoint) {
        logger.warn("Invalid STT endpoint configuration", {
          providerId,
          transcriptionPath: stt?.transcriptionPath,
          baseUrl: stt?.baseUrl || entry.upstreamBaseUrl,
        });
        continue;
      }

      candidates.push({
        profileProviderId: providerId,
        displayName: entry.displayName || providerId,
        provider: "openai",
        openaiCompat: {
          endpointUrl: endpoint,
          model: stt?.model?.trim() || "whisper-1",
        },
      });
    }
    return candidates;
  }

  /**
   * Get provider info for documentation/help messages.
   */
  getProviderInfo(): Array<{ provider: TranscriptionProvider; name: string }> {
    return TTS_CAPABLE_PROVIDERS.map(({ ttsProvider, displayName }) => ({
      provider: ttsProvider,
      name: displayName,
    }));
  }

  // ==========================================================================
  // Text-to-Speech (Synthesis)
  // ==========================================================================

  /**
   * Synthesize text to audio with the first built-in provider the agent has a
   * profile for.
   *
   * Failures are never thrown; they are returned as a `SynthesisError`.
   *
   * @param text - Text to speak.
   * @param agentId - Agent whose auth profiles supply the credentials.
   * @param options - Optional voice and speed settings.
   */
  async synthesize(
    text: string,
    agentId: string,
    options: VoiceOptions = {}
  ): Promise<SynthesisResult> {
    // Deliberately NOT getConfig(): that list also contains config-driven
    // STT-only providers, whose credentials must never be sent to the
    // built-in TTS endpoints.
    const [config] = await this.getSynthesisConfigs(agentId);

    if (!config) {
      return this.noProviderError("No audio provider configured", agentId);
    }

    logger.info("Synthesizing audio", {
      agentId,
      provider: config.provider,
      textLength: text.length,
      voice: options.voice,
    });

    try {
      const result = await this.synthesizeWithProvider(text, config, options);
      logger.info("Synthesis successful", {
        agentId,
        provider: config.provider,
        audioSize: result.audioBuffer.length,
      });
      return { ...result, provider: config.provider };
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      logger.error("Synthesis failed", {
        agentId,
        provider: config.provider,
        error: errorMessage,
      });
      return {
        error: `Synthesis failed with ${displayName(config.provider)}: ${errorMessage}`,
        availableProviders: [],
      };
    }
  }

  /** Log and build the "nothing configured" error, listing all built-ins. */
  private noProviderError(message: string, agentId: string): TranscriptionError {
    const availableProviders = TTS_CAPABLE_PROVIDERS.map((p) => p.ttsProvider);
    logger.info(message, { agentId, availableProviders });
    return { error: message, availableProviders };
  }

  /**
   * Throw a descriptive error when an upstream response is not 2xx.
   *
   * @param resp - Response to check.
   * @param apiName - Label that prefixes the error message.
   * @throws Error `"<apiName> error: <status> - <body>"` on non-OK responses.
   */
  private async ensureOk(resp: Response, apiName: string): Promise<void> {
    if (resp.ok) return;
    const body = await resp.text();
    throw new Error(`${apiName} error: ${resp.status} - ${body}`);
  }

  // ==========================================================================
  // Provider-specific implementations - Transcription (STT)
  // ==========================================================================

  /** Dispatch a transcription request to the back-end named by `config`. */
  private async transcribeWithProvider(
    buffer: Buffer,
    config: TranscriptionConfig,
    mimeType: string
  ): Promise<string> {
    switch (config.provider) {
      case "openai":
        return this.transcribeWithOpenAI(
          buffer,
          config.apiKey,
          mimeType,
          config.openaiCompat
        );
      case "gemini":
        return this.transcribeWithGemini(buffer, config.apiKey, mimeType);
      case "elevenlabs":
        return this.transcribeWithElevenLabs(buffer, config.apiKey, mimeType);
      default:
        throw new Error(`Unknown provider: ${config.provider}`);
    }
  }

  /**
   * Transcribe via the OpenAI transcription API or an OpenAI-compatible
   * endpoint supplied in `options`.
   */
  private async transcribeWithOpenAI(
    buffer: Buffer,
    apiKey: string,
    mimeType: string,
    options?: { endpointUrl: string; model: string }
  ): Promise<string> {
    const formData = new FormData();
    const ext = this.getExtensionFromMime(mimeType);
    formData.append(
      "file",
      new Blob([buffer], { type: mimeType }),
      `audio.${ext}`
    );
    formData.append("model", options?.model || "whisper-1");

    const resp = await fetch(
      options?.endpointUrl || "https://api.openai.com/v1/audio/transcriptions",
      {
        method: "POST",
        headers: { Authorization: `Bearer ${apiKey}` },
        body: formData,
      }
    );
    await this.ensureOk(resp, "OpenAI API");

    const data = (await resp.json()) as { text?: unknown };
    if (typeof data.text !== "string") {
      throw new Error("Transcription response did not contain text");
    }
    return data.text;
  }

  /** Transcribe via Gemini by sending the audio inline as base64. */
  private async transcribeWithGemini(
    buffer: Buffer,
    apiKey: string,
    mimeType: string
  ): Promise<string> {
    const resp = await fetch(
      "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          // Header auth keeps the key out of URLs (and thus out of logs).
          "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
          contents: [
            {
              parts: [
                {
                  text: "Transcribe this audio exactly as spoken. Return only the transcription text, nothing else:",
                },
                {
                  inline_data: {
                    mime_type: mimeType,
                    data: buffer.toString("base64"),
                  },
                },
              ],
            },
          ],
        }),
      }
    );
    await this.ensureOk(resp, "Gemini API");

    const data = (await resp.json()) as {
      candidates?: Array<{
        content?: { parts?: Array<{ text?: string }> };
      }>;
    };
    // `candidates` can be absent altogether, so guard every level.
    return data.candidates?.[0]?.content?.parts?.[0]?.text || "";
  }

  /** Transcribe via the ElevenLabs speech-to-text API. */
  private async transcribeWithElevenLabs(
    buffer: Buffer,
    apiKey: string,
    mimeType: string
  ): Promise<string> {
    const formData = new FormData();
    const ext = this.getExtensionFromMime(mimeType);
    // The endpoint expects the upload under "file" and a mandatory model_id.
    formData.append(
      "file",
      new Blob([buffer], { type: mimeType }),
      `audio.${ext}`
    );
    formData.append("model_id", "scribe_v1");

    const resp = await fetch("https://api.elevenlabs.io/v1/speech-to-text", {
      method: "POST",
      headers: { "xi-api-key": apiKey },
      body: formData,
    });
    await this.ensureOk(resp, "ElevenLabs API");

    const data = (await resp.json()) as { text?: unknown };
    if (typeof data.text !== "string") {
      throw new Error("Transcription response did not contain text");
    }
    return data.text;
  }

  // ==========================================================================
  // Provider-specific implementations - Synthesis (TTS)
  // ==========================================================================

  /** Dispatch a synthesis request to the back-end named by `config`. */
  private async synthesizeWithProvider(
    text: string,
    config: TranscriptionConfig,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    switch (config.provider) {
      case "openai":
        return this.synthesizeWithOpenAI(text, config.apiKey, options);
      case "gemini":
        return this.synthesizeWithGemini(text, config.apiKey);
      case "elevenlabs":
        return this.synthesizeWithElevenLabs(text, config.apiKey, options);
      default:
        throw new Error(`Unknown provider: ${config.provider}`);
    }
  }

  /**
   * Synthesize via the OpenAI TTS API.
   * Voices: alloy, echo, fable, onyx, nova, shimmer.
   */
  private async synthesizeWithOpenAI(
    text: string,
    apiKey: string,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    const voice = options.voice || "alloy";
    // `||` on purpose: a speed of 0 is meaningless and falls back to 1.0.
    const speed = options.speed || 1.0;

    const resp = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "tts-1",
        input: text,
        voice,
        speed,
        response_format: "opus", // Good for WhatsApp
      }),
    });
    await this.ensureOk(resp, "OpenAI TTS API");

    const arrayBuffer = await resp.arrayBuffer();
    return {
      audioBuffer: Buffer.from(arrayBuffer),
      mimeType: "audio/opus",
    };
  }

  /**
   * Synthesize via Gemini by requesting an AUDIO response modality.
   *
   * @throws Error when the response contains no inline audio part.
   */
  private async synthesizeWithGemini(
    text: string,
    apiKey: string
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    const resp = await fetch(
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          // Header auth keeps the key out of URLs (and thus out of logs).
          "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
          contents: [
            {
              parts: [{ text: `Please speak this text aloud: "${text}"` }],
            },
          ],
          generationConfig: {
            responseModalities: ["AUDIO"],
            speechConfig: {
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName: "Aoede", // Default Gemini voice
                },
              },
            },
          },
        }),
      }
    );
    await this.ensureOk(resp, "Gemini TTS API");

    const data = (await resp.json()) as {
      candidates?: Array<{
        content?: {
          parts?: Array<{
            inlineData?: { mimeType: string; data: string };
          }>;
        };
      }>;
    };

    const audioPart = data.candidates?.[0]?.content?.parts?.find((p) =>
      p.inlineData?.mimeType?.startsWith("audio/")
    );

    if (!audioPart?.inlineData) {
      throw new Error("Gemini did not return audio data");
    }

    return {
      audioBuffer: Buffer.from(audioPart.inlineData.data, "base64"),
      mimeType: audioPart.inlineData.mimeType,
    };
  }

  /**
   * Synthesize via the ElevenLabs TTS API.
   * Default voice: Rachel (21m00Tcm4TlvDq8ikWAM).
   */
  private async synthesizeWithElevenLabs(
    text: string,
    apiKey: string,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    const voiceId = options.voice || "21m00Tcm4TlvDq8ikWAM";

    const resp = await fetch(
      // Encode the caller-supplied voice id so it cannot alter the URL path.
      `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
      {
        method: "POST",
        headers: {
          "xi-api-key": apiKey,
          "Content-Type": "application/json",
          Accept: "audio/mpeg",
        },
        body: JSON.stringify({
          text,
          model_id: "eleven_monolingual_v1",
          voice_settings: {
            stability: 0.5,
            similarity_boost: 0.5,
          },
        }),
      }
    );
    await this.ensureOk(resp, "ElevenLabs TTS API");

    const arrayBuffer = await resp.arrayBuffer();
    return {
      audioBuffer: Buffer.from(arrayBuffer),
      mimeType: "audio/mpeg",
    };
  }

  // ==========================================================================
  // Utility methods
  // ==========================================================================

  /**
   * Map an audio MIME type to the file extension used for uploads.
   *
   * Parameters (`; codecs=opus`) and letter case are ignored. Unknown types
   * fall back to `"ogg"`.
   */
  private getExtensionFromMime(mimeType: string): string {
    const essence = (mimeType.split(";", 1)[0] ?? "").trim().toLowerCase();
    return (
      TranscriptionService.MIME_TO_EXTENSION.get(essence) ??
      TranscriptionService.DEFAULT_EXTENSION
    );
  }

  /**
   * Build the absolute transcription endpoint for a config-driven provider.
   *
   * @param transcriptionPath - Absolute URL, or a path relative to `baseUrl`;
   *   blank/undefined selects the default OpenAI transcription path.
   * @param baseUrl - Provider base URL; required unless the path is absolute.
   * @returns The endpoint URL, or `null` when no base URL is available for a
   *   relative path.
   */
  private resolveEndpointUrl(
    transcriptionPath: string | undefined,
    baseUrl: string | undefined
  ): string | null {
    // Trim before defaulting so a whitespace-only path counts as "not set".
    const path =
      transcriptionPath?.trim() ||
      this.getDefaultOpenAiTranscriptionPath(baseUrl);
    if (/^https?:\/\//i.test(path)) {
      return path;
    }

    const base = (baseUrl || "").trim();
    if (!base) return null;

    const normalizedPath = path.startsWith("/") ? path : `/${path}`;
    return `${base.replace(/\/+$/, "")}${normalizedPath}`;
  }

  /**
   * Default transcription path for an OpenAI-compatible base URL: the `/v1`
   * prefix is omitted when the base already ends with it.
   */
  private getDefaultOpenAiTranscriptionPath(
    baseUrl: string | undefined
  ): string {
    const trimmedBase = (baseUrl || "").trim().replace(/\/+$/, "");
    if (trimmedBase.endsWith("/v1")) {
      return "/audio/transcriptions";
    }
    return "/v1/audio/transcriptions";
  }
}