@lobu/gateway 3.0.9 → 3.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/dist/api/platform.d.ts.map +1 -1
  2. package/dist/api/platform.js +7 -26
  3. package/dist/api/platform.js.map +1 -1
  4. package/dist/auth/mcp/proxy.d.ts +14 -0
  5. package/dist/auth/mcp/proxy.d.ts.map +1 -1
  6. package/dist/auth/mcp/proxy.js +149 -13
  7. package/dist/auth/mcp/proxy.js.map +1 -1
  8. package/dist/cli/gateway.d.ts.map +1 -1
  9. package/dist/cli/gateway.js +29 -0
  10. package/dist/cli/gateway.js.map +1 -1
  11. package/dist/connections/chat-instance-manager.d.ts.map +1 -1
  12. package/dist/connections/chat-instance-manager.js +2 -1
  13. package/dist/connections/chat-instance-manager.js.map +1 -1
  14. package/dist/connections/interaction-bridge.d.ts +9 -2
  15. package/dist/connections/interaction-bridge.d.ts.map +1 -1
  16. package/dist/connections/interaction-bridge.js +121 -261
  17. package/dist/connections/interaction-bridge.js.map +1 -1
  18. package/dist/gateway/index.js +1 -1
  19. package/dist/gateway/index.js.map +1 -1
  20. package/dist/interactions.d.ts +9 -43
  21. package/dist/interactions.d.ts.map +1 -1
  22. package/dist/interactions.js +10 -52
  23. package/dist/interactions.js.map +1 -1
  24. package/dist/routes/public/agent.d.ts +4 -0
  25. package/dist/routes/public/agent.d.ts.map +1 -1
  26. package/dist/routes/public/agent.js +21 -0
  27. package/dist/routes/public/agent.js.map +1 -1
  28. package/dist/services/core-services.d.ts.map +1 -1
  29. package/dist/services/core-services.js +4 -0
  30. package/dist/services/core-services.js.map +1 -1
  31. package/package.json +9 -9
  32. package/src/__tests__/agent-config-routes.test.ts +0 -254
  33. package/src/__tests__/agent-history-routes.test.ts +0 -72
  34. package/src/__tests__/agent-routes.test.ts +0 -68
  35. package/src/__tests__/agent-schedules-routes.test.ts +0 -59
  36. package/src/__tests__/agent-settings-store.test.ts +0 -323
  37. package/src/__tests__/bedrock-model-catalog.test.ts +0 -40
  38. package/src/__tests__/bedrock-openai-service.test.ts +0 -157
  39. package/src/__tests__/bedrock-provider-module.test.ts +0 -56
  40. package/src/__tests__/chat-instance-manager-slack.test.ts +0 -204
  41. package/src/__tests__/chat-response-bridge.test.ts +0 -131
  42. package/src/__tests__/config-memory-plugins.test.ts +0 -92
  43. package/src/__tests__/config-request-store.test.ts +0 -127
  44. package/src/__tests__/connection-routes.test.ts +0 -144
  45. package/src/__tests__/core-services-store-selection.test.ts +0 -92
  46. package/src/__tests__/docker-deployment.test.ts +0 -1211
  47. package/src/__tests__/embedded-deployment.test.ts +0 -342
  48. package/src/__tests__/grant-store.test.ts +0 -148
  49. package/src/__tests__/http-proxy.test.ts +0 -281
  50. package/src/__tests__/instruction-service.test.ts +0 -37
  51. package/src/__tests__/link-buttons.test.ts +0 -112
  52. package/src/__tests__/lobu.test.ts +0 -32
  53. package/src/__tests__/mcp-config-service.test.ts +0 -347
  54. package/src/__tests__/mcp-proxy.test.ts +0 -694
  55. package/src/__tests__/message-handler-bridge.test.ts +0 -17
  56. package/src/__tests__/model-selection.test.ts +0 -172
  57. package/src/__tests__/oauth-templates.test.ts +0 -39
  58. package/src/__tests__/platform-adapter-slack-send.test.ts +0 -114
  59. package/src/__tests__/platform-helpers-model-resolution.test.ts +0 -253
  60. package/src/__tests__/provider-inheritance.test.ts +0 -212
  61. package/src/__tests__/routes/cli-auth.test.ts +0 -337
  62. package/src/__tests__/routes/interactions.test.ts +0 -121
  63. package/src/__tests__/secret-proxy.test.ts +0 -85
  64. package/src/__tests__/session-manager.test.ts +0 -572
  65. package/src/__tests__/setup.ts +0 -133
  66. package/src/__tests__/skill-and-mcp-registry.test.ts +0 -203
  67. package/src/__tests__/slack-routes.test.ts +0 -161
  68. package/src/__tests__/system-config-resolver.test.ts +0 -75
  69. package/src/__tests__/system-message-limiter.test.ts +0 -89
  70. package/src/__tests__/system-skills-service.test.ts +0 -362
  71. package/src/__tests__/transcription-service.test.ts +0 -222
  72. package/src/__tests__/utils/rate-limiter.test.ts +0 -102
  73. package/src/__tests__/worker-connection-manager.test.ts +0 -497
  74. package/src/__tests__/worker-job-router.test.ts +0 -722
  75. package/src/api/index.ts +0 -1
  76. package/src/api/platform.ts +0 -292
  77. package/src/api/response-renderer.ts +0 -157
  78. package/src/auth/agent-metadata-store.ts +0 -168
  79. package/src/auth/api-auth-middleware.ts +0 -69
  80. package/src/auth/api-key-provider-module.ts +0 -213
  81. package/src/auth/base-provider-module.ts +0 -201
  82. package/src/auth/bedrock/provider-module.ts +0 -110
  83. package/src/auth/chatgpt/chatgpt-oauth-module.ts +0 -185
  84. package/src/auth/chatgpt/device-code-client.ts +0 -218
  85. package/src/auth/chatgpt/index.ts +0 -1
  86. package/src/auth/claude/oauth-module.ts +0 -280
  87. package/src/auth/cli/token-service.ts +0 -249
  88. package/src/auth/external/client.ts +0 -560
  89. package/src/auth/external/device-code-client.ts +0 -235
  90. package/src/auth/mcp/config-service.ts +0 -420
  91. package/src/auth/mcp/proxy.ts +0 -1086
  92. package/src/auth/mcp/string-substitution.ts +0 -17
  93. package/src/auth/mcp/tool-cache.ts +0 -90
  94. package/src/auth/oauth/base-client.ts +0 -267
  95. package/src/auth/oauth/client.ts +0 -153
  96. package/src/auth/oauth/credentials.ts +0 -7
  97. package/src/auth/oauth/providers.ts +0 -69
  98. package/src/auth/oauth/state-store.ts +0 -150
  99. package/src/auth/oauth-templates.ts +0 -179
  100. package/src/auth/provider-catalog.ts +0 -220
  101. package/src/auth/provider-model-options.ts +0 -41
  102. package/src/auth/settings/agent-settings-store.ts +0 -565
  103. package/src/auth/settings/auth-profiles-manager.ts +0 -216
  104. package/src/auth/settings/index.ts +0 -12
  105. package/src/auth/settings/model-preference-store.ts +0 -52
  106. package/src/auth/settings/model-selection.ts +0 -135
  107. package/src/auth/settings/resolved-settings-view.ts +0 -298
  108. package/src/auth/settings/template-utils.ts +0 -44
  109. package/src/auth/settings/token-service.ts +0 -88
  110. package/src/auth/system-env-store.ts +0 -98
  111. package/src/auth/user-agents-store.ts +0 -68
  112. package/src/channels/binding-service.ts +0 -214
  113. package/src/channels/index.ts +0 -4
  114. package/src/cli/gateway.ts +0 -1312
  115. package/src/cli/index.ts +0 -74
  116. package/src/commands/built-in-commands.ts +0 -80
  117. package/src/commands/command-dispatcher.ts +0 -94
  118. package/src/commands/command-reply-adapters.ts +0 -27
  119. package/src/config/file-loader.ts +0 -618
  120. package/src/config/index.ts +0 -588
  121. package/src/config/network-allowlist.ts +0 -71
  122. package/src/connections/chat-instance-manager.ts +0 -1284
  123. package/src/connections/chat-response-bridge.ts +0 -618
  124. package/src/connections/index.ts +0 -7
  125. package/src/connections/interaction-bridge.ts +0 -831
  126. package/src/connections/message-handler-bridge.ts +0 -440
  127. package/src/connections/platform-auth-methods.ts +0 -15
  128. package/src/connections/types.ts +0 -84
  129. package/src/gateway/connection-manager.ts +0 -291
  130. package/src/gateway/index.ts +0 -698
  131. package/src/gateway/job-router.ts +0 -201
  132. package/src/gateway-main.ts +0 -200
  133. package/src/index.ts +0 -41
  134. package/src/infrastructure/queue/index.ts +0 -12
  135. package/src/infrastructure/queue/queue-producer.ts +0 -148
  136. package/src/infrastructure/queue/redis-queue.ts +0 -361
  137. package/src/infrastructure/queue/types.ts +0 -133
  138. package/src/infrastructure/redis/system-message-limiter.ts +0 -94
  139. package/src/interactions/config-request-store.ts +0 -198
  140. package/src/interactions.ts +0 -363
  141. package/src/lobu.ts +0 -311
  142. package/src/metrics/prometheus.ts +0 -159
  143. package/src/modules/module-system.ts +0 -179
  144. package/src/orchestration/base-deployment-manager.ts +0 -900
  145. package/src/orchestration/deployment-utils.ts +0 -98
  146. package/src/orchestration/impl/docker-deployment.ts +0 -620
  147. package/src/orchestration/impl/embedded-deployment.ts +0 -268
  148. package/src/orchestration/impl/index.ts +0 -8
  149. package/src/orchestration/impl/k8s/deployment.ts +0 -1061
  150. package/src/orchestration/impl/k8s/helpers.ts +0 -610
  151. package/src/orchestration/impl/k8s/index.ts +0 -1
  152. package/src/orchestration/index.ts +0 -333
  153. package/src/orchestration/message-consumer.ts +0 -584
  154. package/src/orchestration/scheduled-wakeup.ts +0 -704
  155. package/src/permissions/approval-policy.ts +0 -36
  156. package/src/permissions/grant-store.ts +0 -219
  157. package/src/platform/file-handler.ts +0 -66
  158. package/src/platform/link-buttons.ts +0 -57
  159. package/src/platform/renderer-utils.ts +0 -44
  160. package/src/platform/response-renderer.ts +0 -84
  161. package/src/platform/unified-thread-consumer.ts +0 -194
  162. package/src/platform.ts +0 -318
  163. package/src/proxy/http-proxy.ts +0 -752
  164. package/src/proxy/proxy-manager.ts +0 -81
  165. package/src/proxy/secret-proxy.ts +0 -402
  166. package/src/proxy/token-refresh-job.ts +0 -143
  167. package/src/routes/internal/audio.ts +0 -141
  168. package/src/routes/internal/device-auth.ts +0 -652
  169. package/src/routes/internal/files.ts +0 -226
  170. package/src/routes/internal/history.ts +0 -69
  171. package/src/routes/internal/images.ts +0 -127
  172. package/src/routes/internal/interactions.ts +0 -84
  173. package/src/routes/internal/middleware.ts +0 -23
  174. package/src/routes/internal/schedule.ts +0 -226
  175. package/src/routes/internal/types.ts +0 -22
  176. package/src/routes/openapi-auto.ts +0 -239
  177. package/src/routes/public/agent-access.ts +0 -23
  178. package/src/routes/public/agent-config.ts +0 -675
  179. package/src/routes/public/agent-history.ts +0 -422
  180. package/src/routes/public/agent-schedules.ts +0 -296
  181. package/src/routes/public/agent.ts +0 -1086
  182. package/src/routes/public/agents.ts +0 -373
  183. package/src/routes/public/channels.ts +0 -191
  184. package/src/routes/public/cli-auth.ts +0 -896
  185. package/src/routes/public/connections.ts +0 -574
  186. package/src/routes/public/landing.ts +0 -16
  187. package/src/routes/public/oauth.ts +0 -147
  188. package/src/routes/public/settings-auth.ts +0 -104
  189. package/src/routes/public/slack.ts +0 -173
  190. package/src/routes/shared/agent-ownership.ts +0 -101
  191. package/src/routes/shared/token-verifier.ts +0 -34
  192. package/src/services/bedrock-model-catalog.ts +0 -217
  193. package/src/services/bedrock-openai-service.ts +0 -658
  194. package/src/services/core-services.ts +0 -1072
  195. package/src/services/image-generation-service.ts +0 -257
  196. package/src/services/instruction-service.ts +0 -318
  197. package/src/services/mcp-registry.ts +0 -94
  198. package/src/services/platform-helpers.ts +0 -287
  199. package/src/services/session-manager.ts +0 -262
  200. package/src/services/settings-resolver.ts +0 -74
  201. package/src/services/system-config-resolver.ts +0 -89
  202. package/src/services/system-skills-service.ts +0 -229
  203. package/src/services/transcription-service.ts +0 -684
  204. package/src/session.ts +0 -110
  205. package/src/spaces/index.ts +0 -1
  206. package/src/spaces/space-resolver.ts +0 -17
  207. package/src/stores/in-memory-agent-store.ts +0 -403
  208. package/src/stores/redis-agent-store.ts +0 -279
  209. package/src/utils/public-url.ts +0 -44
  210. package/src/utils/rate-limiter.ts +0 -94
  211. package/tsconfig.json +0 -33
  212. package/tsconfig.tsbuildinfo +0 -1
@@ -1,684 +0,0 @@
1
- /**
2
- * Multi-Provider Audio Service
3
- *
4
- * Supports speech-to-text and text-to-speech via auth profiles (installed providers):
5
- * - OpenAI (chatgpt auth profile) - Whisper for STT, TTS API for speech
6
- * - Google Gemini (gemini auth profile) - Audio input/output
7
- * - ElevenLabs (elevenlabs auth profile) - STT and high-quality TTS
8
- *
9
- * STT selection: built-ins (chatgpt/openai, gemini, elevenlabs) plus optional
10
- * config-driven STT providers declared in system-skills provider config.
11
- * TTS selection stays built-in only (openai → gemini → elevenlabs).
12
- */
13
-
14
- import type { ProviderConfigEntry } from "@lobu/core";
15
- import { createLogger } from "@lobu/core";
16
- import type { AuthProfilesManager } from "../auth/settings/auth-profiles-manager";
17
-
18
- const logger = createLogger("transcription-service");
19
-
20
/** Identifier of a built-in audio backend. */
export type TranscriptionProvider = "openai" | "gemini" | "elevenlabs";

/**
 * Everything needed to call one provider on behalf of an agent.
 *
 * `openaiCompat` is set only for config-driven providers that are reached
 * through an OpenAI-compatible transcription endpoint.
 */
interface TranscriptionConfig {
  provider: TranscriptionProvider;
  profileProviderId: string;
  displayName: string;
  apiKey: string;
  openaiCompat?: {
    endpointUrl: string;
    model: string;
  };
}

/** Successful speech-to-text outcome. */
export interface TranscriptionSuccess {
  provider: TranscriptionProvider;
  text: string;
}

/** Failed speech-to-text outcome. */
export interface TranscriptionError {
  availableProviders: TranscriptionProvider[];
  error: string;
}

/** Result of a transcription attempt: either text or an error description. */
export type TranscriptionResult = TranscriptionSuccess | TranscriptionError;

/** Successful text-to-speech outcome. */
export interface SynthesisSuccess {
  provider: TranscriptionProvider;
  mimeType: string;
  audioBuffer: Buffer;
}

/** Failed text-to-speech outcome. */
export interface SynthesisError {
  availableProviders: TranscriptionProvider[];
  error: string;
}

/** Result of a synthesis attempt: either audio or an error description. */
export type SynthesisResult = SynthesisSuccess | SynthesisError;

/** Voice options for TTS. */
export interface VoiceOptions {
  /** Provider-specific voice ID. */
  voice?: string;
  /** Speech speed (0.5-2.0, default 1.0). */
  speed?: number;
}
63
-
64
/**
 * Auth profile providerId → TTS provider mapping (single source of truth).
 *
 * Code that iterates this table visits the entries in the order listed here:
 * OpenAI, then Google Gemini, then ElevenLabs.
 */
const TTS_CAPABLE_PROVIDERS: Array<{
  profileProviderId: string;
  ttsProvider: TranscriptionProvider;
  displayName: string;
}> = [
  { profileProviderId: "chatgpt", ttsProvider: "openai", displayName: "OpenAI" },
  {
    profileProviderId: "gemini",
    ttsProvider: "gemini",
    displayName: "Google Gemini",
  },
  {
    profileProviderId: "elevenlabs",
    ttsProvider: "elevenlabs",
    displayName: "ElevenLabs",
  },
];
86
-
87
/**
 * Human-readable name for a built-in provider.
 *
 * @param provider - Provider identifier to look up in TTS_CAPABLE_PROVIDERS.
 * @returns The table's display name, or the identifier itself when the
 *   provider has no entry (or the entry carries no display name).
 */
function displayName(provider: TranscriptionProvider): string {
  for (const entry of TTS_CAPABLE_PROVIDERS) {
    if (entry.ttsProvider === provider) {
      return entry.displayName ?? provider;
    }
  }
  return provider;
}
93
-
94
/**
 * Speech-to-text (STT) and text-to-speech (TTS) on behalf of an agent.
 *
 * Credentials come from the agent's auth profiles. STT tries the built-in
 * providers (in TTS_CAPABLE_PROVIDERS order) followed by any config-driven
 * OpenAI-compatible providers; TTS uses the built-in providers only.
 */
export class TranscriptionService {
  /** Lower-cased MIME essence → file extension used for multipart uploads. */
  private static readonly MIME_TO_EXTENSION: ReadonlyMap<string, string> =
    new Map([
      ["audio/ogg", "ogg"],
      ["audio/opus", "opus"],
      ["audio/mpeg", "mp3"],
      ["audio/mp3", "mp3"],
      ["audio/wav", "wav"],
      ["audio/x-wav", "wav"],
      ["audio/wave", "wav"],
      ["audio/webm", "webm"],
      ["audio/m4a", "m4a"],
      ["audio/x-m4a", "m4a"],
      ["audio/mp4", "m4a"],
      ["audio/flac", "flac"],
    ]);

  /** Optional loader for config-driven provider definitions (used for STT). */
  private providerConfigSource?:
    | (() => Promise<Record<string, ProviderConfigEntry>>)
    | undefined;

  /**
   * @param authProfilesManager - Resolves the best credential per agent/provider.
   * @param providerConfigSource - Optional loader for config-driven providers.
   */
  constructor(
    private readonly authProfilesManager: AuthProfilesManager,
    providerConfigSource?: () => Promise<Record<string, ProviderConfigEntry>>
  ) {
    this.providerConfigSource = providerConfigSource;
  }

  /** Replace the loader used to discover config-driven STT providers. */
  setProviderConfigSource(
    source: () => Promise<Record<string, ProviderConfigEntry>>
  ): void {
    this.providerConfigSource = source;
  }

  /**
   * Transcribe an audio buffer to text.
   *
   * Every configured provider is tried in order; the first success is
   * returned. Provider failures are logged and collected, never thrown.
   *
   * @param audioBuffer - Raw audio bytes.
   * @param agentId - Agent whose auth profiles supply the credentials.
   * @param mimeType - MIME type of the audio (default "audio/ogg").
   * @returns The transcript, or an error listing every provider failure.
   */
  async transcribe(
    audioBuffer: Buffer,
    agentId: string,
    mimeType = "audio/ogg"
  ): Promise<TranscriptionResult> {
    const configs = await this.getTranscriptionConfigs(agentId);

    if (configs.length === 0) {
      return this.noProviderError(
        "No transcription provider configured",
        agentId
      );
    }

    const attemptErrors: string[] = [];
    for (const config of configs) {
      logger.info("Transcribing audio", {
        agentId,
        provider: config.provider,
        profileProviderId: config.profileProviderId,
        bufferSize: audioBuffer.length,
        mimeType,
      });

      try {
        const text = await this.transcribeWithProvider(
          audioBuffer,
          config,
          mimeType
        );
        logger.info("Transcription successful", {
          agentId,
          provider: config.provider,
          profileProviderId: config.profileProviderId,
          textLength: text.length,
        });
        return { text, provider: config.provider };
      } catch (error) {
        const errorMessage =
          error instanceof Error ? error.message : String(error);
        logger.error("Transcription failed", {
          agentId,
          provider: config.provider,
          profileProviderId: config.profileProviderId,
          error: errorMessage,
        });
        attemptErrors.push(`${config.displayName}: ${errorMessage}`);
      }
    }

    return {
      error: `Transcription failed with all configured providers: ${attemptErrors.join(" | ")}`,
      availableProviders: [...new Set(configs.map((c) => c.provider))],
    };
  }

  /**
   * First transcription (STT) config available to an agent, or null.
   *
   * Built-in providers are considered first, in TTS_CAPABLE_PROVIDERS order,
   * followed by config-driven STT providers. Because the result may be an
   * STT-only provider, it must not be used to pick a TTS backend.
   */
  async getConfig(agentId: string): Promise<TranscriptionConfig | null> {
    const configs = await this.getTranscriptionConfigs(agentId);
    return configs[0] ?? null;
  }

  /** Built-in providers for which the agent has a usable auth profile. */
  private async getSynthesisConfigs(
    agentId: string
  ): Promise<TranscriptionConfig[]> {
    const configs: TranscriptionConfig[] = [];
    for (const { profileProviderId, ttsProvider } of TTS_CAPABLE_PROVIDERS) {
      const profile = await this.authProfilesManager.getBestProfile(
        agentId,
        profileProviderId
      );
      if (profile) {
        configs.push({
          profileProviderId,
          displayName: displayName(ttsProvider),
          provider: ttsProvider,
          apiKey: profile.credential,
        });
      }
    }
    return configs;
  }

  /**
   * Built-in configs plus config-driven STT providers the agent has a profile
   * for. A config-driven entry is skipped when its provider ID is already
   * present, so each profile provider appears at most once.
   */
  private async getTranscriptionConfigs(
    agentId: string
  ): Promise<TranscriptionConfig[]> {
    const configs = await this.getSynthesisConfigs(agentId);
    const providerIds = new Set(configs.map((c) => c.profileProviderId));
    const configDriven = await this.getConfigDrivenSttCandidates();

    for (const candidate of configDriven) {
      if (providerIds.has(candidate.profileProviderId)) continue;

      const profile = await this.authProfilesManager.getBestProfile(
        agentId,
        candidate.profileProviderId
      );
      if (!profile) continue;

      configs.push({
        profileProviderId: candidate.profileProviderId,
        displayName: candidate.displayName,
        provider: candidate.provider,
        apiKey: profile.credential,
        openaiCompat: candidate.openaiCompat,
      });
      providerIds.add(candidate.profileProviderId);
    }

    return configs;
  }

  /**
   * Turn the loaded provider configs into STT candidates (without API keys).
   *
   * An entry is eligible when its `stt` section is present and not explicitly
   * disabled, or, lacking an `stt` section, when its SDK compatibility is
   * "openai". Only "openai" compatibility is supported; other entries and
   * entries without a resolvable endpoint are logged and skipped. A failing
   * config source is logged and treated as "no candidates".
   */
  private async getConfigDrivenSttCandidates(): Promise<
    Array<Omit<TranscriptionConfig, "apiKey">>
  > {
    if (!this.providerConfigSource) return [];

    let providerConfigs: Record<string, ProviderConfigEntry>;
    try {
      providerConfigs = await this.providerConfigSource();
    } catch (error) {
      logger.warn("Failed to load provider configs for STT", {
        error: error instanceof Error ? error.message : String(error),
      });
      return [];
    }

    const candidates: Array<Omit<TranscriptionConfig, "apiKey">> = [];
    for (const [providerId, entry] of Object.entries(providerConfigs)) {
      const stt = entry.stt;
      const compat = stt?.sdkCompat || entry.sdkCompat;
      const sttEnabled = stt ? stt.enabled !== false : compat === "openai";
      if (!sttEnabled) continue;

      if (compat !== "openai") {
        logger.warn("Unsupported config-driven STT compatibility", {
          providerId,
          compat,
        });
        continue;
      }

      const endpoint = this.resolveEndpointUrl(
        stt?.transcriptionPath,
        stt?.baseUrl || entry.upstreamBaseUrl
      );
      if (!endpoint) {
        logger.warn("Invalid STT endpoint configuration", {
          providerId,
          transcriptionPath: stt?.transcriptionPath,
          baseUrl: stt?.baseUrl || entry.upstreamBaseUrl,
        });
        continue;
      }

      candidates.push({
        profileProviderId: providerId,
        displayName: entry.displayName || providerId,
        provider: "openai",
        openaiCompat: {
          endpointUrl: endpoint,
          model: stt?.model?.trim() || "whisper-1",
        },
      });
    }
    return candidates;
  }

  /**
   * Get provider info for documentation/help messages.
   */
  getProviderInfo(): Array<{ provider: TranscriptionProvider; name: string }> {
    return TTS_CAPABLE_PROVIDERS.map(({ ttsProvider, displayName }) => ({
      provider: ttsProvider,
      name: displayName,
    }));
  }

  // ==========================================================================
  // Text-to-Speech (Synthesis)
  // ==========================================================================

  /**
   * Synthesize text to audio with the first built-in provider the agent has
   * a profile for.
   *
   * @param text - Text to speak.
   * @param agentId - Agent whose auth profiles supply the credentials.
   * @param options - Optional voice/speed overrides.
   * @returns The audio, or an error description; provider failures are
   *   logged and reported in the result rather than thrown.
   */
  async synthesize(
    text: string,
    agentId: string,
    options: VoiceOptions = {}
  ): Promise<SynthesisResult> {
    // Select from the built-in TTS providers only. The STT candidate list can
    // end in config-driven, STT-only providers tagged "openai"; using one here
    // would send that provider's credential to the OpenAI speech endpoint.
    const synthesisConfigs = await this.getSynthesisConfigs(agentId);
    const config = synthesisConfigs[0];

    if (!config) {
      return this.noProviderError("No audio provider configured", agentId);
    }

    logger.info("Synthesizing audio", {
      agentId,
      provider: config.provider,
      textLength: text.length,
      voice: options.voice,
    });

    try {
      const result = await this.synthesizeWithProvider(text, config, options);
      logger.info("Synthesis successful", {
        agentId,
        provider: config.provider,
        audioSize: result.audioBuffer.length,
      });
      return { ...result, provider: config.provider };
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      logger.error("Synthesis failed", {
        agentId,
        provider: config.provider,
        error: errorMessage,
      });
      return {
        error: `Synthesis failed with ${displayName(config.provider)}: ${errorMessage}`,
        availableProviders: [],
      };
    }
  }

  /** Log and build the "nothing configured" error, listing built-in providers. */
  private noProviderError(message: string, agentId: string) {
    const availableProviders = TTS_CAPABLE_PROVIDERS.map((p) => p.ttsProvider);
    logger.info(message, { agentId, availableProviders });
    return { error: message, availableProviders };
  }

  // ==========================================================================
  // Provider-specific implementations - Transcription (STT)
  // ==========================================================================

  /** Dispatch a transcription request to the provider named in `config`. */
  private async transcribeWithProvider(
    buffer: Buffer,
    config: TranscriptionConfig,
    mimeType: string
  ): Promise<string> {
    switch (config.provider) {
      case "openai":
        return this.transcribeWithOpenAI(
          buffer,
          config.apiKey,
          mimeType,
          config.openaiCompat
        );
      case "gemini":
        return this.transcribeWithGemini(buffer, config.apiKey, mimeType);
      case "elevenlabs":
        return this.transcribeWithElevenLabs(buffer, config.apiKey, mimeType);
      default:
        throw new Error(`Unknown provider: ${config.provider}`);
    }
  }

  /**
   * Pull the `text` field out of a transcription response body.
   *
   * @throws Error when the body has no string `text`, so the caller reports a
   *   clear provider failure instead of tripping over `undefined` later.
   */
  private readTranscriptText(payload: unknown, providerLabel: string): string {
    const text = (payload as { text?: unknown } | null)?.text;
    if (typeof text !== "string") {
      throw new Error(`${providerLabel} API returned no transcription text`);
    }
    return text;
  }

  /**
   * Transcribe via the OpenAI transcription API or an OpenAI-compatible
   * endpoint.
   *
   * @param options - Endpoint/model override for config-driven providers;
   *   defaults to api.openai.com and "whisper-1".
   * @throws Error on a non-2xx response or a body without `text`.
   */
  private async transcribeWithOpenAI(
    buffer: Buffer,
    apiKey: string,
    mimeType: string,
    options?: { endpointUrl: string; model: string }
  ): Promise<string> {
    const formData = new FormData();
    const ext = this.getExtensionFromMime(mimeType);
    formData.append(
      "file",
      new Blob([buffer], { type: mimeType }),
      `audio.${ext}`
    );
    formData.append("model", options?.model || "whisper-1");

    const resp = await fetch(
      options?.endpointUrl || "https://api.openai.com/v1/audio/transcriptions",
      {
        method: "POST",
        headers: { Authorization: `Bearer ${apiKey}` },
        body: formData,
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`OpenAI API error: ${resp.status} - ${error}`);
    }

    return this.readTranscriptText(await resp.json(), "OpenAI");
  }

  /**
   * Transcribe via Gemini `generateContent`, sending the audio inline as
   * base64 next to a transcription instruction.
   *
   * @returns The concatenated text parts of the first candidate, or "" when
   *   the response carries no text.
   * @throws Error on a non-2xx response.
   */
  private async transcribeWithGemini(
    buffer: Buffer,
    apiKey: string,
    mimeType: string
  ): Promise<string> {
    const resp = await fetch(
      "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          // Header auth keeps the key out of the request URL, which is far
          // more likely to be captured by proxies and access logs.
          "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
          contents: [
            {
              parts: [
                {
                  text: "Transcribe this audio exactly as spoken. Return only the transcription text, nothing else:",
                },
                {
                  inline_data: {
                    mime_type: mimeType,
                    data: buffer.toString("base64"),
                  },
                },
              ],
            },
          ],
        }),
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`Gemini API error: ${resp.status} - ${error}`);
    }

    const data = (await resp.json()) as {
      candidates?: Array<{
        content?: { parts?: Array<{ text?: string }> };
      }>;
    };
    // `candidates` may be missing altogether; treat that like "no text"
    // instead of throwing a TypeError on the index access.
    const parts = data.candidates?.[0]?.content?.parts ?? [];
    return parts.map((part) => part.text ?? "").join("");
  }

  /**
   * Transcribe via the ElevenLabs speech-to-text API.
   *
   * @throws Error on a non-2xx response or a body without `text`.
   */
  private async transcribeWithElevenLabs(
    buffer: Buffer,
    apiKey: string,
    mimeType: string
  ): Promise<string> {
    const formData = new FormData();
    const ext = this.getExtensionFromMime(mimeType);
    // The endpoint takes the upload in the "file" part and requires a
    // "model_id" (see the ElevenLabs speech-to-text API reference).
    formData.append("model_id", "scribe_v1");
    formData.append(
      "file",
      new Blob([buffer], { type: mimeType }),
      `audio.${ext}`
    );

    const resp = await fetch("https://api.elevenlabs.io/v1/speech-to-text", {
      method: "POST",
      headers: { "xi-api-key": apiKey },
      body: formData,
    });

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`ElevenLabs API error: ${resp.status} - ${error}`);
    }

    return this.readTranscriptText(await resp.json(), "ElevenLabs");
  }

  // ==========================================================================
  // Provider-specific implementations - Synthesis (TTS)
  // ==========================================================================

  /** Dispatch a synthesis request to the provider named in `config`. */
  private async synthesizeWithProvider(
    text: string,
    config: TranscriptionConfig,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    switch (config.provider) {
      case "openai":
        return this.synthesizeWithOpenAI(text, config.apiKey, options);
      case "gemini":
        return this.synthesizeWithGemini(text, config.apiKey);
      case "elevenlabs":
        return this.synthesizeWithElevenLabs(text, config.apiKey, options);
      default:
        throw new Error(`Unknown provider: ${config.provider}`);
    }
  }

  /**
   * Synthesize via the OpenAI speech API ("tts-1", Opus output).
   *
   * Voices: alloy, echo, fable, onyx, nova, shimmer (default "alloy").
   *
   * @throws Error on a non-2xx response.
   */
  private async synthesizeWithOpenAI(
    text: string,
    apiKey: string,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    const voice = options.voice || "alloy";
    // `||` on purpose: a speed of 0 falls back to the default as well.
    const speed = options.speed || 1.0;

    const resp = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "tts-1",
        input: text,
        voice,
        speed,
        response_format: "opus", // Good for WhatsApp
      }),
    });

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`OpenAI TTS API error: ${resp.status} - ${error}`);
    }

    const arrayBuffer = await resp.arrayBuffer();
    return {
      audioBuffer: Buffer.from(arrayBuffer),
      mimeType: "audio/opus",
    };
  }

  /**
   * Synthesize via Gemini `generateContent` with an AUDIO response modality
   * and the prebuilt "Aoede" voice.
   *
   * @throws Error on a non-2xx response or when no audio part is returned.
   */
  private async synthesizeWithGemini(
    text: string,
    apiKey: string
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    const resp = await fetch(
      "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          // Header auth keeps the key out of the request URL.
          "x-goog-api-key": apiKey,
        },
        body: JSON.stringify({
          contents: [
            {
              parts: [{ text: `Please speak this text aloud: "${text}"` }],
            },
          ],
          generationConfig: {
            responseModalities: ["AUDIO"],
            speechConfig: {
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName: "Aoede", // Default Gemini voice
                },
              },
            },
          },
        }),
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`Gemini TTS API error: ${resp.status} - ${error}`);
    }

    const data = (await resp.json()) as {
      candidates?: Array<{
        content?: {
          parts?: Array<{
            inlineData?: { mimeType: string; data: string };
          }>;
        };
      }>;
    };

    // A response without `candidates` ends in the explicit error below rather
    // than a TypeError from indexing `undefined`.
    const audioPart = data.candidates?.[0]?.content?.parts?.find((p) =>
      p.inlineData?.mimeType?.startsWith("audio/")
    );

    if (!audioPart?.inlineData) {
      throw new Error("Gemini did not return audio data");
    }

    return {
      audioBuffer: Buffer.from(audioPart.inlineData.data, "base64"),
      mimeType: audioPart.inlineData.mimeType,
    };
  }

  /**
   * Synthesize via the ElevenLabs text-to-speech API (MPEG output).
   *
   * Default voice: Rachel (21m00Tcm4TlvDq8ikWAM).
   *
   * @throws Error on a non-2xx response.
   */
  private async synthesizeWithElevenLabs(
    text: string,
    apiKey: string,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    const voiceId = options.voice || "21m00Tcm4TlvDq8ikWAM";

    const resp = await fetch(
      // The voice ID is caller-supplied; encode it so it stays a single path
      // segment and cannot redirect the request to another endpoint.
      `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
      {
        method: "POST",
        headers: {
          "xi-api-key": apiKey,
          "Content-Type": "application/json",
          Accept: "audio/mpeg",
        },
        body: JSON.stringify({
          text,
          model_id: "eleven_monolingual_v1",
          voice_settings: {
            stability: 0.5,
            similarity_boost: 0.5,
          },
        }),
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`ElevenLabs TTS API error: ${resp.status} - ${error}`);
    }

    const arrayBuffer = await resp.arrayBuffer();
    return {
      audioBuffer: Buffer.from(arrayBuffer),
      mimeType: "audio/mpeg",
    };
  }

  // ==========================================================================
  // Utility methods
  // ==========================================================================

  /**
   * Map an audio MIME type to a file extension for upload file names.
   *
   * Parameters (e.g. "; codecs=opus"), surrounding whitespace and letter case
   * are ignored. Unknown types fall back to "ogg".
   */
  private getExtensionFromMime(mimeType: string): string {
    const essence = (mimeType.split(";")[0] ?? "").trim().toLowerCase();
    return TranscriptionService.MIME_TO_EXTENSION.get(essence) ?? "ogg";
  }

  /**
   * Build the absolute transcription endpoint URL.
   *
   * @param transcriptionPath - Absolute URL, or a path relative to `baseUrl`;
   *   when omitted, a default OpenAI-style path is derived from `baseUrl`.
   * @param baseUrl - Provider base URL; required unless the path is absolute.
   * @returns The endpoint URL, or null when a relative path has no base URL.
   */
  private resolveEndpointUrl(
    transcriptionPath: string | undefined,
    baseUrl: string | undefined
  ): string | null {
    const path = (
      transcriptionPath || this.getDefaultOpenAiTranscriptionPath(baseUrl)
    ).trim();
    if (/^https?:\/\//i.test(path)) {
      return path;
    }

    const base = (baseUrl || "").trim();
    if (!base) return null;

    const normalizedPath = path.startsWith("/") ? path : `/${path}`;
    return `${base.replace(/\/+$/, "")}${normalizedPath}`;
  }

  /**
   * Default transcription path for an OpenAI-compatible base URL: the "/v1"
   * prefix is left out when the base URL already ends in "/v1".
   */
  private getDefaultOpenAiTranscriptionPath(
    baseUrl: string | undefined
  ): string {
    const trimmedBase = (baseUrl || "").trim().replace(/\/+$/, "");
    if (trimmedBase.endsWith("/v1")) {
      return "/audio/transcriptions";
    }
    return "/v1/audio/transcriptions";
  }
}