@lobu/gateway 3.0.8 → 3.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/dist/api/platform.d.ts.map +1 -1
  2. package/dist/api/platform.js +8 -26
  3. package/dist/api/platform.js.map +1 -1
  4. package/dist/auth/mcp/proxy.d.ts +14 -0
  5. package/dist/auth/mcp/proxy.d.ts.map +1 -1
  6. package/dist/auth/mcp/proxy.js +149 -13
  7. package/dist/auth/mcp/proxy.js.map +1 -1
  8. package/dist/cli/gateway.d.ts.map +1 -1
  9. package/dist/cli/gateway.js +29 -0
  10. package/dist/cli/gateway.js.map +1 -1
  11. package/dist/cli/index.js +2 -2
  12. package/dist/cli/index.js.map +1 -1
  13. package/dist/connections/chat-instance-manager.d.ts.map +1 -1
  14. package/dist/connections/chat-instance-manager.js +2 -1
  15. package/dist/connections/chat-instance-manager.js.map +1 -1
  16. package/dist/connections/interaction-bridge.d.ts +9 -2
  17. package/dist/connections/interaction-bridge.d.ts.map +1 -1
  18. package/dist/connections/interaction-bridge.js +132 -230
  19. package/dist/connections/interaction-bridge.js.map +1 -1
  20. package/dist/connections/message-handler-bridge.d.ts.map +1 -1
  21. package/dist/connections/message-handler-bridge.js +44 -26
  22. package/dist/connections/message-handler-bridge.js.map +1 -1
  23. package/dist/interactions.d.ts +9 -43
  24. package/dist/interactions.d.ts.map +1 -1
  25. package/dist/interactions.js +10 -52
  26. package/dist/interactions.js.map +1 -1
  27. package/dist/orchestration/base-deployment-manager.js +7 -7
  28. package/dist/orchestration/base-deployment-manager.js.map +1 -1
  29. package/dist/platform/unified-thread-consumer.d.ts.map +1 -1
  30. package/dist/platform/unified-thread-consumer.js +38 -34
  31. package/dist/platform/unified-thread-consumer.js.map +1 -1
  32. package/dist/routes/public/agent.d.ts +4 -0
  33. package/dist/routes/public/agent.d.ts.map +1 -1
  34. package/dist/routes/public/agent.js +21 -0
  35. package/dist/routes/public/agent.js.map +1 -1
  36. package/dist/services/core-services.d.ts.map +1 -1
  37. package/dist/services/core-services.js +4 -0
  38. package/dist/services/core-services.js.map +1 -1
  39. package/package.json +2 -2
  40. package/src/__tests__/agent-config-routes.test.ts +0 -254
  41. package/src/__tests__/agent-history-routes.test.ts +0 -72
  42. package/src/__tests__/agent-routes.test.ts +0 -68
  43. package/src/__tests__/agent-schedules-routes.test.ts +0 -59
  44. package/src/__tests__/agent-settings-store.test.ts +0 -323
  45. package/src/__tests__/bedrock-model-catalog.test.ts +0 -40
  46. package/src/__tests__/bedrock-openai-service.test.ts +0 -157
  47. package/src/__tests__/bedrock-provider-module.test.ts +0 -56
  48. package/src/__tests__/chat-instance-manager-slack.test.ts +0 -204
  49. package/src/__tests__/chat-response-bridge.test.ts +0 -131
  50. package/src/__tests__/config-memory-plugins.test.ts +0 -92
  51. package/src/__tests__/config-request-store.test.ts +0 -127
  52. package/src/__tests__/connection-routes.test.ts +0 -144
  53. package/src/__tests__/core-services-store-selection.test.ts +0 -92
  54. package/src/__tests__/docker-deployment.test.ts +0 -1211
  55. package/src/__tests__/embedded-deployment.test.ts +0 -342
  56. package/src/__tests__/grant-store.test.ts +0 -148
  57. package/src/__tests__/http-proxy.test.ts +0 -281
  58. package/src/__tests__/instruction-service.test.ts +0 -37
  59. package/src/__tests__/link-buttons.test.ts +0 -112
  60. package/src/__tests__/lobu.test.ts +0 -32
  61. package/src/__tests__/mcp-config-service.test.ts +0 -347
  62. package/src/__tests__/mcp-proxy.test.ts +0 -694
  63. package/src/__tests__/message-handler-bridge.test.ts +0 -17
  64. package/src/__tests__/model-selection.test.ts +0 -172
  65. package/src/__tests__/oauth-templates.test.ts +0 -39
  66. package/src/__tests__/platform-adapter-slack-send.test.ts +0 -114
  67. package/src/__tests__/platform-helpers-model-resolution.test.ts +0 -253
  68. package/src/__tests__/provider-inheritance.test.ts +0 -212
  69. package/src/__tests__/routes/cli-auth.test.ts +0 -337
  70. package/src/__tests__/routes/interactions.test.ts +0 -121
  71. package/src/__tests__/secret-proxy.test.ts +0 -85
  72. package/src/__tests__/session-manager.test.ts +0 -572
  73. package/src/__tests__/setup.ts +0 -133
  74. package/src/__tests__/skill-and-mcp-registry.test.ts +0 -203
  75. package/src/__tests__/slack-routes.test.ts +0 -161
  76. package/src/__tests__/system-config-resolver.test.ts +0 -75
  77. package/src/__tests__/system-message-limiter.test.ts +0 -89
  78. package/src/__tests__/system-skills-service.test.ts +0 -362
  79. package/src/__tests__/transcription-service.test.ts +0 -222
  80. package/src/__tests__/utils/rate-limiter.test.ts +0 -102
  81. package/src/__tests__/worker-connection-manager.test.ts +0 -497
  82. package/src/__tests__/worker-job-router.test.ts +0 -722
  83. package/src/api/index.ts +0 -1
  84. package/src/api/platform.ts +0 -292
  85. package/src/api/response-renderer.ts +0 -157
  86. package/src/auth/agent-metadata-store.ts +0 -168
  87. package/src/auth/api-auth-middleware.ts +0 -69
  88. package/src/auth/api-key-provider-module.ts +0 -213
  89. package/src/auth/base-provider-module.ts +0 -201
  90. package/src/auth/bedrock/provider-module.ts +0 -110
  91. package/src/auth/chatgpt/chatgpt-oauth-module.ts +0 -185
  92. package/src/auth/chatgpt/device-code-client.ts +0 -218
  93. package/src/auth/chatgpt/index.ts +0 -1
  94. package/src/auth/claude/oauth-module.ts +0 -280
  95. package/src/auth/cli/token-service.ts +0 -249
  96. package/src/auth/external/client.ts +0 -560
  97. package/src/auth/external/device-code-client.ts +0 -235
  98. package/src/auth/mcp/config-service.ts +0 -420
  99. package/src/auth/mcp/proxy.ts +0 -1086
  100. package/src/auth/mcp/string-substitution.ts +0 -17
  101. package/src/auth/mcp/tool-cache.ts +0 -90
  102. package/src/auth/oauth/base-client.ts +0 -267
  103. package/src/auth/oauth/client.ts +0 -153
  104. package/src/auth/oauth/credentials.ts +0 -7
  105. package/src/auth/oauth/providers.ts +0 -69
  106. package/src/auth/oauth/state-store.ts +0 -150
  107. package/src/auth/oauth-templates.ts +0 -179
  108. package/src/auth/provider-catalog.ts +0 -220
  109. package/src/auth/provider-model-options.ts +0 -41
  110. package/src/auth/settings/agent-settings-store.ts +0 -565
  111. package/src/auth/settings/auth-profiles-manager.ts +0 -216
  112. package/src/auth/settings/index.ts +0 -12
  113. package/src/auth/settings/model-preference-store.ts +0 -52
  114. package/src/auth/settings/model-selection.ts +0 -135
  115. package/src/auth/settings/resolved-settings-view.ts +0 -298
  116. package/src/auth/settings/template-utils.ts +0 -44
  117. package/src/auth/settings/token-service.ts +0 -88
  118. package/src/auth/system-env-store.ts +0 -98
  119. package/src/auth/user-agents-store.ts +0 -68
  120. package/src/channels/binding-service.ts +0 -214
  121. package/src/channels/index.ts +0 -4
  122. package/src/cli/gateway.ts +0 -1312
  123. package/src/cli/index.ts +0 -74
  124. package/src/commands/built-in-commands.ts +0 -80
  125. package/src/commands/command-dispatcher.ts +0 -94
  126. package/src/commands/command-reply-adapters.ts +0 -27
  127. package/src/config/file-loader.ts +0 -618
  128. package/src/config/index.ts +0 -588
  129. package/src/config/network-allowlist.ts +0 -71
  130. package/src/connections/chat-instance-manager.ts +0 -1284
  131. package/src/connections/chat-response-bridge.ts +0 -618
  132. package/src/connections/index.ts +0 -7
  133. package/src/connections/interaction-bridge.ts +0 -831
  134. package/src/connections/message-handler-bridge.ts +0 -415
  135. package/src/connections/platform-auth-methods.ts +0 -15
  136. package/src/connections/types.ts +0 -84
  137. package/src/gateway/connection-manager.ts +0 -291
  138. package/src/gateway/index.ts +0 -698
  139. package/src/gateway/job-router.ts +0 -201
  140. package/src/gateway-main.ts +0 -200
  141. package/src/index.ts +0 -41
  142. package/src/infrastructure/queue/index.ts +0 -12
  143. package/src/infrastructure/queue/queue-producer.ts +0 -148
  144. package/src/infrastructure/queue/redis-queue.ts +0 -361
  145. package/src/infrastructure/queue/types.ts +0 -133
  146. package/src/infrastructure/redis/system-message-limiter.ts +0 -94
  147. package/src/interactions/config-request-store.ts +0 -198
  148. package/src/interactions.ts +0 -363
  149. package/src/lobu.ts +0 -311
  150. package/src/metrics/prometheus.ts +0 -159
  151. package/src/modules/module-system.ts +0 -179
  152. package/src/orchestration/base-deployment-manager.ts +0 -900
  153. package/src/orchestration/deployment-utils.ts +0 -98
  154. package/src/orchestration/impl/docker-deployment.ts +0 -620
  155. package/src/orchestration/impl/embedded-deployment.ts +0 -268
  156. package/src/orchestration/impl/index.ts +0 -8
  157. package/src/orchestration/impl/k8s/deployment.ts +0 -1061
  158. package/src/orchestration/impl/k8s/helpers.ts +0 -610
  159. package/src/orchestration/impl/k8s/index.ts +0 -1
  160. package/src/orchestration/index.ts +0 -333
  161. package/src/orchestration/message-consumer.ts +0 -584
  162. package/src/orchestration/scheduled-wakeup.ts +0 -704
  163. package/src/permissions/approval-policy.ts +0 -36
  164. package/src/permissions/grant-store.ts +0 -219
  165. package/src/platform/file-handler.ts +0 -66
  166. package/src/platform/link-buttons.ts +0 -57
  167. package/src/platform/renderer-utils.ts +0 -44
  168. package/src/platform/response-renderer.ts +0 -84
  169. package/src/platform/unified-thread-consumer.ts +0 -187
  170. package/src/platform.ts +0 -318
  171. package/src/proxy/http-proxy.ts +0 -752
  172. package/src/proxy/proxy-manager.ts +0 -81
  173. package/src/proxy/secret-proxy.ts +0 -402
  174. package/src/proxy/token-refresh-job.ts +0 -143
  175. package/src/routes/internal/audio.ts +0 -141
  176. package/src/routes/internal/device-auth.ts +0 -652
  177. package/src/routes/internal/files.ts +0 -226
  178. package/src/routes/internal/history.ts +0 -69
  179. package/src/routes/internal/images.ts +0 -127
  180. package/src/routes/internal/interactions.ts +0 -84
  181. package/src/routes/internal/middleware.ts +0 -23
  182. package/src/routes/internal/schedule.ts +0 -226
  183. package/src/routes/internal/types.ts +0 -22
  184. package/src/routes/openapi-auto.ts +0 -239
  185. package/src/routes/public/agent-access.ts +0 -23
  186. package/src/routes/public/agent-config.ts +0 -675
  187. package/src/routes/public/agent-history.ts +0 -422
  188. package/src/routes/public/agent-schedules.ts +0 -296
  189. package/src/routes/public/agent.ts +0 -1086
  190. package/src/routes/public/agents.ts +0 -373
  191. package/src/routes/public/channels.ts +0 -191
  192. package/src/routes/public/cli-auth.ts +0 -896
  193. package/src/routes/public/connections.ts +0 -574
  194. package/src/routes/public/landing.ts +0 -16
  195. package/src/routes/public/oauth.ts +0 -147
  196. package/src/routes/public/settings-auth.ts +0 -104
  197. package/src/routes/public/slack.ts +0 -173
  198. package/src/routes/shared/agent-ownership.ts +0 -101
  199. package/src/routes/shared/token-verifier.ts +0 -34
  200. package/src/services/bedrock-model-catalog.ts +0 -217
  201. package/src/services/bedrock-openai-service.ts +0 -658
  202. package/src/services/core-services.ts +0 -1072
  203. package/src/services/image-generation-service.ts +0 -257
  204. package/src/services/instruction-service.ts +0 -318
  205. package/src/services/mcp-registry.ts +0 -94
  206. package/src/services/platform-helpers.ts +0 -287
  207. package/src/services/session-manager.ts +0 -262
  208. package/src/services/settings-resolver.ts +0 -74
  209. package/src/services/system-config-resolver.ts +0 -89
  210. package/src/services/system-skills-service.ts +0 -229
  211. package/src/services/transcription-service.ts +0 -684
  212. package/src/session.ts +0 -110
  213. package/src/spaces/index.ts +0 -1
  214. package/src/spaces/space-resolver.ts +0 -17
  215. package/src/stores/in-memory-agent-store.ts +0 -403
  216. package/src/stores/redis-agent-store.ts +0 -279
  217. package/src/utils/public-url.ts +0 -44
  218. package/src/utils/rate-limiter.ts +0 -94
  219. package/tsconfig.json +0 -33
@@ -1,684 +0,0 @@
1
- /**
2
- * Multi-Provider Audio Service
3
- *
4
- * Supports speech-to-text and text-to-speech via auth profiles (installed providers):
5
- * - OpenAI (chatgpt auth profile) - Whisper for STT, TTS API for speech
6
- * - Google Gemini (gemini auth profile) - Audio input/output
7
- * - ElevenLabs (elevenlabs auth profile) - STT and high-quality TTS
8
- *
9
- * STT selection: built-ins (chatgpt/openai, gemini, elevenlabs) plus optional
10
- * config-driven STT providers declared in system-skills provider config.
11
- * TTS selection stays built-in only (openai → gemini → elevenlabs).
12
- */
13
-
14
- import type { ProviderConfigEntry } from "@lobu/core";
15
- import { createLogger } from "@lobu/core";
16
- import type { AuthProfilesManager } from "../auth/settings/auth-profiles-manager";
17
-
18
- const logger = createLogger("transcription-service");
19
-
20
/** Audio backends this service knows how to call. */
export type TranscriptionProvider = "openai" | "gemini" | "elevenlabs";

/** A provider selection together with the credential used to call it. */
interface TranscriptionConfig {
  /** Auth profile provider id the credential was looked up under. */
  profileProviderId: string;
  /** Human-readable provider name used in error messages. */
  displayName: string;
  /** Backend implementation that handles the request. */
  provider: TranscriptionProvider;
  /** Credential taken from the matching auth profile. */
  apiKey: string;
  /** Endpoint and model overrides for OpenAI-compatible STT providers. */
  openaiCompat?: { endpointUrl: string; model: string };
}

/** Successful speech-to-text outcome. */
export interface TranscriptionSuccess {
  text: string;
  provider: TranscriptionProvider;
}

/** Failed speech-to-text outcome. */
export interface TranscriptionError {
  error: string;
  availableProviders: TranscriptionProvider[];
}

/** Outcome of a transcription request: either the text or an error. */
export type TranscriptionResult = TranscriptionSuccess | TranscriptionError;

/** Successful text-to-speech outcome. */
export interface SynthesisSuccess {
  audioBuffer: Buffer;
  mimeType: string;
  provider: TranscriptionProvider;
}

/** Failed text-to-speech outcome. */
export interface SynthesisError {
  error: string;
  availableProviders: TranscriptionProvider[];
}

/** Outcome of a synthesis request: either the audio or an error. */
export type SynthesisResult = SynthesisSuccess | SynthesisError;

/** Voice options for TTS. */
export interface VoiceOptions {
  /** Provider-specific voice ID. */
  voice?: string;
  /** Speech speed (0.5-2.0, default 1.0). */
  speed?: number;
}
63
-
64
// Built-in audio providers, listed in the order they are tried. Each entry
// links an auth profile provider id to the backend that serves it and to the
// name shown to users (single source of truth for that mapping).
const TTS_CAPABLE_PROVIDERS: Array<{
  profileProviderId: string;
  ttsProvider: TranscriptionProvider;
  displayName: string;
}> = [
  { profileProviderId: "chatgpt", ttsProvider: "openai", displayName: "OpenAI" },
  {
    profileProviderId: "gemini",
    ttsProvider: "gemini",
    displayName: "Google Gemini",
  },
  {
    profileProviderId: "elevenlabs",
    ttsProvider: "elevenlabs",
    displayName: "ElevenLabs",
  },
];
86
-
87
/**
 * Looks up the human-readable name of a provider.
 *
 * @param provider Backend identifier to look up.
 * @returns The display name from the built-in provider table, or the raw
 *   provider id when the table has no entry for it.
 */
function displayName(provider: TranscriptionProvider): string {
  const match = TTS_CAPABLE_PROVIDERS.find(
    (entry) => entry.ttsProvider === provider
  );
  return match?.displayName ?? provider;
}
93
-
94
- export class TranscriptionService {
95
/** Optional loader for provider config entries, consulted when looking for extra STT providers. */
private providerConfigSource?:
  | (() => Promise<Record<string, ProviderConfigEntry>>)
  | undefined;

/**
 * @param authProfilesManager Used to look up the best auth profile (and its
 *   credential) for an agent and provider id.
 * @param providerConfigSource Optional loader for provider config entries;
 *   may also be supplied later through `setProviderConfigSource`.
 */
constructor(
  private readonly authProfilesManager: AuthProfilesManager,
  providerConfigSource?: () => Promise<Record<string, ProviderConfigEntry>>
) {
  this.providerConfigSource = providerConfigSource;
}
105
-
106
/**
 * Replaces the loader used to discover config-driven STT providers.
 *
 * @param source Async function returning provider config entries keyed by
 *   provider id.
 */
setProviderConfigSource(
  source: () => Promise<Record<string, ProviderConfigEntry>>
): void {
  this.providerConfigSource = source;
}
111
-
112
/**
 * Transcribe an audio buffer to text.
 *
 * Every provider configured for the agent is tried in order; the first one
 * that succeeds wins. Provider failures are logged and collected rather than
 * thrown, so the caller always receives a result object.
 *
 * @param audioBuffer Raw audio bytes.
 * @param agentId Agent whose auth profiles determine the providers to try.
 * @param mimeType MIME type of the audio; defaults to "audio/ogg".
 * @returns The transcript and the provider that produced it, or an error
 *   describing every failed attempt.
 */
async transcribe(
  audioBuffer: Buffer,
  agentId: string,
  mimeType = "audio/ogg"
): Promise<TranscriptionResult> {
  const candidates = await this.getTranscriptionConfigs(agentId);
  if (candidates.length === 0) {
    return this.noProviderError(
      "No transcription provider configured",
      agentId
    );
  }

  const failures: string[] = [];
  for (const candidate of candidates) {
    const { provider, profileProviderId } = candidate;
    logger.info("Transcribing audio", {
      agentId,
      provider,
      profileProviderId,
      bufferSize: audioBuffer.length,
      mimeType,
    });

    try {
      const text = await this.transcribeWithProvider(
        audioBuffer,
        candidate,
        mimeType
      );
      logger.info("Transcription successful", {
        agentId,
        provider,
        profileProviderId,
        textLength: text.length,
      });
      return { text, provider };
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.error("Transcription failed", {
        agentId,
        provider,
        profileProviderId,
        error: reason,
      });
      failures.push(`${candidate.displayName}: ${reason}`);
    }
  }

  // Several candidates can share one backend, so report each backend once.
  const attemptedProviders = [...new Set(candidates.map((c) => c.provider))];
  return {
    error: `Transcription failed with all configured providers: ${failures.join(" | ")}`,
    availableProviders: attemptedProviders,
  };
}
170
-
171
/**
 * Get the highest-priority transcription config for an agent.
 *
 * The candidates come from `getTranscriptionConfigs`: built-in providers with
 * an installed auth profile first (openai → gemini → elevenlabs), followed by
 * config-driven STT providers.
 *
 * @returns The first candidate, or null when the agent has none.
 */
async getConfig(agentId: string): Promise<TranscriptionConfig | null> {
  const [first] = await this.getTranscriptionConfigs(agentId);
  return first ?? null;
}
179
-
180
/**
 * Collects a config for every built-in provider the agent has an auth
 * profile for, preserving the order of the built-in provider table.
 */
private async getSynthesisConfigs(
  agentId: string
): Promise<TranscriptionConfig[]> {
  const resolved: TranscriptionConfig[] = [];
  for (const entry of TTS_CAPABLE_PROVIDERS) {
    const profile = await this.authProfilesManager.getBestProfile(
      agentId,
      entry.profileProviderId
    );
    if (!profile) continue;

    resolved.push({
      profileProviderId: entry.profileProviderId,
      displayName: displayName(entry.ttsProvider),
      provider: entry.ttsProvider,
      apiKey: profile.credential,
    });
  }
  return resolved;
}
200
-
201
/**
 * Builds the ordered list of STT candidates for an agent: built-in providers
 * first, then config-driven providers that have an auth profile and whose
 * provider id is not already in the list.
 */
private async getTranscriptionConfigs(
  agentId: string
): Promise<TranscriptionConfig[]> {
  const result = await this.getSynthesisConfigs(agentId);
  const seenIds = new Set(result.map((c) => c.profileProviderId));

  for (const candidate of await this.getConfigDrivenSttCandidates()) {
    const {
      profileProviderId,
      displayName: label,
      provider,
      openaiCompat,
    } = candidate;
    if (seenIds.has(profileProviderId)) continue;

    const profile = await this.authProfilesManager.getBestProfile(
      agentId,
      profileProviderId
    );
    if (!profile) continue;

    result.push({
      profileProviderId,
      displayName: label,
      provider,
      apiKey: profile.credential,
      openaiCompat,
    });
    seenIds.add(profileProviderId);
  }

  return result;
}
229
-
230
/**
 * Derives STT candidates (without credentials) from the provider config
 * source.
 *
 * Returns an empty list when no source is set or when loading fails (the
 * failure is logged as a warning). Entries that are disabled, not
 * OpenAI-compatible, or lacking a resolvable endpoint are skipped.
 */
private async getConfigDrivenSttCandidates(): Promise<
  Array<Omit<TranscriptionConfig, "apiKey">>
> {
  if (!this.providerConfigSource) return [];

  let entries: Record<string, ProviderConfigEntry>;
  try {
    entries = await this.providerConfigSource();
  } catch (error) {
    logger.warn("Failed to load provider configs for STT", {
      error: error instanceof Error ? error.message : String(error),
    });
    return [];
  }

  const found: Array<Omit<TranscriptionConfig, "apiKey">> = [];
  for (const [providerId, entry] of Object.entries(entries)) {
    const sttConfig = entry.stt;
    const compat = sttConfig?.sdkCompat || entry.sdkCompat;

    // With an explicit stt section the provider is on unless `enabled` is
    // false; without one, only OpenAI-compatible providers are considered.
    const enabled = sttConfig
      ? sttConfig.enabled !== false
      : compat === "openai";
    if (!enabled) continue;

    if (compat !== "openai") {
      logger.warn("Unsupported config-driven STT compatibility", {
        providerId,
        compat,
      });
      continue;
    }

    const baseUrl = sttConfig?.baseUrl || entry.upstreamBaseUrl;
    const endpointUrl = this.resolveEndpointUrl(
      sttConfig?.transcriptionPath,
      baseUrl
    );
    if (!endpointUrl) {
      logger.warn("Invalid STT endpoint configuration", {
        providerId,
        transcriptionPath: sttConfig?.transcriptionPath,
        baseUrl,
      });
      continue;
    }

    found.push({
      profileProviderId: providerId,
      displayName: entry.displayName || providerId,
      provider: "openai",
      openaiCompat: {
        endpointUrl,
        model: sttConfig?.model?.trim() || "whisper-1",
      },
    });
  }
  return found;
}
285
-
286
/**
 * Get provider info for documentation/help messages.
 *
 * @returns One `{ provider, name }` pair per built-in provider, in table
 *   order.
 */
getProviderInfo(): Array<{ provider: TranscriptionProvider; name: string }> {
  const info: Array<{ provider: TranscriptionProvider; name: string }> = [];
  for (const entry of TTS_CAPABLE_PROVIDERS) {
    info.push({ provider: entry.ttsProvider, name: entry.displayName });
  }
  return info;
}
295
-
296
- // ==========================================================================
297
- // Text-to-Speech (Synthesis)
298
- // ==========================================================================
299
-
300
/**
 * Synthesize text to audio.
 *
 * Only built-in providers are eligible (openai → gemini → elevenlabs); the
 * first one with an installed auth profile is used. Config-driven STT
 * providers are deliberately excluded: they are registered with
 * `provider: "openai"` but hold a third-party credential, which the OpenAI
 * TTS call would otherwise send to api.openai.com.
 *
 * @param text Text to speak.
 * @param agentId Agent whose auth profiles determine the provider.
 * @param options Optional voice settings forwarded to the provider.
 * @returns The audio and the provider that produced it, or an error object.
 *   Provider failures are logged and returned, never thrown.
 */
async synthesize(
  text: string,
  agentId: string,
  options: VoiceOptions = {}
): Promise<SynthesisResult> {
  const [config] = await this.getSynthesisConfigs(agentId);

  if (!config) {
    return this.noProviderError("No audio provider configured", agentId);
  }

  logger.info("Synthesizing audio", {
    agentId,
    provider: config.provider,
    textLength: text.length,
    voice: options.voice,
  });

  try {
    const result = await this.synthesizeWithProvider(text, config, options);
    logger.info("Synthesis successful", {
      agentId,
      provider: config.provider,
      audioSize: result.audioBuffer.length,
    });
    return { ...result, provider: config.provider };
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error);
    logger.error("Synthesis failed", {
      agentId,
      provider: config.provider,
      error: errorMessage,
    });
    return {
      error: `Synthesis failed with ${displayName(config.provider)}: ${errorMessage}`,
      availableProviders: [],
    };
  }
}
343
-
344
/**
 * Builds the error result used when an agent has no usable provider, listing
 * every built-in provider so the caller can suggest what to install. The
 * message is also logged at info level.
 */
private noProviderError(message: string, agentId: string) {
  const availableProviders = TTS_CAPABLE_PROVIDERS.map(
    (entry) => entry.ttsProvider
  );
  logger.info(message, { agentId, availableProviders });
  return { error: message, availableProviders };
}
349
-
350
- // ==========================================================================
351
- // Provider-specific implementations - Transcription (STT)
352
- // ==========================================================================
353
-
354
/**
 * Dispatches a transcription request to the backend named by
 * `config.provider`.
 *
 * @throws Error when the provider is not one of the known backends, or when
 *   the backend call itself fails.
 */
private async transcribeWithProvider(
  buffer: Buffer,
  config: TranscriptionConfig,
  mimeType: string
): Promise<string> {
  const { provider, apiKey } = config;

  if (provider === "openai") {
    return this.transcribeWithOpenAI(
      buffer,
      apiKey,
      mimeType,
      config.openaiCompat
    );
  }
  if (provider === "gemini") {
    return this.transcribeWithGemini(buffer, apiKey, mimeType);
  }
  if (provider === "elevenlabs") {
    return this.transcribeWithElevenLabs(buffer, apiKey, mimeType);
  }
  throw new Error(`Unknown provider: ${provider}`);
}
375
-
376
/**
 * Transcribes audio through an OpenAI-style multipart transcription endpoint.
 *
 * @param options Endpoint URL and model for OpenAI-compatible providers; when
 *   omitted (or when a field is empty) the OpenAI endpoint and "whisper-1"
 *   are used.
 * @throws Error when the endpoint answers with a non-2xx status.
 */
private async transcribeWithOpenAI(
  buffer: Buffer,
  apiKey: string,
  mimeType: string,
  options?: { endpointUrl: string; model: string }
): Promise<string> {
  const endpoint =
    options?.endpointUrl || "https://api.openai.com/v1/audio/transcriptions";
  const model = options?.model || "whisper-1";

  // The upload gets a file name whose extension is derived from the MIME type.
  const fileName = `audio.${this.getExtensionFromMime(mimeType)}`;
  const form = new FormData();
  form.append("file", new Blob([buffer], { type: mimeType }), fileName);
  form.append("model", model);

  const resp = await fetch(endpoint, {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  });

  if (!resp.ok) {
    const details = await resp.text();
    throw new Error(`OpenAI API error: ${resp.status} - ${details}`);
  }

  const { text } = (await resp.json()) as { text: string };
  return text;
}
408
-
409
/**
 * Transcribes audio with Gemini by sending the audio inline (base64) together
 * with a transcription instruction.
 *
 * @returns The text of the first part of the first candidate, or an empty
 *   string when the response carries no text.
 * @throws Error when the API answers with a non-2xx status.
 */
private async transcribeWithGemini(
  buffer: Buffer,
  apiKey: string,
  mimeType: string
): Promise<string> {
  const resp = await fetch(
    "https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // Sent as a header rather than a `?key=` query parameter so the
        // credential does not end up in URLs recorded by logs or proxies.
        "x-goog-api-key": apiKey,
      },
      body: JSON.stringify({
        contents: [
          {
            parts: [
              {
                text: "Transcribe this audio exactly as spoken. Return only the transcription text, nothing else:",
              },
              {
                inline_data: {
                  mime_type: mimeType,
                  data: buffer.toString("base64"),
                },
              },
            ],
          },
        ],
      }),
    }
  );

  if (!resp.ok) {
    const error = await resp.text();
    throw new Error(`Gemini API error: ${resp.status} - ${error}`);
  }

  // Every level is optional: a response may come back without candidates or
  // parts, and that must yield an empty transcript instead of a TypeError.
  const data = (await resp.json()) as {
    candidates?: Array<{
      content?: { parts?: Array<{ text?: string }> };
    }>;
  };
  return data.candidates?.[0]?.content?.parts?.[0]?.text || "";
}
452
-
453
/**
 * Transcribes audio with the ElevenLabs speech-to-text API.
 *
 * The endpoint takes a multipart form with the audio under `file` and a
 * required `model_id`.
 *
 * @throws Error when the API answers with a non-2xx status or the response
 *   has no string `text` field.
 */
private async transcribeWithElevenLabs(
  buffer: Buffer,
  apiKey: string,
  mimeType: string
): Promise<string> {
  const formData = new FormData();
  const ext = this.getExtensionFromMime(mimeType);
  formData.append("model_id", "scribe_v1");
  formData.append(
    "file",
    new Blob([buffer], { type: mimeType }),
    `audio.${ext}`
  );

  const resp = await fetch("https://api.elevenlabs.io/v1/speech-to-text", {
    method: "POST",
    headers: { "xi-api-key": apiKey },
    body: formData,
  });

  if (!resp.ok) {
    const error = await resp.text();
    throw new Error(`ElevenLabs API error: ${resp.status} - ${error}`);
  }

  const data = (await resp.json()) as { text?: unknown };
  if (typeof data.text !== "string") {
    // Fail with a clear message instead of handing `undefined` to the caller.
    throw new Error("ElevenLabs API error: response did not contain text");
  }
  return data.text;
}
481
-
482
- // ==========================================================================
483
- // Provider-specific implementations - Synthesis (TTS)
484
- // ==========================================================================
485
-
486
/**
 * Dispatches a synthesis request to the backend named by `config.provider`.
 * Voice options are forwarded to OpenAI and ElevenLabs; the Gemini backend
 * takes none.
 *
 * @throws Error when the provider is not one of the known backends, or when
 *   the backend call itself fails.
 */
private async synthesizeWithProvider(
  text: string,
  config: TranscriptionConfig,
  options: VoiceOptions
): Promise<{ audioBuffer: Buffer; mimeType: string }> {
  const { provider, apiKey } = config;

  if (provider === "openai") {
    return this.synthesizeWithOpenAI(text, apiKey, options);
  }
  if (provider === "gemini") {
    return this.synthesizeWithGemini(text, apiKey);
  }
  if (provider === "elevenlabs") {
    return this.synthesizeWithElevenLabs(text, apiKey, options);
  }
  throw new Error(`Unknown provider: ${provider}`);
}
502
-
503
/**
 * Synthesizes speech with the OpenAI TTS API ("tts-1" model, opus output).
 *
 * Voices: alloy, echo, fable, onyx, nova, shimmer. Defaults to voice "alloy"
 * and speed 1.0 when the options leave them unset.
 *
 * @throws Error when the API answers with a non-2xx status.
 */
private async synthesizeWithOpenAI(
  text: string,
  apiKey: string,
  options: VoiceOptions
): Promise<{ audioBuffer: Buffer; mimeType: string }> {
  const payload = {
    model: "tts-1",
    input: text,
    voice: options.voice || "alloy",
    speed: options.speed || 1.0,
    response_format: "opus", // Good for WhatsApp
  };

  const resp = await fetch("https://api.openai.com/v1/audio/speech", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });

  if (!resp.ok) {
    const details = await resp.text();
    throw new Error(`OpenAI TTS API error: ${resp.status} - ${details}`);
  }

  const audioBuffer = Buffer.from(await resp.arrayBuffer());
  return { audioBuffer, mimeType: "audio/opus" };
}
539
-
540
/**
 * Synthesizes speech with Gemini by requesting an AUDIO response for a
 * "speak this text" prompt, using the prebuilt "Aoede" voice.
 *
 * @returns The first inline audio part of the first candidate, decoded from
 *   base64, together with the MIME type Gemini reported for it.
 * @throws Error when the API answers with a non-2xx status or the response
 *   contains no audio part.
 */
private async synthesizeWithGemini(
  text: string,
  apiKey: string
): Promise<{ audioBuffer: Buffer; mimeType: string }> {
  const resp = await fetch(
    "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // Sent as a header rather than a `?key=` query parameter so the
        // credential does not end up in URLs recorded by logs or proxies.
        "x-goog-api-key": apiKey,
      },
      body: JSON.stringify({
        contents: [
          {
            parts: [{ text: `Please speak this text aloud: "${text}"` }],
          },
        ],
        generationConfig: {
          responseModalities: ["AUDIO"],
          speechConfig: {
            voiceConfig: {
              prebuiltVoiceConfig: {
                voiceName: "Aoede", // Default Gemini voice
              },
            },
          },
        },
      }),
    }
  );

  if (!resp.ok) {
    const error = await resp.text();
    throw new Error(`Gemini TTS API error: ${resp.status} - ${error}`);
  }

  // `candidates`, `content` and `parts` are all treated as optional so that a
  // response without them reaches the explicit error below instead of
  // crashing with a TypeError.
  const data = (await resp.json()) as {
    candidates?: Array<{
      content?: {
        parts?: Array<{
          inlineData?: { mimeType: string; data: string };
        }>;
      };
    }>;
  };

  const audioPart = data.candidates?.[0]?.content?.parts?.find((p) =>
    p.inlineData?.mimeType?.startsWith("audio/")
  );

  if (!audioPart?.inlineData) {
    throw new Error("Gemini did not return audio data");
  }

  return {
    audioBuffer: Buffer.from(audioPart.inlineData.data, "base64"),
    mimeType: audioPart.inlineData.mimeType,
  };
}
597
-
598
/**
 * Synthesizes speech with the ElevenLabs TTS API and returns MP3 audio.
 *
 * @param options `voice` is the ElevenLabs voice id; defaults to Rachel
 *   (21m00Tcm4TlvDq8ikWAM). `speed` is not used by this backend.
 * @throws Error when the API answers with a non-2xx status.
 */
private async synthesizeWithElevenLabs(
  text: string,
  apiKey: string,
  options: VoiceOptions
): Promise<{ audioBuffer: Buffer; mimeType: string }> {
  const voiceId = options.voice || "21m00Tcm4TlvDq8ikWAM";

  // The voice id comes from the caller; encode it so characters such as "/",
  // "?" or "#" cannot change the request path or add query parameters.
  const resp = await fetch(
    `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
    {
      method: "POST",
      headers: {
        "xi-api-key": apiKey,
        "Content-Type": "application/json",
        Accept: "audio/mpeg",
      },
      body: JSON.stringify({
        text,
        model_id: "eleven_monolingual_v1",
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.5,
        },
      }),
    }
  );

  if (!resp.ok) {
    const error = await resp.text();
    throw new Error(`ElevenLabs TTS API error: ${resp.status} - ${error}`);
  }

  const arrayBuffer = await resp.arrayBuffer();
  return {
    audioBuffer: Buffer.from(arrayBuffer),
    mimeType: "audio/mpeg",
  };
}
638
-
639
- // ==========================================================================
640
- // Utility methods
641
- // ==========================================================================
642
-
643
/**
 * Maps an audio MIME type to the file extension used for upload file names.
 *
 * The type is normalized first: parameters such as "; codecs=opus" are
 * dropped and the comparison is case-insensitive, so "audio/webm;codecs=opus"
 * resolves to "webm".
 *
 * @returns The matching extension, or "ogg" for unrecognized types.
 */
private getExtensionFromMime(mimeType: string): string {
  const mimeToExt: Record<string, string> = {
    "audio/ogg": "ogg",
    "audio/opus": "opus",
    "audio/mpeg": "mp3",
    "audio/mp3": "mp3",
    "audio/wav": "wav",
    "audio/wave": "wav",
    "audio/x-wav": "wav",
    "audio/webm": "webm",
    "audio/m4a": "m4a",
    "audio/x-m4a": "m4a",
    "audio/mp4": "m4a",
    "audio/flac": "flac",
  };
  const essence = (mimeType.split(";")[0] ?? "").trim().toLowerCase();
  return mimeToExt[essence] || "ogg";
}
656
-
657
/**
 * Builds the absolute transcription endpoint URL.
 *
 * @param transcriptionPath Absolute http(s) URL, or a path relative to
 *   `baseUrl`; when empty, a default OpenAI-style path is chosen from the
 *   base URL.
 * @param baseUrl Provider base URL; trailing slashes are ignored.
 * @returns The path itself when it is already an absolute http(s) URL,
 *   otherwise base + path, or null when a base is needed but missing.
 */
private resolveEndpointUrl(
  transcriptionPath: string | undefined,
  baseUrl: string | undefined
): string | null {
  const rawPath =
    transcriptionPath || this.getDefaultOpenAiTranscriptionPath(baseUrl);
  const path = rawPath.trim();
  if (/^https?:\/\//i.test(path)) {
    return path;
  }

  const base = (baseUrl || "").trim();
  if (base === "") {
    return null;
  }

  const suffix = path.startsWith("/") ? path : `/${path}`;
  return base.replace(/\/+$/, "") + suffix;
}
674
-
675
/**
 * Picks the default transcription path for an OpenAI-compatible base URL.
 * When the base (ignoring surrounding whitespace and trailing slashes)
 * already ends in "/v1", the version segment is not repeated.
 */
private getDefaultOpenAiTranscriptionPath(
  baseUrl: string | undefined
): string {
  const normalized = (baseUrl || "").trim().replace(/\/+$/, "");
  return normalized.endsWith("/v1")
    ? "/audio/transcriptions"
    : "/v1/audio/transcriptions";
}
684
- }