@lobu/gateway 3.0.9 → 3.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/platform.d.ts.map +1 -1
- package/dist/api/platform.js +7 -26
- package/dist/api/platform.js.map +1 -1
- package/dist/auth/mcp/proxy.d.ts +14 -0
- package/dist/auth/mcp/proxy.d.ts.map +1 -1
- package/dist/auth/mcp/proxy.js +149 -13
- package/dist/auth/mcp/proxy.js.map +1 -1
- package/dist/cli/gateway.d.ts.map +1 -1
- package/dist/cli/gateway.js +29 -0
- package/dist/cli/gateway.js.map +1 -1
- package/dist/connections/chat-instance-manager.d.ts.map +1 -1
- package/dist/connections/chat-instance-manager.js +2 -1
- package/dist/connections/chat-instance-manager.js.map +1 -1
- package/dist/connections/interaction-bridge.d.ts +9 -2
- package/dist/connections/interaction-bridge.d.ts.map +1 -1
- package/dist/connections/interaction-bridge.js +121 -261
- package/dist/connections/interaction-bridge.js.map +1 -1
- package/dist/interactions.d.ts +9 -43
- package/dist/interactions.d.ts.map +1 -1
- package/dist/interactions.js +10 -52
- package/dist/interactions.js.map +1 -1
- package/dist/routes/public/agent.d.ts +4 -0
- package/dist/routes/public/agent.d.ts.map +1 -1
- package/dist/routes/public/agent.js +21 -0
- package/dist/routes/public/agent.js.map +1 -1
- package/dist/services/core-services.d.ts.map +1 -1
- package/dist/services/core-services.js +4 -0
- package/dist/services/core-services.js.map +1 -1
- package/package.json +9 -9
- package/src/__tests__/agent-config-routes.test.ts +0 -254
- package/src/__tests__/agent-history-routes.test.ts +0 -72
- package/src/__tests__/agent-routes.test.ts +0 -68
- package/src/__tests__/agent-schedules-routes.test.ts +0 -59
- package/src/__tests__/agent-settings-store.test.ts +0 -323
- package/src/__tests__/bedrock-model-catalog.test.ts +0 -40
- package/src/__tests__/bedrock-openai-service.test.ts +0 -157
- package/src/__tests__/bedrock-provider-module.test.ts +0 -56
- package/src/__tests__/chat-instance-manager-slack.test.ts +0 -204
- package/src/__tests__/chat-response-bridge.test.ts +0 -131
- package/src/__tests__/config-memory-plugins.test.ts +0 -92
- package/src/__tests__/config-request-store.test.ts +0 -127
- package/src/__tests__/connection-routes.test.ts +0 -144
- package/src/__tests__/core-services-store-selection.test.ts +0 -92
- package/src/__tests__/docker-deployment.test.ts +0 -1211
- package/src/__tests__/embedded-deployment.test.ts +0 -342
- package/src/__tests__/grant-store.test.ts +0 -148
- package/src/__tests__/http-proxy.test.ts +0 -281
- package/src/__tests__/instruction-service.test.ts +0 -37
- package/src/__tests__/link-buttons.test.ts +0 -112
- package/src/__tests__/lobu.test.ts +0 -32
- package/src/__tests__/mcp-config-service.test.ts +0 -347
- package/src/__tests__/mcp-proxy.test.ts +0 -694
- package/src/__tests__/message-handler-bridge.test.ts +0 -17
- package/src/__tests__/model-selection.test.ts +0 -172
- package/src/__tests__/oauth-templates.test.ts +0 -39
- package/src/__tests__/platform-adapter-slack-send.test.ts +0 -114
- package/src/__tests__/platform-helpers-model-resolution.test.ts +0 -253
- package/src/__tests__/provider-inheritance.test.ts +0 -212
- package/src/__tests__/routes/cli-auth.test.ts +0 -337
- package/src/__tests__/routes/interactions.test.ts +0 -121
- package/src/__tests__/secret-proxy.test.ts +0 -85
- package/src/__tests__/session-manager.test.ts +0 -572
- package/src/__tests__/setup.ts +0 -133
- package/src/__tests__/skill-and-mcp-registry.test.ts +0 -203
- package/src/__tests__/slack-routes.test.ts +0 -161
- package/src/__tests__/system-config-resolver.test.ts +0 -75
- package/src/__tests__/system-message-limiter.test.ts +0 -89
- package/src/__tests__/system-skills-service.test.ts +0 -362
- package/src/__tests__/transcription-service.test.ts +0 -222
- package/src/__tests__/utils/rate-limiter.test.ts +0 -102
- package/src/__tests__/worker-connection-manager.test.ts +0 -497
- package/src/__tests__/worker-job-router.test.ts +0 -722
- package/src/api/index.ts +0 -1
- package/src/api/platform.ts +0 -292
- package/src/api/response-renderer.ts +0 -157
- package/src/auth/agent-metadata-store.ts +0 -168
- package/src/auth/api-auth-middleware.ts +0 -69
- package/src/auth/api-key-provider-module.ts +0 -213
- package/src/auth/base-provider-module.ts +0 -201
- package/src/auth/bedrock/provider-module.ts +0 -110
- package/src/auth/chatgpt/chatgpt-oauth-module.ts +0 -185
- package/src/auth/chatgpt/device-code-client.ts +0 -218
- package/src/auth/chatgpt/index.ts +0 -1
- package/src/auth/claude/oauth-module.ts +0 -280
- package/src/auth/cli/token-service.ts +0 -249
- package/src/auth/external/client.ts +0 -560
- package/src/auth/external/device-code-client.ts +0 -235
- package/src/auth/mcp/config-service.ts +0 -420
- package/src/auth/mcp/proxy.ts +0 -1086
- package/src/auth/mcp/string-substitution.ts +0 -17
- package/src/auth/mcp/tool-cache.ts +0 -90
- package/src/auth/oauth/base-client.ts +0 -267
- package/src/auth/oauth/client.ts +0 -153
- package/src/auth/oauth/credentials.ts +0 -7
- package/src/auth/oauth/providers.ts +0 -69
- package/src/auth/oauth/state-store.ts +0 -150
- package/src/auth/oauth-templates.ts +0 -179
- package/src/auth/provider-catalog.ts +0 -220
- package/src/auth/provider-model-options.ts +0 -41
- package/src/auth/settings/agent-settings-store.ts +0 -565
- package/src/auth/settings/auth-profiles-manager.ts +0 -216
- package/src/auth/settings/index.ts +0 -12
- package/src/auth/settings/model-preference-store.ts +0 -52
- package/src/auth/settings/model-selection.ts +0 -135
- package/src/auth/settings/resolved-settings-view.ts +0 -298
- package/src/auth/settings/template-utils.ts +0 -44
- package/src/auth/settings/token-service.ts +0 -88
- package/src/auth/system-env-store.ts +0 -98
- package/src/auth/user-agents-store.ts +0 -68
- package/src/channels/binding-service.ts +0 -214
- package/src/channels/index.ts +0 -4
- package/src/cli/gateway.ts +0 -1312
- package/src/cli/index.ts +0 -74
- package/src/commands/built-in-commands.ts +0 -80
- package/src/commands/command-dispatcher.ts +0 -94
- package/src/commands/command-reply-adapters.ts +0 -27
- package/src/config/file-loader.ts +0 -618
- package/src/config/index.ts +0 -588
- package/src/config/network-allowlist.ts +0 -71
- package/src/connections/chat-instance-manager.ts +0 -1284
- package/src/connections/chat-response-bridge.ts +0 -618
- package/src/connections/index.ts +0 -7
- package/src/connections/interaction-bridge.ts +0 -831
- package/src/connections/message-handler-bridge.ts +0 -440
- package/src/connections/platform-auth-methods.ts +0 -15
- package/src/connections/types.ts +0 -84
- package/src/gateway/connection-manager.ts +0 -291
- package/src/gateway/index.ts +0 -698
- package/src/gateway/job-router.ts +0 -201
- package/src/gateway-main.ts +0 -200
- package/src/index.ts +0 -41
- package/src/infrastructure/queue/index.ts +0 -12
- package/src/infrastructure/queue/queue-producer.ts +0 -148
- package/src/infrastructure/queue/redis-queue.ts +0 -361
- package/src/infrastructure/queue/types.ts +0 -133
- package/src/infrastructure/redis/system-message-limiter.ts +0 -94
- package/src/interactions/config-request-store.ts +0 -198
- package/src/interactions.ts +0 -363
- package/src/lobu.ts +0 -311
- package/src/metrics/prometheus.ts +0 -159
- package/src/modules/module-system.ts +0 -179
- package/src/orchestration/base-deployment-manager.ts +0 -900
- package/src/orchestration/deployment-utils.ts +0 -98
- package/src/orchestration/impl/docker-deployment.ts +0 -620
- package/src/orchestration/impl/embedded-deployment.ts +0 -268
- package/src/orchestration/impl/index.ts +0 -8
- package/src/orchestration/impl/k8s/deployment.ts +0 -1061
- package/src/orchestration/impl/k8s/helpers.ts +0 -610
- package/src/orchestration/impl/k8s/index.ts +0 -1
- package/src/orchestration/index.ts +0 -333
- package/src/orchestration/message-consumer.ts +0 -584
- package/src/orchestration/scheduled-wakeup.ts +0 -704
- package/src/permissions/approval-policy.ts +0 -36
- package/src/permissions/grant-store.ts +0 -219
- package/src/platform/file-handler.ts +0 -66
- package/src/platform/link-buttons.ts +0 -57
- package/src/platform/renderer-utils.ts +0 -44
- package/src/platform/response-renderer.ts +0 -84
- package/src/platform/unified-thread-consumer.ts +0 -194
- package/src/platform.ts +0 -318
- package/src/proxy/http-proxy.ts +0 -752
- package/src/proxy/proxy-manager.ts +0 -81
- package/src/proxy/secret-proxy.ts +0 -402
- package/src/proxy/token-refresh-job.ts +0 -143
- package/src/routes/internal/audio.ts +0 -141
- package/src/routes/internal/device-auth.ts +0 -652
- package/src/routes/internal/files.ts +0 -226
- package/src/routes/internal/history.ts +0 -69
- package/src/routes/internal/images.ts +0 -127
- package/src/routes/internal/interactions.ts +0 -84
- package/src/routes/internal/middleware.ts +0 -23
- package/src/routes/internal/schedule.ts +0 -226
- package/src/routes/internal/types.ts +0 -22
- package/src/routes/openapi-auto.ts +0 -239
- package/src/routes/public/agent-access.ts +0 -23
- package/src/routes/public/agent-config.ts +0 -675
- package/src/routes/public/agent-history.ts +0 -422
- package/src/routes/public/agent-schedules.ts +0 -296
- package/src/routes/public/agent.ts +0 -1086
- package/src/routes/public/agents.ts +0 -373
- package/src/routes/public/channels.ts +0 -191
- package/src/routes/public/cli-auth.ts +0 -896
- package/src/routes/public/connections.ts +0 -574
- package/src/routes/public/landing.ts +0 -16
- package/src/routes/public/oauth.ts +0 -147
- package/src/routes/public/settings-auth.ts +0 -104
- package/src/routes/public/slack.ts +0 -173
- package/src/routes/shared/agent-ownership.ts +0 -101
- package/src/routes/shared/token-verifier.ts +0 -34
- package/src/services/bedrock-model-catalog.ts +0 -217
- package/src/services/bedrock-openai-service.ts +0 -658
- package/src/services/core-services.ts +0 -1072
- package/src/services/image-generation-service.ts +0 -257
- package/src/services/instruction-service.ts +0 -318
- package/src/services/mcp-registry.ts +0 -94
- package/src/services/platform-helpers.ts +0 -287
- package/src/services/session-manager.ts +0 -262
- package/src/services/settings-resolver.ts +0 -74
- package/src/services/system-config-resolver.ts +0 -89
- package/src/services/system-skills-service.ts +0 -229
- package/src/services/transcription-service.ts +0 -684
- package/src/session.ts +0 -110
- package/src/spaces/index.ts +0 -1
- package/src/spaces/space-resolver.ts +0 -17
- package/src/stores/in-memory-agent-store.ts +0 -403
- package/src/stores/redis-agent-store.ts +0 -279
- package/src/utils/public-url.ts +0 -44
- package/src/utils/rate-limiter.ts +0 -94
- package/tsconfig.json +0 -33
- package/tsconfig.tsbuildinfo +0 -1
|
@@ -1,684 +0,0 @@
|
|
|
1
|
-
/**
 * Multi-Provider Audio Service
 *
 * Supports speech-to-text and text-to-speech via auth profiles (installed providers):
 * - OpenAI (chatgpt auth profile) - Whisper for STT, TTS API for speech
 * - Google Gemini (gemini auth profile) - Audio input/output
 * - ElevenLabs (elevenlabs auth profile) - STT and high-quality TTS
 *
 * STT selection: built-ins (chatgpt/openai, gemini, elevenlabs) plus optional
 * config-driven STT providers declared in system-skills provider config.
 * TTS selection stays built-in only (openai → gemini → elevenlabs).
 */
|
|
13
|
-
|
|
14
|
-
import type { ProviderConfigEntry } from "@lobu/core";
|
|
15
|
-
import { createLogger } from "@lobu/core";
|
|
16
|
-
import type { AuthProfilesManager } from "../auth/settings/auth-profiles-manager";
|
|
17
|
-
|
|
18
|
-
// Module-level logger shared by everything in this file, created under the
// name "transcription-service".
const logger = createLogger("transcription-service");
|
|
19
|
-
|
|
20
|
-
/** Built-in audio backends this module knows how to call. */
export type TranscriptionProvider = "openai" | "gemini" | "elevenlabs";

/**
 * A resolved provider an agent can use: which backend to call and the
 * credential to call it with.
 */
interface TranscriptionConfig {
  /** Auth-profile provider id the credential was looked up under. */
  profileProviderId: string;
  /** Human-readable name used in error messages. */
  displayName: string;
  /** Backend implementation that handles the request. */
  provider: TranscriptionProvider;
  /** Credential taken from the agent's auth profile. */
  apiKey: string;
  /**
   * Present only for config-driven STT providers that speak the OpenAI
   * transcription protocol at a custom endpoint.
   */
  openaiCompat?: {
    endpointUrl: string;
    model: string;
  };
}

/** Result of a successful speech-to-text call. */
export interface TranscriptionSuccess {
  text: string;
  provider: TranscriptionProvider;
}

/** Result of a failed speech-to-text call. */
export interface TranscriptionError {
  error: string;
  availableProviders: TranscriptionProvider[];
}

/** Outcome of `transcribe`; callers distinguish the two shapes by their keys. */
export type TranscriptionResult = TranscriptionSuccess | TranscriptionError;

/** Result of a successful text-to-speech call. */
export interface SynthesisSuccess {
  audioBuffer: Buffer;
  /** MIME type of `audioBuffer`. */
  mimeType: string;
  provider: TranscriptionProvider;
}

/** Result of a failed text-to-speech call. */
export interface SynthesisError {
  error: string;
  availableProviders: TranscriptionProvider[];
}

/** Outcome of `synthesize`; callers distinguish the two shapes by their keys. */
export type SynthesisResult = SynthesisSuccess | SynthesisError;

/** Voice options for TTS. */
export interface VoiceOptions {
  voice?: string; // Provider-specific voice ID
  speed?: number; // Speech speed (0.5-2.0, default 1.0)
}
|
|
63
|
-
|
|
64
|
-
/**
 * Auth profile providerId → TTS provider mapping (single source of truth).
 *
 * Array order matters: code that iterates this table tries providers in the
 * order they are listed here (openai → gemini → elevenlabs).
 */
const TTS_CAPABLE_PROVIDERS: {
  profileProviderId: string;
  ttsProvider: TranscriptionProvider;
  displayName: string;
}[] = [
  {
    profileProviderId: "chatgpt",
    ttsProvider: "openai",
    displayName: "OpenAI",
  },
  {
    profileProviderId: "gemini",
    ttsProvider: "gemini",
    displayName: "Google Gemini",
  },
  {
    profileProviderId: "elevenlabs",
    ttsProvider: "elevenlabs",
    displayName: "ElevenLabs",
  },
];

/**
 * Human-readable name for a built-in provider.
 *
 * @param provider - Provider key to look up in the table above.
 * @returns The table's display name, or the provider key itself when the
 *   table has no matching entry.
 */
function displayName(provider: TranscriptionProvider): string {
  const entry = TTS_CAPABLE_PROVIDERS.find((p) => p.ttsProvider === provider);
  return entry?.displayName ?? provider;
}
|
|
93
|
-
|
|
94
|
-
/**
 * Speech-to-text and text-to-speech on top of the providers an agent has
 * installed as auth profiles.
 *
 * - STT (`transcribe`) tries every built-in provider the agent has a profile
 *   for, then any config-driven OpenAI-compatible STT providers, in order,
 *   until one succeeds.
 * - TTS (`synthesize`) uses the first built-in provider the agent has a
 *   profile for; config-driven providers are never used for TTS.
 */
export class TranscriptionService {
  /** MIME essence → file extension used for multipart uploads. */
  private static readonly MIME_TO_EXTENSION: ReadonlyMap<string, string> =
    new Map([
      ["audio/ogg", "ogg"],
      ["audio/opus", "opus"],
      ["audio/mpeg", "mp3"],
      ["audio/mp3", "mp3"],
      ["audio/wav", "wav"],
      ["audio/x-wav", "wav"],
      ["audio/wave", "wav"],
      ["audio/webm", "webm"],
      ["audio/m4a", "m4a"],
      ["audio/x-m4a", "m4a"],
      ["audio/mp4", "m4a"],
      ["audio/flac", "flac"],
    ]);

  /** Optional loader for config-driven STT provider definitions. */
  private providerConfigSource?:
    | (() => Promise<Record<string, ProviderConfigEntry>>)
    | undefined;

  /**
   * @param authProfilesManager - Used to look up the agent's credential for
   *   each candidate provider.
   * @param providerConfigSource - Optional loader for config-driven STT
   *   providers; when omitted only the built-ins are considered.
   */
  constructor(
    private readonly authProfilesManager: AuthProfilesManager,
    providerConfigSource?: () => Promise<Record<string, ProviderConfigEntry>>
  ) {
    this.providerConfigSource = providerConfigSource;
  }

  /** Replace the loader for config-driven STT providers. */
  setProviderConfigSource(
    source: () => Promise<Record<string, ProviderConfigEntry>>
  ): void {
    this.providerConfigSource = source;
  }

  /**
   * Transcribe audio buffer to text.
   *
   * Providers are tried in order; the first success wins. Provider failures
   * are logged and collected, never thrown.
   *
   * @param audioBuffer - Raw audio bytes.
   * @param agentId - Agent whose auth profiles decide which providers exist.
   * @param mimeType - MIME type of `audioBuffer` (defaults to "audio/ogg").
   * @returns The transcript and the provider that produced it, or an error
   *   object when no provider is configured or every provider failed.
   */
  async transcribe(
    audioBuffer: Buffer,
    agentId: string,
    mimeType = "audio/ogg"
  ): Promise<TranscriptionResult> {
    const configs = await this.getTranscriptionConfigs(agentId);

    if (configs.length === 0) {
      return this.noProviderError(
        "No transcription provider configured",
        agentId
      );
    }

    const attemptErrors: string[] = [];
    for (const config of configs) {
      logger.info("Transcribing audio", {
        agentId,
        provider: config.provider,
        profileProviderId: config.profileProviderId,
        bufferSize: audioBuffer.length,
        mimeType,
      });

      try {
        const text = await this.transcribeWithProvider(
          audioBuffer,
          config,
          mimeType
        );
        logger.info("Transcription successful", {
          agentId,
          provider: config.provider,
          profileProviderId: config.profileProviderId,
          textLength: text.length,
        });
        return { text, provider: config.provider };
      } catch (error) {
        const errorMessage =
          error instanceof Error ? error.message : String(error);
        logger.error("Transcription failed", {
          agentId,
          provider: config.provider,
          profileProviderId: config.profileProviderId,
          error: errorMessage,
        });
        // Remember the failure and fall through to the next provider.
        attemptErrors.push(`${config.displayName}: ${errorMessage}`);
      }
    }

    return {
      error: `Transcription failed with all configured providers: ${attemptErrors.join(" | ")}`,
      availableProviders: [...new Set(configs.map((c) => c.provider))],
    };
  }

  /**
   * Get the first usable transcription (STT) config for an agent by checking
   * installed auth profiles.
   *
   * Built-in providers come first (openai → gemini → elevenlabs), followed by
   * config-driven STT providers, so the result may be an STT-only provider.
   *
   * @returns The first config, or `null` when the agent has none.
   */
  async getConfig(agentId: string): Promise<TranscriptionConfig | null> {
    const configs = await this.getTranscriptionConfigs(agentId);
    return configs[0] ?? null;
  }

  /**
   * Built-in providers the agent has a profile for, in table order. These
   * are the only providers eligible for TTS.
   */
  private async getSynthesisConfigs(
    agentId: string
  ): Promise<TranscriptionConfig[]> {
    const configs: TranscriptionConfig[] = [];
    for (const { profileProviderId, ttsProvider } of TTS_CAPABLE_PROVIDERS) {
      const profile = await this.authProfilesManager.getBestProfile(
        agentId,
        profileProviderId
      );
      if (profile) {
        configs.push({
          profileProviderId,
          displayName: displayName(ttsProvider),
          provider: ttsProvider,
          apiKey: profile.credential,
        });
      }
    }
    return configs;
  }

  /**
   * All STT-capable configs for the agent: the built-ins first, then each
   * config-driven candidate the agent has a profile for. A profile provider
   * id is never listed twice.
   */
  private async getTranscriptionConfigs(
    agentId: string
  ): Promise<TranscriptionConfig[]> {
    const configs = await this.getSynthesisConfigs(agentId);
    const providerIds = new Set(configs.map((c) => c.profileProviderId));
    const configDriven = await this.getConfigDrivenSttCandidates();

    for (const candidate of configDriven) {
      if (providerIds.has(candidate.profileProviderId)) continue;

      const profile = await this.authProfilesManager.getBestProfile(
        agentId,
        candidate.profileProviderId
      );
      if (!profile) continue;

      configs.push({
        profileProviderId: candidate.profileProviderId,
        displayName: candidate.displayName,
        provider: candidate.provider,
        apiKey: profile.credential,
        openaiCompat: candidate.openaiCompat,
      });
      providerIds.add(candidate.profileProviderId);
    }

    return configs;
  }

  /**
   * Build STT candidates from the provider config source.
   *
   * An entry is a candidate when its `stt` section is present and not
   * explicitly disabled, or, without an `stt` section, when the entry itself
   * is OpenAI-compatible. Only OpenAI-compatible STT is supported; anything
   * else is logged and skipped. A failing config source is logged and yields
   * no candidates.
   */
  private async getConfigDrivenSttCandidates(): Promise<
    Array<Omit<TranscriptionConfig, "apiKey">>
  > {
    if (!this.providerConfigSource) return [];

    let providerConfigs: Record<string, ProviderConfigEntry>;
    try {
      providerConfigs = await this.providerConfigSource();
    } catch (error) {
      logger.warn("Failed to load provider configs for STT", {
        error: error instanceof Error ? error.message : String(error),
      });
      return [];
    }

    const candidates: Array<Omit<TranscriptionConfig, "apiKey">> = [];
    for (const [providerId, entry] of Object.entries(providerConfigs)) {
      const stt = entry.stt;
      // The stt section may override the entry-level SDK compatibility.
      const compat = stt?.sdkCompat || entry.sdkCompat;
      const sttEnabled = stt ? stt.enabled !== false : compat === "openai";
      if (!sttEnabled) continue;

      if (compat !== "openai") {
        logger.warn("Unsupported config-driven STT compatibility", {
          providerId,
          compat,
        });
        continue;
      }

      const endpoint = this.resolveEndpointUrl(
        stt?.transcriptionPath,
        stt?.baseUrl || entry.upstreamBaseUrl
      );
      if (!endpoint) {
        logger.warn("Invalid STT endpoint configuration", {
          providerId,
          transcriptionPath: stt?.transcriptionPath,
          baseUrl: stt?.baseUrl || entry.upstreamBaseUrl,
        });
        continue;
      }

      candidates.push({
        profileProviderId: providerId,
        displayName: entry.displayName || providerId,
        provider: "openai",
        openaiCompat: {
          endpointUrl: endpoint,
          model: stt?.model?.trim() || "whisper-1",
        },
      });
    }
    return candidates;
  }

  /**
   * Get provider info for documentation/help messages.
   */
  getProviderInfo(): Array<{ provider: TranscriptionProvider; name: string }> {
    return TTS_CAPABLE_PROVIDERS.map(({ ttsProvider, displayName }) => ({
      provider: ttsProvider,
      name: displayName,
    }));
  }

  // ==========================================================================
  // Text-to-Speech (Synthesis)
  // ==========================================================================

  /**
   * Synthesize text to audio.
   *
   * Uses the first built-in provider the agent has a profile for. Provider
   * failures are logged and returned as an error object, never thrown.
   *
   * @param text - Text to speak.
   * @param agentId - Agent whose auth profiles decide which provider is used.
   * @param options - Optional voice and speed.
   */
  async synthesize(
    text: string,
    agentId: string,
    options: VoiceOptions = {}
  ): Promise<SynthesisResult> {
    // Select from the TTS-capable list only. The STT list can end with
    // config-driven OpenAI-compatible providers; picking one of those here
    // would send a third-party credential to OpenAI's speech endpoint.
    const [config] = await this.getSynthesisConfigs(agentId);

    if (!config) {
      return this.noProviderError("No audio provider configured", agentId);
    }

    logger.info("Synthesizing audio", {
      agentId,
      provider: config.provider,
      textLength: text.length,
      voice: options.voice,
    });

    try {
      const result = await this.synthesizeWithProvider(text, config, options);
      logger.info("Synthesis successful", {
        agentId,
        provider: config.provider,
        audioSize: result.audioBuffer.length,
      });
      return { ...result, provider: config.provider };
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      logger.error("Synthesis failed", {
        agentId,
        provider: config.provider,
        error: errorMessage,
      });
      return {
        error: `Synthesis failed with ${displayName(config.provider)}: ${errorMessage}`,
        availableProviders: [],
      };
    }
  }

  /**
   * Log and build the "nothing configured" error, listing every built-in
   * provider as an option.
   */
  private noProviderError(message: string, agentId: string): TranscriptionError {
    const availableProviders = TTS_CAPABLE_PROVIDERS.map((p) => p.ttsProvider);
    logger.info(message, { agentId, availableProviders });
    return { error: message, availableProviders };
  }

  // ==========================================================================
  // Provider-specific implementations - Transcription (STT)
  // ==========================================================================

  /** Dispatch a transcription request to the implementation for `config.provider`. */
  private async transcribeWithProvider(
    buffer: Buffer,
    config: TranscriptionConfig,
    mimeType: string
  ): Promise<string> {
    switch (config.provider) {
      case "openai":
        return this.transcribeWithOpenAI(
          buffer,
          config.apiKey,
          mimeType,
          config.openaiCompat
        );
      case "gemini":
        return this.transcribeWithGemini(buffer, config.apiKey, mimeType);
      case "elevenlabs":
        return this.transcribeWithElevenLabs(buffer, config.apiKey, mimeType);
      default:
        throw new Error(`Unknown provider: ${config.provider}`);
    }
  }

  /**
   * Transcribe via the OpenAI transcription protocol.
   *
   * @param options - Custom endpoint and model for OpenAI-compatible
   *   providers; defaults to OpenAI's endpoint and "whisper-1".
   * @throws Error when the endpoint answers with a non-2xx status.
   */
  private async transcribeWithOpenAI(
    buffer: Buffer,
    apiKey: string,
    mimeType: string,
    options?: { endpointUrl: string; model: string }
  ): Promise<string> {
    const formData = new FormData();
    const ext = this.getExtensionFromMime(mimeType);
    formData.append(
      "file",
      new Blob([buffer], { type: mimeType }),
      `audio.${ext}`
    );
    formData.append("model", options?.model || "whisper-1");

    const resp = await fetch(
      options?.endpointUrl || "https://api.openai.com/v1/audio/transcriptions",
      {
        method: "POST",
        headers: { Authorization: `Bearer ${apiKey}` },
        body: formData,
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`OpenAI API error: ${resp.status} - ${error}`);
    }

    const data = (await resp.json()) as { text: string };
    return data.text;
  }

  /**
   * Transcribe via Gemini `generateContent`, sending the audio inline as
   * base64 together with a transcription prompt.
   *
   * @throws Error on a non-2xx status or when the response carries no
   *   `candidates` array.
   */
  private async transcribeWithGemini(
    buffer: Buffer,
    apiKey: string,
    mimeType: string
  ): Promise<string> {
    // Gemini uses inline audio data with base64 encoding.
    // The API key travels in the query string, so this URL must not be logged.
    const resp = await fetch(
      `https://generativelanguage.googleapis.com/v1/models/gemini-1.5-flash:generateContent?key=${apiKey}`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          contents: [
            {
              parts: [
                {
                  text: "Transcribe this audio exactly as spoken. Return only the transcription text, nothing else:",
                },
                {
                  inline_data: {
                    mime_type: mimeType,
                    data: buffer.toString("base64"),
                  },
                },
              ],
            },
          ],
        }),
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`Gemini API error: ${resp.status} - ${error}`);
    }

    const data = (await resp.json()) as {
      candidates?: Array<{
        content: { parts: Array<{ text: string }> };
      }>;
    };
    // A response without candidates used to surface as an opaque TypeError;
    // fail with a readable message so the caller's fallback log is useful.
    if (!Array.isArray(data.candidates)) {
      throw new Error("Gemini did not return any transcription candidates");
    }
    return data.candidates[0]?.content?.parts[0]?.text || "";
  }

  /**
   * Transcribe via the ElevenLabs speech-to-text endpoint.
   *
   * NOTE(review): ElevenLabs' published speech-to-text reference appears to
   * require a `model_id` form field and to name the upload field `file`;
   * this request sends only an `audio` field. Confirm against the current
   * API before relying on this path.
   *
   * @throws Error when the endpoint answers with a non-2xx status.
   */
  private async transcribeWithElevenLabs(
    buffer: Buffer,
    apiKey: string,
    mimeType: string
  ): Promise<string> {
    // ElevenLabs speech-to-text API
    const formData = new FormData();
    const ext = this.getExtensionFromMime(mimeType);
    formData.append(
      "audio",
      new Blob([buffer], { type: mimeType }),
      `audio.${ext}`
    );

    const resp = await fetch("https://api.elevenlabs.io/v1/speech-to-text", {
      method: "POST",
      headers: { "xi-api-key": apiKey },
      body: formData,
    });

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`ElevenLabs API error: ${resp.status} - ${error}`);
    }

    const data = (await resp.json()) as { text: string };
    return data.text;
  }

  // ==========================================================================
  // Provider-specific implementations - Synthesis (TTS)
  // ==========================================================================

  /** Dispatch a synthesis request to the implementation for `config.provider`. */
  private async synthesizeWithProvider(
    text: string,
    config: TranscriptionConfig,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    switch (config.provider) {
      case "openai":
        return this.synthesizeWithOpenAI(text, config.apiKey, options);
      case "gemini":
        return this.synthesizeWithGemini(text, config.apiKey);
      case "elevenlabs":
        return this.synthesizeWithElevenLabs(text, config.apiKey, options);
      default:
        throw new Error(`Unknown provider: ${config.provider}`);
    }
  }

  /**
   * Synthesize via the OpenAI speech endpoint, requesting Opus output.
   *
   * @throws Error when the endpoint answers with a non-2xx status.
   */
  private async synthesizeWithOpenAI(
    text: string,
    apiKey: string,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    // OpenAI TTS API
    // Voices: alloy, echo, fable, onyx, nova, shimmer
    const voice = options.voice || "alloy";
    // `||` on purpose: a speed of 0 is not meaningful and falls back to 1.0.
    const speed = options.speed || 1.0;

    const resp = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "tts-1",
        input: text,
        voice,
        speed,
        response_format: "opus", // Good for WhatsApp
      }),
    });

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`OpenAI TTS API error: ${resp.status} - ${error}`);
    }

    const arrayBuffer = await resp.arrayBuffer();
    return {
      audioBuffer: Buffer.from(arrayBuffer),
      mimeType: "audio/opus",
    };
  }

  /**
   * Synthesize via Gemini `generateContent` with an AUDIO response modality.
   * The audio comes back base64-encoded inside the JSON response.
   *
   * @throws Error on a non-2xx status or when no audio part is returned.
   */
  private async synthesizeWithGemini(
    text: string,
    apiKey: string
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    // The API key travels in the query string, so this URL must not be logged.
    const resp = await fetch(
      `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=${apiKey}`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          contents: [
            {
              parts: [{ text: `Please speak this text aloud: "${text}"` }],
            },
          ],
          generationConfig: {
            responseModalities: ["AUDIO"],
            speechConfig: {
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName: "Aoede", // Default Gemini voice
                },
              },
            },
          },
        }),
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`Gemini TTS API error: ${resp.status} - ${error}`);
    }

    const data = (await resp.json()) as {
      candidates?: Array<{
        content: {
          parts: Array<{
            inlineData?: { mimeType: string; data: string };
          }>;
        };
      }>;
    };

    // Optional-chain from `candidates` so a response without candidates hits
    // the explicit error below instead of a TypeError.
    const audioPart = data.candidates?.[0]?.content?.parts?.find((p) =>
      p.inlineData?.mimeType?.startsWith("audio/")
    );

    if (!audioPart?.inlineData) {
      throw new Error("Gemini did not return audio data");
    }

    return {
      audioBuffer: Buffer.from(audioPart.inlineData.data, "base64"),
      mimeType: audioPart.inlineData.mimeType,
    };
  }

  /**
   * Synthesize via the ElevenLabs text-to-speech endpoint, requesting MPEG
   * audio.
   *
   * @throws Error when the endpoint answers with a non-2xx status.
   */
  private async synthesizeWithElevenLabs(
    text: string,
    apiKey: string,
    options: VoiceOptions
  ): Promise<{ audioBuffer: Buffer; mimeType: string }> {
    // ElevenLabs TTS API
    // Default voice: Rachel (21m00Tcm4TlvDq8ikWAM)
    const voiceId = options.voice || "21m00Tcm4TlvDq8ikWAM";

    // The voice id is caller-supplied and becomes a path segment; encode it
    // so it cannot change which endpoint is hit.
    const resp = await fetch(
      `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
      {
        method: "POST",
        headers: {
          "xi-api-key": apiKey,
          "Content-Type": "application/json",
          Accept: "audio/mpeg",
        },
        body: JSON.stringify({
          text,
          model_id: "eleven_monolingual_v1",
          voice_settings: {
            stability: 0.5,
            similarity_boost: 0.5,
          },
        }),
      }
    );

    if (!resp.ok) {
      const error = await resp.text();
      throw new Error(`ElevenLabs TTS API error: ${resp.status} - ${error}`);
    }

    const arrayBuffer = await resp.arrayBuffer();
    return {
      audioBuffer: Buffer.from(arrayBuffer),
      mimeType: "audio/mpeg",
    };
  }

  // ==========================================================================
  // Utility methods
  // ==========================================================================

  /**
   * Map an audio MIME type to a file extension for upload file names.
   *
   * Parameters (e.g. `; codecs=opus`) and letter case are ignored. Unknown
   * types fall back to "ogg".
   */
  private getExtensionFromMime(mimeType: string): string {
    const essence = (mimeType.split(";", 1)[0] ?? "").trim().toLowerCase();
    return TranscriptionService.MIME_TO_EXTENSION.get(essence) ?? "ogg";
  }

  /**
   * Resolve the transcription endpoint for a config-driven provider.
   *
   * @param transcriptionPath - Absolute http(s) URL, or a path to append to
   *   `baseUrl`; blank or missing means "use the OpenAI default path".
   * @param baseUrl - Provider base URL; required unless `transcriptionPath`
   *   is already absolute.
   * @returns The full endpoint URL, or `null` when a base URL is needed but
   *   missing.
   */
  private resolveEndpointUrl(
    transcriptionPath: string | undefined,
    baseUrl: string | undefined
  ): string | null {
    // Trim before the fallback so a whitespace-only path counts as absent.
    const path =
      transcriptionPath?.trim() ||
      this.getDefaultOpenAiTranscriptionPath(baseUrl);
    if (/^https?:\/\//i.test(path)) {
      return path;
    }

    const base = (baseUrl || "").trim();
    if (!base) return null;

    const normalizedPath = path.startsWith("/") ? path : `/${path}`;
    return `${base.replace(/\/+$/, "")}${normalizedPath}`;
  }

  /**
   * Default OpenAI-style transcription path. The "/v1" prefix is omitted
   * when the base URL already ends with it.
   */
  private getDefaultOpenAiTranscriptionPath(
    baseUrl: string | undefined
  ): string {
    const trimmedBase = (baseUrl || "").trim().replace(/\/+$/, "");
    if (trimmedBase.endsWith("/v1")) {
      return "/audio/transcriptions";
    }
    return "/v1/audio/transcriptions";
  }
}
|