@johpaz/hive-sdk 0.0.12 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +9 -0
- package/.github/workflows/publish.yml +89 -0
- package/.github/workflows/version-bump.yml +102 -0
- package/CHANGELOG.md +38 -0
- package/README.md +158 -0
- package/bun.lock +543 -0
- package/bunfig.toml +7 -0
- package/docs/API-AGENTS.md +316 -0
- package/docs/API-CONTEXT-COMPILER.md +252 -0
- package/docs/API-DAG-SCHEDULER.md +273 -0
- package/docs/API-TOOLS-SKILLS-CHANNELS.md +293 -0
- package/docs/API-WORKERS-EVENTS.md +152 -0
- package/docs/INDEX.md +141 -0
- package/docs/README.md +68 -0
- package/package.json +54 -105
- package/packages/cli/package.json +17 -0
- package/packages/cli/src/commands/init.ts +56 -0
- package/packages/cli/src/commands/run.ts +45 -0
- package/packages/cli/src/commands/test.ts +42 -0
- package/packages/cli/src/commands/trace.ts +55 -0
- package/packages/cli/src/index.ts +43 -0
- package/packages/core/package.json +58 -0
- package/packages/core/src/ace/Curator.ts +158 -0
- package/packages/core/src/ace/Reflector.ts +200 -0
- package/packages/core/src/ace/Tracer.ts +100 -0
- package/packages/core/src/ace/index.ts +4 -0
- package/packages/core/src/agent/AgentRunner.ts +699 -0
- package/packages/core/src/agent/Compaction.ts +221 -0
- package/packages/core/src/agent/ContextCompiler.ts +567 -0
- package/packages/core/src/agent/ContextGuard.ts +91 -0
- package/packages/core/src/agent/ConversationStore.ts +244 -0
- package/packages/core/src/agent/Hooks.ts +166 -0
- package/packages/core/src/agent/NativeTools.ts +31 -0
- package/packages/core/src/agent/PromptBuilder.ts +169 -0
- package/packages/core/src/agent/Service.ts +267 -0
- package/packages/core/src/agent/StuckLoop.ts +133 -0
- package/packages/core/src/agent/index.ts +12 -0
- package/packages/core/src/agent/providers/LLMClient.ts +149 -0
- package/packages/core/src/agent/providers/anthropic.ts +212 -0
- package/packages/core/src/agent/providers/gemini.ts +215 -0
- package/packages/core/src/agent/providers/index.ts +199 -0
- package/packages/core/src/agent/providers/interface.ts +195 -0
- package/packages/core/src/agent/providers/ollama.ts +175 -0
- package/packages/core/src/agent/providers/openai-compat.ts +231 -0
- package/packages/core/src/agent/providers.ts +1 -0
- package/packages/core/src/agent/selectors/PlaybookSelector.ts +147 -0
- package/packages/core/src/agent/selectors/SkillSelector.ts +478 -0
- package/packages/core/src/agent/selectors/ToolSelector.ts +577 -0
- package/packages/core/src/agent/selectors/index.ts +6 -0
- package/packages/core/src/api/createAgent.test.ts +48 -0
- package/packages/core/src/api/createAgent.ts +122 -0
- package/packages/core/src/api/index.ts +2 -0
- package/packages/core/src/canvas/CanvasManager.ts +390 -0
- package/packages/core/src/canvas/a2ui-tools.ts +255 -0
- package/packages/core/src/canvas/canvas-tools.ts +448 -0
- package/packages/core/src/canvas/emitter.ts +149 -0
- package/packages/core/src/canvas/index.ts +6 -0
- package/packages/core/src/config/index.ts +2 -0
- package/packages/core/src/config/loader.ts +554 -0
- package/packages/core/src/ethics/EthicsGuard.test.ts +54 -0
- package/packages/core/src/ethics/EthicsGuard.ts +66 -0
- package/packages/core/src/ethics/index.ts +2 -0
- package/packages/core/src/gateway/channel-notify.test.ts +14 -0
- package/packages/core/src/gateway/channel-notify.ts +12 -0
- package/packages/core/src/gateway/index.ts +1 -0
- package/packages/core/src/index.ts +37 -0
- package/packages/core/src/mcp/MCPClient.ts +439 -0
- package/packages/core/src/mcp/MCPToolAdapter.ts +176 -0
- package/packages/core/src/mcp/config.ts +13 -0
- package/packages/core/src/mcp/hot-reload.ts +147 -0
- package/packages/core/src/mcp/index.ts +11 -0
- package/packages/core/src/mcp/logger.ts +42 -0
- package/packages/core/src/mcp/singleton.ts +21 -0
- package/packages/core/src/mcp/transports/index.ts +67 -0
- package/packages/core/src/mcp/transports/sse.ts +241 -0
- package/packages/core/src/mcp/transports/websocket.ts +159 -0
- package/packages/core/src/memory/Scratchpad.test.ts +47 -0
- package/packages/core/src/memory/Scratchpad.ts +37 -0
- package/packages/core/src/memory/Storage.ts +6 -0
- package/packages/core/src/memory/index.ts +2 -0
- package/packages/core/src/multimodal/VisionService.ts +293 -0
- package/packages/core/src/multimodal/index.ts +2 -0
- package/packages/core/src/multimodal/types.ts +28 -0
- package/packages/core/src/security/Pairing.ts +250 -0
- package/packages/core/src/security/RateLimit.ts +270 -0
- package/packages/core/src/security/index.ts +4 -0
- package/packages/core/src/skills/SkillLoader.ts +388 -0
- package/packages/core/src/skills/bundled-data.generated.ts +3332 -0
- package/packages/core/src/skills/defineSkill.ts +18 -0
- package/packages/core/src/skills/index.ts +4 -0
- package/packages/core/src/state/index.ts +2 -0
- package/packages/core/src/state/store.ts +312 -0
- package/packages/core/src/storage/SQLiteStorage.ts +407 -0
- package/packages/core/src/storage/crypto.ts +101 -0
- package/packages/core/src/storage/index.ts +10 -0
- package/packages/core/src/storage/onboarding.ts +1603 -0
- package/packages/core/src/storage/schema.ts +689 -0
- package/packages/core/src/storage/seed.ts +740 -0
- package/packages/core/src/storage/usage.ts +374 -0
- package/packages/core/src/swarm/AgentBus.ts +460 -0
- package/packages/core/src/swarm/AgentExecutor.ts +53 -0
- package/packages/core/src/swarm/Coordinator.ts +251 -0
- package/packages/core/src/swarm/EventBridge.ts +122 -0
- package/packages/core/src/swarm/EventBus.ts +169 -0
- package/packages/core/src/swarm/TaskGraph.ts +192 -0
- package/packages/core/src/swarm/TaskNode.ts +97 -0
- package/packages/core/src/swarm/TaskResult.ts +22 -0
- package/packages/core/src/swarm/WorkerPool.ts +236 -0
- package/packages/core/src/swarm/errors.ts +37 -0
- package/packages/core/src/swarm/index.ts +30 -0
- package/packages/core/src/swarm/presets/HiveLearnPreset.ts +99 -0
- package/packages/core/src/swarm/presets/ResearchPreset.ts +97 -0
- package/packages/core/src/swarm/presets/index.ts +4 -0
- package/packages/core/src/swarm/strategies/ParallelStrategy.ts +21 -0
- package/packages/core/src/swarm/strategies/PriorityStrategy.ts +46 -0
- package/packages/core/src/swarm/strategies/index.ts +3 -0
- package/packages/core/src/swarm/types.ts +164 -0
- package/packages/core/src/tools/ToolExecutor.ts +58 -0
- package/packages/core/src/tools/ToolRegistry.test.ts +98 -0
- package/packages/core/src/tools/ToolRegistry.ts +61 -0
- package/packages/core/src/tools/agents/get-available-models.ts +118 -0
- package/packages/core/src/tools/agents/index.ts +715 -0
- package/packages/core/src/tools/bridge-events.ts +26 -0
- package/packages/core/src/tools/canvas/index.ts +375 -0
- package/packages/core/src/tools/cli/index.ts +142 -0
- package/packages/core/src/tools/codebridge/index.ts +342 -0
- package/packages/core/src/tools/core/index.ts +476 -0
- package/packages/core/src/tools/cron/index.ts +626 -0
- package/packages/core/src/tools/filesystem/fs-delete.ts +78 -0
- package/packages/core/src/tools/filesystem/fs-edit.ts +106 -0
- package/packages/core/src/tools/filesystem/fs-exists.ts +63 -0
- package/packages/core/src/tools/filesystem/fs-glob.ts +108 -0
- package/packages/core/src/tools/filesystem/fs-list.ts +129 -0
- package/packages/core/src/tools/filesystem/fs-read.ts +72 -0
- package/packages/core/src/tools/filesystem/fs-write.ts +67 -0
- package/packages/core/src/tools/filesystem/index.ts +34 -0
- package/packages/core/src/tools/filesystem/workspace-guard.ts +62 -0
- package/packages/core/src/tools/index.ts +231 -0
- package/packages/core/src/tools/meeting/index.ts +363 -0
- package/packages/core/src/tools/office/index.ts +47 -0
- package/packages/core/src/tools/office/office-escribir-docx.ts +192 -0
- package/packages/core/src/tools/office/office-escribir-pdf.ts +172 -0
- package/packages/core/src/tools/office/office-escribir-pptx.ts +174 -0
- package/packages/core/src/tools/office/office-escribir-xlsx.ts +116 -0
- package/packages/core/src/tools/office/office-leer-docx.ts +93 -0
- package/packages/core/src/tools/office/office-leer-pdf.ts +114 -0
- package/packages/core/src/tools/office/office-leer-pptx.ts +136 -0
- package/packages/core/src/tools/office/office-leer-xlsx.ts +124 -0
- package/packages/core/src/tools/projects/index.ts +37 -0
- package/packages/core/src/tools/projects/project-create.ts +94 -0
- package/packages/core/src/tools/projects/project-done.ts +66 -0
- package/packages/core/src/tools/projects/project-fail.ts +66 -0
- package/packages/core/src/tools/projects/project-list.ts +96 -0
- package/packages/core/src/tools/projects/project-update.ts +72 -0
- package/packages/core/src/tools/projects/task-create.ts +68 -0
- package/packages/core/src/tools/projects/task-evaluate.ts +93 -0
- package/packages/core/src/tools/projects/task-update.ts +93 -0
- package/packages/core/src/tools/types.ts +39 -0
- package/packages/core/src/tools/voice/index.ts +104 -0
- package/packages/core/src/tools/web/browser-click.ts +78 -0
- package/packages/core/src/tools/web/browser-extract.ts +139 -0
- package/packages/core/src/tools/web/browser-navigate.ts +106 -0
- package/packages/core/src/tools/web/browser-screenshot.ts +87 -0
- package/packages/core/src/tools/web/browser-script.ts +88 -0
- package/packages/core/src/tools/web/browser-service.ts +554 -0
- package/packages/core/src/tools/web/browser-type.ts +101 -0
- package/packages/core/src/tools/web/browser-wait.ts +136 -0
- package/packages/core/src/tools/web/index.ts +41 -0
- package/packages/core/src/tools/web/web-fetch.ts +78 -0
- package/packages/core/src/tools/web/web-search.ts +123 -0
- package/packages/core/src/utils/benchmark.ts +80 -0
- package/packages/core/src/utils/crypto.ts +73 -0
- package/packages/core/src/utils/date.ts +42 -0
- package/packages/core/src/utils/index.ts +10 -0
- package/packages/core/src/utils/logger.ts +389 -0
- package/packages/core/src/utils/retry.ts +70 -0
- package/packages/core/src/utils/toon.ts +253 -0
- package/packages/core/src/voice/index.ts +656 -0
- package/test/setup-db.ts +216 -0
- package/tsconfig.json +39 -0
- package/src/agents.ts +0 -1
- package/src/canvas.ts +0 -1
- package/src/channels.ts +0 -1
- package/src/config.ts +0 -1
- package/src/events.ts +0 -1
- package/src/gateway.ts +0 -1
- package/src/index.ts +0 -304
- package/src/mcp.ts +0 -1
- package/src/multimodal.ts +0 -1
- package/src/scheduler.ts +0 -1
- package/src/security.ts +0 -1
- package/src/skills.ts +0 -1
- package/src/state.ts +0 -1
- package/src/storage.ts +0 -1
- package/src/tools.ts +0 -1
- package/src/tts.ts +0 -1
- package/src/types.ts +0 -82
- package/src/utils.ts +0 -1
- package/src/voice.ts +0 -1
|
@@ -0,0 +1,656 @@
|
|
|
1
|
+
import { getDb } from "../storage/SQLiteStorage.ts";
|
|
2
|
+
import { decryptApiKey } from "../storage/crypto.ts";
|
|
3
|
+
import { logger } from "../utils/logger.ts";
|
|
4
|
+
|
|
5
|
+
/**
 * Voice settings resolved for a single channel.
 *
 * Produced by `VoiceService.getChannelVoiceConfig`, which maps the
 * `channels` table columns onto these camelCase fields.
 */
export interface VoiceConfig {
  /** Derived from the `voice_enabled` column (true only when it equals 1). */
  voiceEnabled: boolean;
  /** Derived from the `tts_enabled` column (true only when it equals 1). */
  ttsEnabled: boolean;
  /** Value of the `stt_provider` column, or null when unset. */
  sttProvider: null | string;
  /** Value of the `tts_provider` column, or null when unset. */
  ttsProvider: null | string;
  /** Value of the `tts_voice_id` column, or null when unset. */
  ttsVoiceId: null | string;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Audio handed to the transcription methods.
 *
 * `type` tells consumers how to interpret `data`: the transcription code
 * treats "buffer" as a Buffer, "base64" as a base64 string and "url" as a
 * string to fetch.
 */
export interface AudioInput {
  /** How `data` is encoded. */
  type: "base64" | "buffer" | "url";
  /** Raw bytes, a base64 string, or a URL, depending on `type`. */
  data: string | Buffer;
  /** Optional MIME type of the audio; consumers pick their own default when absent. */
  mimeType?: string;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Synthesized audio returned by the text-to-speech methods.
 */
export interface AudioOutput {
  /** How `data` is encoded. */
  type: "base64" | "buffer";
  /** Raw bytes or a base64 string, depending on `type`. */
  data: string | Buffer;
  /** MIME type reported for the audio (always present on output). */
  mimeType: string;
}
|
|
24
|
+
|
|
25
|
+
// Module-scoped child logger created from the shared logger with the name "voice".
const log = logger.child("voice");
|
|
26
|
+
|
|
27
|
+
/**
 * Prepares text for speech synthesis (TTS).
 *
 * Removes Markdown formatting, emoji and zero-width characters that speech
 * engines pronounce badly, then collapses all whitespace runs to single
 * spaces.
 *
 * @param text - Raw text, possibly containing Markdown.
 * @returns Plain text suitable for TTS; an empty string for empty input.
 */
export function cleanTextForTTS(text: string): string {
  if (!text) return "";

  return text
    // Fenced code blocks (``` ... ```) are dropped entirely.
    .replace(/```[\s\S]*?```/g, " ")
    // Inline code (`text`) keeps its content.
    .replace(/`([^`]+)`/g, "$1")
    // Images ![alt](url) → alt. This must run BEFORE the link rule: the link
    // rule would otherwise consume "[alt](url)" and leave a stray "!".
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1")
    // Links [text](url) → text.
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
    // Combined bold+italic ***text*** → text (most specific emphasis first).
    .replace(/\*\*\*([^*]+)\*\*\*/g, "$1")
    // Bold **text** → text.
    .replace(/\*\*([^*]+)\*\*/g, "$1")
    // Italic *text* or _text_ → text.
    .replace(/\*([^*]+)\*/g, "$1")
    .replace(/_([^_]+)_/g, "$1")
    // Strikethrough ~~text~~ → text.
    .replace(/~~([^~]+)~~/g, "$1")
    // Headings "# text" → text.
    .replace(/^#+\s+/gm, "")
    // Bullet lists "- text" / "* text" → text.
    .replace(/^[\-\*]\s+/gm, "")
    // Numbered lists "1. text" → text.
    .replace(/^\d+\.\s+/gm, "")
    // Block quotes "> text" → text.
    .replace(/^>\s+/gm, "")
    // Emoji. \p{Emoji} is deliberately NOT used: that property also covers
    // the ASCII digits 0-9, so it would delete every number from the text.
    // Extended_Pictographic covers the pictographs themselves; skin-tone
    // modifiers, flag letters, the emoji variation selector and the keycap
    // mark are removed explicitly so no invisible residue is left behind.
    .replace(/[\p{Extended_Pictographic}\p{Emoji_Modifier}\p{Regional_Indicator}\uFE0F\u20E3]/gu, "")
    // Leftover "*" and "#" markers (unbalanced emphasis, inline hashes).
    // The previous emoji rule removed these as a side effect; keep doing so.
    .replace(/[*#]/g, "")
    // Zero-width characters and byte-order mark.
    .replace(/[\u200B-\u200D\uFEFF]/g, "")
    // Collapse whitespace (including newlines) to single spaces.
    .replace(/\s+/g, " ")
    .trim();
}
|
|
69
|
+
|
|
70
|
+
class VoiceService {
|
|
71
|
+
private static instance: VoiceService;
|
|
72
|
+
|
|
73
|
+
// Private so the class cannot be instantiated with `new` from outside; getInstance() creates the shared instance.
private constructor() {}
|
|
74
|
+
|
|
75
|
+
/**
 * Returns the shared VoiceService, creating it on first use.
 *
 * @returns The single VoiceService instance held in the static field.
 */
static getInstance(): VoiceService {
  VoiceService.instance ??= new VoiceService();
  return VoiceService.instance;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Loads the voice settings stored for a channel.
 *
 * @param channelId - Value matched against `channels.id`.
 * @returns The channel's settings, or an all-disabled config with null
 *          providers when no row matches.
 */
getChannelVoiceConfig(channelId: string): VoiceConfig {
  type ChannelVoiceRow = {
    voice_enabled: number;
    tts_enabled: number;
    stt_provider: string | null;
    tts_provider: string | null;
    tts_voice_id: string | null;
  };

  const row = getDb().query(`
SELECT voice_enabled, tts_enabled, stt_provider, tts_provider, tts_voice_id
FROM channels WHERE id = ?
`).get(channelId) as ChannelVoiceRow | undefined;

  // Unknown channel: report everything as disabled/unset.
  if (!row) {
    return {
      voiceEnabled: false,
      ttsEnabled: false,
      sttProvider: null,
      ttsProvider: null,
      ttsVoiceId: null,
    };
  }

  // The flag columns are compared against 1 to produce booleans.
  const { voice_enabled, tts_enabled, stt_provider, tts_provider, tts_voice_id } = row;
  return {
    voiceEnabled: voice_enabled === 1,
    ttsEnabled: tts_enabled === 1,
    sttProvider: stt_provider,
    ttsProvider: tts_provider,
    ttsVoiceId: tts_voice_id,
  };
}
|
|
113
|
+
|
|
114
|
+
/**
 * Transcribes audio to text, choosing the backend from the model id.
 *
 * - `"whisper-1"` → OpenAI Whisper.
 * - any other id starting with `"whisper"` → Groq, with that model id.
 * - anything else → Groq with `"whisper-large-v3-turbo"`, after a warning.
 *
 * @param audio - Audio to transcribe.
 * @param modelId - Speech-to-text model identifier.
 * @returns The transcribed text.
 * @throws Error propagated from the selected backend (missing key, HTTP failure).
 */
async transcribe(audio: AudioInput, modelId: string): Promise<string> {
  // "whisper-1" also matches the generic "whisper" prefix, so the exact
  // OpenAI id has to be tested first; otherwise this branch is unreachable
  // and OpenAI's model id would be sent to Groq.
  if (modelId === "whisper-1") {
    return this.transcribeWithOpenAIWhisper(audio);
  }

  if (modelId.startsWith("whisper")) {
    return this.transcribeWithGroq(audio, modelId);
  }

  log.warn(`Unknown STT provider ${modelId}, defaulting to Groq Whisper`);
  return this.transcribeWithGroq(audio, "whisper-large-v3-turbo");
}
|
|
127
|
+
|
|
128
|
+
/**
 * Looks up and decrypts the API key stored for a provider.
 *
 * @param providerId - Value matched against `providers.id`.
 * @returns The decrypted key, or null when the provider has no stored key
 *          or decryption fails (the failure is logged, not thrown).
 */
private async getProviderApiKey(providerId: string): Promise<string | null> {
  const row = getDb().query(`
SELECT api_key_encrypted, api_key_iv FROM providers WHERE id = ?
`).get(providerId) as { api_key_encrypted: string; api_key_iv: string } | undefined;

  // No row, or a row without an encrypted key: nothing to decrypt.
  if (!row || !row.api_key_encrypted) return null;

  try {
    const plainKey = await decryptApiKey(row.api_key_encrypted, row.api_key_iv);
    return plainKey;
  } catch (error) {
    log.error(`Failed to decrypt API key for provider ${providerId}: ${(error as Error).message}`);
    return null;
  }
}
|
|
145
|
+
|
|
146
|
+
/**
 * Transcribes audio through Groq's transcription endpoint.
 *
 * The key comes from the stored "groq" provider, falling back to the
 * `GROQ_API_KEY` environment variable. The audio is uploaded as multipart
 * form data with a filename extension derived from the MIME type
 * (defaulting to ogg) and the language fixed to "es".
 *
 * @param audio - Audio as a buffer, base64 string, or URL to download.
 * @param modelId - Groq model id sent as the `model` form field.
 * @returns The `text` field of the JSON response.
 * @throws Error when no key is configured, the audio type is invalid, the
 *         audio URL cannot be downloaded, or Groq responds with a non-2xx status.
 */
private async transcribeWithGroq(audio: AudioInput, modelId: string): Promise<string> {
  const key = await this.getProviderApiKey("groq") || process.env.GROQ_API_KEY;
  if (!key) {
    throw new Error("GROQ_API_KEY not configured. Configúrala en Proveedores o en las variables de entorno.");
  }

  let audioData: Uint8Array;

  if (audio.type === "buffer") {
    audioData = new Uint8Array(audio.data as Buffer);
  } else if (audio.type === "base64") {
    audioData = new Uint8Array(Buffer.from(audio.data as string, "base64"));
  } else if (audio.type === "url") {
    const download = await fetch(audio.data as string);
    // Without this check an HTTP error page would be uploaded as "audio"
    // and surface later as a confusing transcription failure.
    if (!download.ok) {
      throw new Error(`Failed to download audio for transcription: HTTP ${download.status}`);
    }
    audioData = new Uint8Array(await download.arrayBuffer());
  } else {
    throw new Error("Invalid audio input type");
  }

  // The upload filename's extension is derived from the MIME type.
  const mime = audio.mimeType || "audio/ogg";
  const ext = mime.includes("webm") ? "webm"
    : mime.includes("mp4") || mime.includes("m4a") ? "m4a"
    : mime.includes("mp3") || mime.includes("mpeg") ? "mp3"
    : mime.includes("wav") ? "wav"
    : mime.includes("flac") ? "flac"
    : "ogg";

  const blob = new Blob([audioData as BlobPart], { type: mime });
  const formData = new FormData();
  formData.append("file", blob, `audio.${ext}`);
  formData.append("model", modelId);
  formData.append("response_format", "json");
  formData.append("language", "es");

  const result = await fetch("https://api.groq.com/openai/v1/audio/transcriptions", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${key}`,
    },
    body: formData,
  });

  if (!result.ok) {
    const error = await result.text();
    throw new Error(`Groq Whisper transcription failed: ${error}`);
  }

  const data = await result.json() as { text: string };
  return data.text;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Transcribes audio with OpenAI's `whisper-1` model.
 *
 * The key comes from the stored "openai" provider, falling back to the
 * `OPENAI_API_KEY` environment variable. The language is fixed to "es".
 *
 * @param audio - Audio as a buffer, base64 string, or URL to download.
 * @returns The `text` field of the JSON response.
 * @throws Error when no key is configured, the audio type is invalid, the
 *         audio URL cannot be downloaded, or OpenAI responds with a non-2xx status.
 */
private async transcribeWithOpenAIWhisper(audio: AudioInput): Promise<string> {
  const key = await this.getProviderApiKey("openai") || process.env.OPENAI_API_KEY;
  if (!key) {
    throw new Error("OPENAI_API_KEY not configured. Configúrala en Proveedores o en las variables de entorno.");
  }

  let audioData: Uint8Array;

  if (audio.type === "buffer") {
    audioData = new Uint8Array(audio.data as Buffer);
  } else if (audio.type === "base64") {
    audioData = new Uint8Array(Buffer.from(audio.data as string, "base64"));
  } else if (audio.type === "url") {
    const download = await fetch(audio.data as string);
    // Without this check an HTTP error page would be uploaded as "audio".
    if (!download.ok) {
      throw new Error(`Failed to download audio for transcription: HTTP ${download.status}`);
    }
    audioData = new Uint8Array(await download.arrayBuffer());
  } else {
    throw new Error("Invalid audio input type");
  }

  // Name the upload after the actual MIME type instead of always
  // "audio.webm", mirroring the Groq path. Unrecognised types keep the
  // previous "webm" name.
  const mime = audio.mimeType || "audio/webm";
  const ext = mime.includes("webm") ? "webm"
    : mime.includes("mp4") || mime.includes("m4a") ? "m4a"
    : mime.includes("mp3") || mime.includes("mpeg") ? "mp3"
    : mime.includes("wav") ? "wav"
    : mime.includes("flac") ? "flac"
    : mime.includes("ogg") || mime.includes("opus") ? "ogg"
    : "webm";

  const blob = new Blob([audioData as BlobPart], { type: mime });
  const formData = new FormData();
  formData.append("file", blob, `audio.${ext}`);
  formData.append("model", "whisper-1");
  formData.append("response_format", "json");
  formData.append("language", "es");

  const result = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${key}`,
    },
    body: formData,
  });

  if (!result.ok) {
    const error = await result.text();
    throw new Error(`OpenAI Whisper transcription failed: ${error}`);
  }

  const data = await result.json() as { text: string };
  return data.text;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Synthesizes speech, choosing the backend from the model id.
 *
 * Checked in this order: `"piper"`/`"piper-local"` → Piper; prefix
 * `"eleven"` → ElevenLabs; prefix `"tts-"` or `"gpt-"` → OpenAI; prefix
 * `"gemini"` → Gemini; prefix `"qwen"` → Qwen. Any other id logs a warning
 * and uses ElevenLabs with `"eleven_flash_v2_5"`.
 *
 * @param text - Text to speak.
 * @param modelId - Text-to-speech model identifier.
 * @param voiceId - Optional voice; each backend applies its own default.
 * @returns The synthesized audio.
 */
async speak(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
  if (modelId === "piper" || modelId === "piper-local") {
    return this.speakWithPiper(text, voiceId);
  }
  if (modelId.startsWith("eleven")) {
    return this.speakWithElevenLabs(text, modelId, voiceId);
  }
  if (modelId.startsWith("tts-") || modelId.startsWith("gpt-")) {
    return this.speakWithOpenAI(text, modelId, voiceId);
  }
  if (modelId.startsWith("gemini")) {
    return this.speakWithGemini(text, modelId, voiceId);
  }
  if (modelId.startsWith("qwen")) {
    return this.speakWithQwen(text, modelId, voiceId);
  }

  log.warn(`Unknown TTS provider ${modelId}, defaulting to ElevenLabs Flash`);
  return this.speakWithElevenLabs(text, "eleven_flash_v2_5", voiceId);
}
|
|
266
|
+
|
|
267
|
+
/**
 * Synthesizes speech through a TTS server on localhost.
 *
 * The text is passed through `cleanTextForTTS` first. The port is read from
 * the `TTS_PORT` environment variable (5500 when unset) and the request is
 * aborted after 15 seconds.
 *
 * @param text - Text to speak.
 * @param voiceId - Optional voice forwarded as the `voice` field.
 * @returns The response body as a buffer labelled `audio/wav`.
 * @throws Error when the server responds with a non-2xx status.
 */
private async speakWithPiper(text: string, voiceId?: string): Promise<AudioOutput> {
  const payload = JSON.stringify({ text: cleanTextForTTS(text), voice: voiceId });
  const port = Number(process.env.TTS_PORT ?? 5500);
  const endpoint = `http://localhost:${port}/tts`;

  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    signal: AbortSignal.timeout(15_000),
  });

  if (!res.ok) {
    throw new Error(`Piper TTS error ${res.status}. ¿Está el servidor TTS corriendo? (Ajustes → Voz)`);
  }

  const wavBytes = Buffer.from(await res.arrayBuffer());
  return { type: "buffer", data: wavBytes, mimeType: "audio/wav" };
}
|
|
286
|
+
|
|
287
|
+
/**
 * Synthesizes speech with the ElevenLabs text-to-speech endpoint.
 *
 * The key comes from the stored "elevenlabs" provider, falling back to the
 * `ELEVENLABS_API_KEY` environment variable. The text is sent as given.
 *
 * @param text - Text to speak.
 * @param modelId - Sent as `model_id`.
 * @param voiceId - Voice used in the URL path; a built-in default id is used when omitted.
 * @returns The response body as a buffer labelled `audio/mpeg`.
 * @throws Error when no key is configured or the API responds with a non-2xx status.
 */
private async speakWithElevenLabs(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
  const key = (await this.getProviderApiKey("elevenlabs")) || process.env.ELEVENLABS_API_KEY;
  if (!key) {
    throw new Error("ELEVENLABS_API_KEY not configured");
  }

  const voice = voiceId || "21m00Tcm4TlvDq8ikWAM";
  const endpoint = `https://api.elevenlabs.io/v1/text-to-speech/${voice}`;
  const payload = {
    text,
    model_id: modelId,
    voice_settings: {
      stability: 0.5,
      similarity_boost: 0.75,
    },
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "xi-api-key": key,
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    throw new Error(`ElevenLabs TTS failed: ${await response.text()}`);
  }

  const audioBytes = Buffer.from(await response.arrayBuffer());
  return { type: "buffer", data: audioBytes, mimeType: "audio/mpeg" };
}
|
|
325
|
+
|
|
326
|
+
/**
 * Synthesizes speech with the OpenAI speech endpoint, requesting MP3.
 *
 * The key comes from the stored "openai-tts" provider, falling back to the
 * `OPENAI_API_KEY` environment variable.
 * NOTE(review): other methods in this class look up the provider id
 * "openai"; confirm that "openai-tts" is the intended id here.
 *
 * @param text - Text to speak, sent as `input`.
 * @param modelId - Sent as `model`; defaults to "gpt-4o-mini-tts".
 * @param voiceId - Voice name; "alloy" when omitted.
 * @returns The response body as a buffer labelled `audio/mpeg`.
 * @throws Error when no key is configured or the API responds with a non-2xx status.
 */
private async speakWithOpenAI(text: string, modelId: string = "gpt-4o-mini-tts", voiceId?: string): Promise<AudioOutput> {
  const key = (await this.getProviderApiKey("openai-tts")) || process.env.OPENAI_API_KEY;
  if (!key) {
    throw new Error("OPENAI_API_KEY not configured");
  }

  const payload = {
    model: modelId,
    voice: voiceId || "alloy",
    input: text,
    response_format: "mp3",
  };

  const response = await fetch("https://api.openai.com/v1/audio/speech", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${key}`,
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    throw new Error(`OpenAI TTS failed: ${await response.text()}`);
  }

  const audioBytes = Buffer.from(await response.arrayBuffer());
  return { type: "buffer", data: audioBytes, mimeType: "audio/mpeg" };
}
|
|
362
|
+
|
|
363
|
+
/**
 * Synthesizes speech with a Gemini model via `generateContent`, requesting
 * the AUDIO response modality with language code "es-ES".
 *
 * The key comes from the stored "gemini" provider, falling back to the
 * `GEMINI_API_KEY` environment variable. The stored key is consulted so
 * that a provider reported as configured by `getConfiguredVoiceProviders`
 * (which checks the same "gemini" row) can actually be used.
 *
 * @param text - Text to speak; embedded in a Spanish instruction prompt.
 * @param modelId - Gemini model id used in the URL path.
 * @param voiceId - Prebuilt voice name; "Aoede" when omitted.
 * @returns The decoded inline audio as a buffer labelled `audio/mpeg`.
 * @throws Error when no key is configured, the API responds with a non-2xx
 *         status, or the response contains no inline audio.
 */
private async speakWithGemini(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
  const key = (await this.getProviderApiKey("gemini")) || process.env.GEMINI_API_KEY;

  if (!key) {
    throw new Error("GEMINI_API_KEY not configured");
  }

  const voiceName = voiceId || "Aoede";

  // The key travels in the x-goog-api-key header rather than the query
  // string, so it does not end up in URLs captured by logs or proxies.
  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${modelId}:generateContent`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": key,
    },
    body: JSON.stringify({
      contents: [{
        parts: [{
          text: `Genera audio de este texto: ${text}`,
        }]
      }],
      generationConfig: {
        responseModalities: ["AUDIO"],
        speechConfig: {
          languageCode: "es-ES",
          voiceConfig: {
            prebuiltVoiceConfig: {
              voiceName,
            },
          },
        },
      },
    }),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Gemini TTS failed: ${error}`);
  }

  const data = await response.json() as { candidates?: Array<{ content?: { parts?: Array<{ inlineData?: { data: string } }> } }> };
  // Only the first part of the first candidate is inspected.
  const audioData = data.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;

  if (!audioData) {
    throw new Error("No audio returned from Gemini");
  }

  // NOTE(review): the response's own MIME type is not read; the audio is
  // always labelled "audio/mpeg". Confirm this matches what Gemini returns.
  return {
    type: "buffer",
    data: Buffer.from(audioData, "base64"),
    mimeType: "audio/mpeg",
  };
}
|
|
416
|
+
|
|
417
|
+
/**
 * Synthesizes speech through the DashScope text-to-audio endpoint,
 * requesting MP3.
 *
 * The key comes from the stored "qwen" provider, falling back to the
 * `DASHSCOPE_API_KEY` environment variable. The stored key is consulted so
 * that a provider reported as configured by `getConfiguredVoiceProviders`
 * (which checks the same "qwen" row) can actually be used.
 *
 * @param text - Text to speak.
 * @param modelId - Sent as `model`.
 * @param voiceId - Voice id; "ruoxi" when omitted.
 * @returns The decoded `output.audio` payload as a buffer labelled `audio/mpeg`.
 * @throws Error when no key is configured, the API responds with a non-2xx
 *         status, or the response contains no audio.
 */
private async speakWithQwen(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
  const key = (await this.getProviderApiKey("qwen")) || process.env.DASHSCOPE_API_KEY;

  if (!key) {
    throw new Error("DASHSCOPE_API_KEY not configured");
  }

  const voice = voiceId || "ruoxi";

  const response = await fetch("https://dashscope.aliyuncs.com/api/v1/services/audio/t2a/generation", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${key}`,
    },
    body: JSON.stringify({
      model: modelId,
      input: {
        text,
      },
      parameters: {
        voice,
        format: "mp3",
      },
    }),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Qwen TTS failed: ${error}`);
  }

  const data = await response.json() as { output?: { audio?: string } };
  const audioData = data.output?.audio;

  if (!audioData) {
    throw new Error("No audio returned from Qwen");
  }

  // The audio field is decoded as base64.
  return {
    type: "buffer",
    data: Buffer.from(audioData, "base64"),
    mimeType: "audio/mpeg",
  };
}
|
|
463
|
+
|
|
464
|
+
/**
 * Reports which voice providers have credentials available.
 *
 * A provider counts as configured when its `providers` row has a non-empty
 * `api_key_encrypted`, or when its environment variable is set to a
 * non-empty value. Keys are only checked for presence, not decrypted.
 *
 * @returns One boolean per provider.
 */
getConfiguredVoiceProviders(): { groq: boolean; elevenlabs: boolean; openai: boolean; gemini: boolean; qwen: boolean } {
  const db = getDb();

  // True when either the database row or the environment variable supplies a key.
  const isConfigured = (providerId: string, envValue: string | undefined): boolean => {
    const row = db.query(
      `SELECT api_key_encrypted FROM providers WHERE id = ? AND api_key_encrypted IS NOT NULL AND api_key_encrypted != ''`
    ).get(providerId) as { api_key_encrypted: string } | undefined;
    return Boolean(row) || Boolean(envValue);
  };

  const groq = isConfigured("groq", process.env.GROQ_API_KEY);
  const elevenlabs = isConfigured("elevenlabs", process.env.ELEVENLABS_API_KEY);
  const openai = isConfigured("openai", process.env.OPENAI_API_KEY);
  const gemini = isConfigured("gemini", process.env.GEMINI_API_KEY);
  const qwen = isConfigured("qwen", process.env.DASHSCOPE_API_KEY);

  return { groq, elevenlabs, openai, gemini, qwen };
}
|
|
481
|
+
|
|
482
|
+
/**
 * Lists the OpenAI voices offered by this service (a fixed, hard-coded list).
 *
 * @returns Voice ids with display names; each name is the id with its first letter capitalised.
 */
getOpenAIVoices(): Array<{ id: string; name: string }> {
  const voiceIds = [
    "alloy", "echo", "fable", "onyx", "nova", "shimmer",
    "ash", "ballad", "coral", "sage", "verse",
  ];

  return voiceIds.map((id) => ({
    id,
    name: id.charAt(0).toUpperCase() + id.slice(1),
  }));
}
|
|
497
|
+
|
|
498
|
+
/**
 * Lists the Gemini voices offered by this service (a fixed, hard-coded list).
 *
 * @returns Voice entries whose `id` and `name` are the same string.
 */
getGeminiVoices(): Array<{ id: string; name: string }> {
  const voiceNames = [
    "Puck", "Charon", "Kore", "Fenrir", "Aoede", "Orbit", "Zephyr",
    "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba", "Despina",
    "Erinome", "Laomedeia", "Achernar", "Rasalgethi", "Schedar", "Sulafat",
    "Vindemiatrix", "Zubenelgenubi", "Pulcherrima", "Achird",
    "Zubeneschamali", "Sadachbia", "Sadaltager", "Sheratan",
  ];

  return voiceNames.map((voice) => ({ id: voice, name: voice }));
}
|
|
529
|
+
|
|
530
|
+
/**
 * Lists the Qwen voices offered by this service (a fixed, hard-coded list).
 *
 * @returns Voice ids with display names of the form "Name (F|M, Chinese)".
 */
getQwenVoices(): Array<{ id: string; name: string }> {
  // [voice id, gender tag shown in the display name]
  const catalog: ReadonlyArray<readonly [string, "F" | "M"]> = [
    ["ruoxi", "F"],
    ["longhua", "M"],
    ["lingli", "F"],
    ["zhiyan", "F"],
    ["aicheng", "F"],
    ["aida", "F"],
    ["yucheng", "M"],
    ["yijia", "F"],
    ["yinan", "M"],
    ["sijia", "F"],
    ["sicheng", "M"],
    ["siqi", "F"],
    ["aixia", "F"],
  ];

  return catalog.map(([id, gender]) => ({
    id,
    name: `${id.charAt(0).toUpperCase()}${id.slice(1)} (${gender}, Chinese)`,
  }));
}
|
|
547
|
+
|
|
548
|
+
/**
 * Fetches the voice list from the ElevenLabs `/v1/voices` endpoint.
 *
 * The API key comes from `getProviderApiKey("elevenlabs")`; when that yields
 * a falsy value, the `ELEVENLABS_API_KEY` environment variable is used.
 *
 * @returns One entry per voice in the response, with `voice_id` exposed as `id`.
 * @throws Error if no API key is available, or if the HTTP response is not OK
 *   (the response body text is included in the message).
 */
async getElevenLabsVoices(): Promise<Array<{ id: string; name: string; category: string }>> {
  const storedKey = await this.getProviderApiKey("elevenlabs");
  const resolvedKey = storedKey || process.env.ELEVENLABS_API_KEY;
  if (!resolvedKey) {
    throw new Error("ELEVENLABS_API_KEY not configured");
  }

  const res = await fetch("https://api.elevenlabs.io/v1/voices", {
    headers: { "xi-api-key": resolvedKey },
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`Failed to fetch ElevenLabs voices: ${detail}`);
  }

  const payload = (await res.json()) as {
    voices: Array<{ voice_id: string; name: string; category: string }>;
  };

  // Rename the API's snake_case `voice_id` to the `id` field callers expect.
  return payload.voices.map(({ voice_id, name, category }) => ({
    id: voice_id,
    name,
    category,
  }));
}
|
|
574
|
+
|
|
575
|
+
/**
 * Converts a channel-specific audio payload into an AudioInput by delegating
 * to the normalizer for the given channel.
 *
 * @param channelType - "telegram", "discord", "whatsapp", "slack" or "webchat".
 * @param audioData - Raw payload; the expected shape depends on the channel.
 * @returns The AudioInput produced by the selected channel normalizer.
 * @throws Error if `channelType` is not one of the supported channels; errors
 *   thrown by the selected normalizer propagate unchanged.
 */
normalizeAudioFromChannel(channelType: string, audioData: unknown): AudioInput {
  if (channelType === "telegram") {
    return this.normalizeTelegramAudio(audioData);
  }
  if (channelType === "discord") {
    return this.normalizeDiscordAudio(audioData);
  }
  if (channelType === "whatsapp") {
    return this.normalizeWhatsAppAudio(audioData);
  }
  if (channelType === "slack") {
    return this.normalizeSlackAudio(audioData);
  }
  if (channelType === "webchat") {
    return this.normalizeWebChatAudio(audioData);
  }

  throw new Error(`Unknown channel type: ${channelType}`);
}
|
|
591
|
+
|
|
592
|
+
/**
 * Normalizes a Telegram audio payload into an AudioInput tagged "audio/ogg".
 *
 * A `buffer` takes precedence over a `url`. The payload shape also admits a
 * `fileId`, but this method does not read it.
 *
 * @param audioData - Payload expected to carry `buffer` and/or `url`.
 * @returns A "buffer" or "url" AudioInput.
 * @throws Error if neither `buffer` nor `url` is present, including when
 *   `audioData` itself is null or undefined.
 */
private normalizeTelegramAudio(audioData: unknown): AudioInput {
  // audioData is untyped caller input: treat null/undefined as an empty
  // payload so it reaches the descriptive error below instead of a TypeError.
  const data = (audioData ?? {}) as { fileId?: string; buffer?: Buffer; url?: string };

  if (data.buffer) {
    return { type: "buffer", data: data.buffer, mimeType: "audio/ogg" };
  }
  if (data.url) {
    return { type: "url", data: data.url, mimeType: "audio/ogg" };
  }
  throw new Error("Telegram audio missing buffer or URL");
}
|
|
603
|
+
|
|
604
|
+
/**
 * Normalizes a Discord audio payload into an AudioInput.
 *
 * A `buffer` takes precedence over a `url`. The payload's `mimeType` is used
 * when truthy; otherwise it defaults to "audio/webm".
 *
 * @param audioData - Payload expected to carry `buffer` and/or `url`, plus an
 *   optional `mimeType`.
 * @returns A "buffer" or "url" AudioInput.
 * @throws Error if neither `buffer` nor `url` is present, including when
 *   `audioData` itself is null or undefined.
 */
private normalizeDiscordAudio(audioData: unknown): AudioInput {
  // audioData is untyped caller input: treat null/undefined as an empty
  // payload so it reaches the descriptive error below instead of a TypeError.
  const data = (audioData ?? {}) as { buffer?: Buffer; url?: string; mimeType?: string };

  if (data.buffer) {
    return { type: "buffer", data: data.buffer, mimeType: data.mimeType || "audio/webm" };
  }
  if (data.url) {
    return { type: "url", data: data.url, mimeType: data.mimeType || "audio/webm" };
  }
  throw new Error("Discord audio missing buffer or URL");
}
|
|
615
|
+
|
|
616
|
+
/**
 * Normalizes a WhatsApp audio payload into an AudioInput tagged "audio/ogg".
 *
 * Sources are tried in order: `buffer`, then `base64`, then `url`.
 *
 * @param audioData - Payload expected to carry `buffer`, `base64` and/or `url`.
 * @returns A "buffer", "base64" or "url" AudioInput.
 * @throws Error if none of the three sources is present, including when
 *   `audioData` itself is null or undefined.
 */
private normalizeWhatsAppAudio(audioData: unknown): AudioInput {
  // audioData is untyped caller input: treat null/undefined as an empty
  // payload so it reaches the descriptive error below instead of a TypeError.
  const data = (audioData ?? {}) as { buffer?: Buffer; url?: string; base64?: string };

  if (data.buffer) {
    return { type: "buffer", data: data.buffer, mimeType: "audio/ogg" };
  }
  if (data.base64) {
    return { type: "base64", data: data.base64, mimeType: "audio/ogg" };
  }
  if (data.url) {
    return { type: "url", data: data.url, mimeType: "audio/ogg" };
  }
  throw new Error("WhatsApp audio: buffer not available — download may have failed");
}
|
|
630
|
+
|
|
631
|
+
/**
 * Normalizes a Slack audio payload into an AudioInput.
 *
 * A `buffer` takes precedence over a `url`. The payload's `mimeType` is used
 * when truthy; otherwise it defaults to "audio/webm".
 *
 * @param audioData - Payload expected to carry `buffer` and/or `url`, plus an
 *   optional `mimeType`.
 * @returns A "buffer" or "url" AudioInput.
 * @throws Error if neither `buffer` nor `url` is present, including when
 *   `audioData` itself is null or undefined.
 */
private normalizeSlackAudio(audioData: unknown): AudioInput {
  // audioData is untyped caller input: treat null/undefined as an empty
  // payload so it reaches the descriptive error below instead of a TypeError.
  const data = (audioData ?? {}) as { buffer?: Buffer; url?: string; mimeType?: string };

  if (data.buffer) {
    return { type: "buffer", data: data.buffer, mimeType: data.mimeType || "audio/webm" };
  }
  if (data.url) {
    return { type: "url", data: data.url, mimeType: data.mimeType || "audio/webm" };
  }
  throw new Error("Slack audio missing buffer or URL");
}
|
|
642
|
+
|
|
643
|
+
/**
 * Normalizes a WebChat audio payload into an AudioInput tagged "audio/webm".
 *
 * Unlike the other channel normalizers, `base64` is checked before `buffer`.
 *
 * @param audioData - Payload expected to carry `base64` and/or `buffer`.
 * @returns A "base64" or "buffer" AudioInput.
 * @throws Error if neither `base64` nor `buffer` is present, including when
 *   `audioData` itself is null or undefined.
 */
private normalizeWebChatAudio(audioData: unknown): AudioInput {
  // audioData is untyped caller input: treat null/undefined as an empty
  // payload so it reaches the descriptive error below instead of a TypeError.
  const data = (audioData ?? {}) as { base64?: string; buffer?: Buffer };

  if (data.base64) {
    return { type: "base64", data: data.base64, mimeType: "audio/webm" };
  }
  if (data.buffer) {
    return { type: "buffer", data: data.buffer, mimeType: "audio/webm" };
  }
  throw new Error("WebChat audio missing base64 or buffer");
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
/**
 * Module-level VoiceService instance, obtained once at import time via
 * `VoiceService.getInstance()` and exported for use by other modules.
 * NOTE(review): presumably the process-wide singleton — confirm against
 * `getInstance()`.
 */
export const voiceService = VoiceService.getInstance();
|