@johpaz/hive-sdk 0.0.12 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/.github/CODEOWNERS +9 -0
  2. package/.github/workflows/publish.yml +89 -0
  3. package/.github/workflows/version-bump.yml +102 -0
  4. package/CHANGELOG.md +38 -0
  5. package/README.md +158 -0
  6. package/bun.lock +543 -0
  7. package/bunfig.toml +7 -0
  8. package/docs/API-AGENTS.md +316 -0
  9. package/docs/API-CONTEXT-COMPILER.md +252 -0
  10. package/docs/API-DAG-SCHEDULER.md +273 -0
  11. package/docs/API-TOOLS-SKILLS-CHANNELS.md +293 -0
  12. package/docs/API-WORKERS-EVENTS.md +152 -0
  13. package/docs/INDEX.md +141 -0
  14. package/docs/README.md +68 -0
  15. package/package.json +54 -105
  16. package/packages/cli/package.json +17 -0
  17. package/packages/cli/src/commands/init.ts +56 -0
  18. package/packages/cli/src/commands/run.ts +45 -0
  19. package/packages/cli/src/commands/test.ts +42 -0
  20. package/packages/cli/src/commands/trace.ts +55 -0
  21. package/packages/cli/src/index.ts +43 -0
  22. package/packages/core/package.json +58 -0
  23. package/packages/core/src/ace/Curator.ts +158 -0
  24. package/packages/core/src/ace/Reflector.ts +200 -0
  25. package/packages/core/src/ace/Tracer.ts +100 -0
  26. package/packages/core/src/ace/index.ts +4 -0
  27. package/packages/core/src/agent/AgentRunner.ts +699 -0
  28. package/packages/core/src/agent/Compaction.ts +221 -0
  29. package/packages/core/src/agent/ContextCompiler.ts +567 -0
  30. package/packages/core/src/agent/ContextGuard.ts +91 -0
  31. package/packages/core/src/agent/ConversationStore.ts +244 -0
  32. package/packages/core/src/agent/Hooks.ts +166 -0
  33. package/packages/core/src/agent/NativeTools.ts +31 -0
  34. package/packages/core/src/agent/PromptBuilder.ts +169 -0
  35. package/packages/core/src/agent/Service.ts +267 -0
  36. package/packages/core/src/agent/StuckLoop.ts +133 -0
  37. package/packages/core/src/agent/index.ts +12 -0
  38. package/packages/core/src/agent/providers/LLMClient.ts +149 -0
  39. package/packages/core/src/agent/providers/anthropic.ts +212 -0
  40. package/packages/core/src/agent/providers/gemini.ts +215 -0
  41. package/packages/core/src/agent/providers/index.ts +199 -0
  42. package/packages/core/src/agent/providers/interface.ts +195 -0
  43. package/packages/core/src/agent/providers/ollama.ts +175 -0
  44. package/packages/core/src/agent/providers/openai-compat.ts +231 -0
  45. package/packages/core/src/agent/providers.ts +1 -0
  46. package/packages/core/src/agent/selectors/PlaybookSelector.ts +147 -0
  47. package/packages/core/src/agent/selectors/SkillSelector.ts +478 -0
  48. package/packages/core/src/agent/selectors/ToolSelector.ts +577 -0
  49. package/packages/core/src/agent/selectors/index.ts +6 -0
  50. package/packages/core/src/api/createAgent.test.ts +48 -0
  51. package/packages/core/src/api/createAgent.ts +122 -0
  52. package/packages/core/src/api/index.ts +2 -0
  53. package/packages/core/src/canvas/CanvasManager.ts +390 -0
  54. package/packages/core/src/canvas/a2ui-tools.ts +255 -0
  55. package/packages/core/src/canvas/canvas-tools.ts +448 -0
  56. package/packages/core/src/canvas/emitter.ts +149 -0
  57. package/packages/core/src/canvas/index.ts +6 -0
  58. package/packages/core/src/config/index.ts +2 -0
  59. package/packages/core/src/config/loader.ts +554 -0
  60. package/packages/core/src/ethics/EthicsGuard.test.ts +54 -0
  61. package/packages/core/src/ethics/EthicsGuard.ts +66 -0
  62. package/packages/core/src/ethics/index.ts +2 -0
  63. package/packages/core/src/gateway/channel-notify.test.ts +14 -0
  64. package/packages/core/src/gateway/channel-notify.ts +12 -0
  65. package/packages/core/src/gateway/index.ts +1 -0
  66. package/packages/core/src/index.ts +37 -0
  67. package/packages/core/src/mcp/MCPClient.ts +439 -0
  68. package/packages/core/src/mcp/MCPToolAdapter.ts +176 -0
  69. package/packages/core/src/mcp/config.ts +13 -0
  70. package/packages/core/src/mcp/hot-reload.ts +147 -0
  71. package/packages/core/src/mcp/index.ts +11 -0
  72. package/packages/core/src/mcp/logger.ts +42 -0
  73. package/packages/core/src/mcp/singleton.ts +21 -0
  74. package/packages/core/src/mcp/transports/index.ts +67 -0
  75. package/packages/core/src/mcp/transports/sse.ts +241 -0
  76. package/packages/core/src/mcp/transports/websocket.ts +159 -0
  77. package/packages/core/src/memory/Scratchpad.test.ts +47 -0
  78. package/packages/core/src/memory/Scratchpad.ts +37 -0
  79. package/packages/core/src/memory/Storage.ts +6 -0
  80. package/packages/core/src/memory/index.ts +2 -0
  81. package/packages/core/src/multimodal/VisionService.ts +293 -0
  82. package/packages/core/src/multimodal/index.ts +2 -0
  83. package/packages/core/src/multimodal/types.ts +28 -0
  84. package/packages/core/src/security/Pairing.ts +250 -0
  85. package/packages/core/src/security/RateLimit.ts +270 -0
  86. package/packages/core/src/security/index.ts +4 -0
  87. package/packages/core/src/skills/SkillLoader.ts +388 -0
  88. package/packages/core/src/skills/bundled-data.generated.ts +3332 -0
  89. package/packages/core/src/skills/defineSkill.ts +18 -0
  90. package/packages/core/src/skills/index.ts +4 -0
  91. package/packages/core/src/state/index.ts +2 -0
  92. package/packages/core/src/state/store.ts +312 -0
  93. package/packages/core/src/storage/SQLiteStorage.ts +407 -0
  94. package/packages/core/src/storage/crypto.ts +101 -0
  95. package/packages/core/src/storage/index.ts +10 -0
  96. package/packages/core/src/storage/onboarding.ts +1603 -0
  97. package/packages/core/src/storage/schema.ts +689 -0
  98. package/packages/core/src/storage/seed.ts +740 -0
  99. package/packages/core/src/storage/usage.ts +374 -0
  100. package/packages/core/src/swarm/AgentBus.ts +460 -0
  101. package/packages/core/src/swarm/AgentExecutor.ts +53 -0
  102. package/packages/core/src/swarm/Coordinator.ts +251 -0
  103. package/packages/core/src/swarm/EventBridge.ts +122 -0
  104. package/packages/core/src/swarm/EventBus.ts +169 -0
  105. package/packages/core/src/swarm/TaskGraph.ts +192 -0
  106. package/packages/core/src/swarm/TaskNode.ts +97 -0
  107. package/packages/core/src/swarm/TaskResult.ts +22 -0
  108. package/packages/core/src/swarm/WorkerPool.ts +236 -0
  109. package/packages/core/src/swarm/errors.ts +37 -0
  110. package/packages/core/src/swarm/index.ts +30 -0
  111. package/packages/core/src/swarm/presets/HiveLearnPreset.ts +99 -0
  112. package/packages/core/src/swarm/presets/ResearchPreset.ts +97 -0
  113. package/packages/core/src/swarm/presets/index.ts +4 -0
  114. package/packages/core/src/swarm/strategies/ParallelStrategy.ts +21 -0
  115. package/packages/core/src/swarm/strategies/PriorityStrategy.ts +46 -0
  116. package/packages/core/src/swarm/strategies/index.ts +3 -0
  117. package/packages/core/src/swarm/types.ts +164 -0
  118. package/packages/core/src/tools/ToolExecutor.ts +58 -0
  119. package/packages/core/src/tools/ToolRegistry.test.ts +98 -0
  120. package/packages/core/src/tools/ToolRegistry.ts +61 -0
  121. package/packages/core/src/tools/agents/get-available-models.ts +118 -0
  122. package/packages/core/src/tools/agents/index.ts +715 -0
  123. package/packages/core/src/tools/bridge-events.ts +26 -0
  124. package/packages/core/src/tools/canvas/index.ts +375 -0
  125. package/packages/core/src/tools/cli/index.ts +142 -0
  126. package/packages/core/src/tools/codebridge/index.ts +342 -0
  127. package/packages/core/src/tools/core/index.ts +476 -0
  128. package/packages/core/src/tools/cron/index.ts +626 -0
  129. package/packages/core/src/tools/filesystem/fs-delete.ts +78 -0
  130. package/packages/core/src/tools/filesystem/fs-edit.ts +106 -0
  131. package/packages/core/src/tools/filesystem/fs-exists.ts +63 -0
  132. package/packages/core/src/tools/filesystem/fs-glob.ts +108 -0
  133. package/packages/core/src/tools/filesystem/fs-list.ts +129 -0
  134. package/packages/core/src/tools/filesystem/fs-read.ts +72 -0
  135. package/packages/core/src/tools/filesystem/fs-write.ts +67 -0
  136. package/packages/core/src/tools/filesystem/index.ts +34 -0
  137. package/packages/core/src/tools/filesystem/workspace-guard.ts +62 -0
  138. package/packages/core/src/tools/index.ts +231 -0
  139. package/packages/core/src/tools/meeting/index.ts +363 -0
  140. package/packages/core/src/tools/office/index.ts +47 -0
  141. package/packages/core/src/tools/office/office-escribir-docx.ts +192 -0
  142. package/packages/core/src/tools/office/office-escribir-pdf.ts +172 -0
  143. package/packages/core/src/tools/office/office-escribir-pptx.ts +174 -0
  144. package/packages/core/src/tools/office/office-escribir-xlsx.ts +116 -0
  145. package/packages/core/src/tools/office/office-leer-docx.ts +93 -0
  146. package/packages/core/src/tools/office/office-leer-pdf.ts +114 -0
  147. package/packages/core/src/tools/office/office-leer-pptx.ts +136 -0
  148. package/packages/core/src/tools/office/office-leer-xlsx.ts +124 -0
  149. package/packages/core/src/tools/projects/index.ts +37 -0
  150. package/packages/core/src/tools/projects/project-create.ts +94 -0
  151. package/packages/core/src/tools/projects/project-done.ts +66 -0
  152. package/packages/core/src/tools/projects/project-fail.ts +66 -0
  153. package/packages/core/src/tools/projects/project-list.ts +96 -0
  154. package/packages/core/src/tools/projects/project-update.ts +72 -0
  155. package/packages/core/src/tools/projects/task-create.ts +68 -0
  156. package/packages/core/src/tools/projects/task-evaluate.ts +93 -0
  157. package/packages/core/src/tools/projects/task-update.ts +93 -0
  158. package/packages/core/src/tools/types.ts +39 -0
  159. package/packages/core/src/tools/voice/index.ts +104 -0
  160. package/packages/core/src/tools/web/browser-click.ts +78 -0
  161. package/packages/core/src/tools/web/browser-extract.ts +139 -0
  162. package/packages/core/src/tools/web/browser-navigate.ts +106 -0
  163. package/packages/core/src/tools/web/browser-screenshot.ts +87 -0
  164. package/packages/core/src/tools/web/browser-script.ts +88 -0
  165. package/packages/core/src/tools/web/browser-service.ts +554 -0
  166. package/packages/core/src/tools/web/browser-type.ts +101 -0
  167. package/packages/core/src/tools/web/browser-wait.ts +136 -0
  168. package/packages/core/src/tools/web/index.ts +41 -0
  169. package/packages/core/src/tools/web/web-fetch.ts +78 -0
  170. package/packages/core/src/tools/web/web-search.ts +123 -0
  171. package/packages/core/src/utils/benchmark.ts +80 -0
  172. package/packages/core/src/utils/crypto.ts +73 -0
  173. package/packages/core/src/utils/date.ts +42 -0
  174. package/packages/core/src/utils/index.ts +10 -0
  175. package/packages/core/src/utils/logger.ts +389 -0
  176. package/packages/core/src/utils/retry.ts +70 -0
  177. package/packages/core/src/utils/toon.ts +253 -0
  178. package/packages/core/src/voice/index.ts +656 -0
  179. package/test/setup-db.ts +216 -0
  180. package/tsconfig.json +39 -0
  181. package/src/agents.ts +0 -1
  182. package/src/canvas.ts +0 -1
  183. package/src/channels.ts +0 -1
  184. package/src/config.ts +0 -1
  185. package/src/events.ts +0 -1
  186. package/src/gateway.ts +0 -1
  187. package/src/index.ts +0 -304
  188. package/src/mcp.ts +0 -1
  189. package/src/multimodal.ts +0 -1
  190. package/src/scheduler.ts +0 -1
  191. package/src/security.ts +0 -1
  192. package/src/skills.ts +0 -1
  193. package/src/state.ts +0 -1
  194. package/src/storage.ts +0 -1
  195. package/src/tools.ts +0 -1
  196. package/src/tts.ts +0 -1
  197. package/src/types.ts +0 -82
  198. package/src/utils.ts +0 -1
  199. package/src/voice.ts +0 -1
@@ -0,0 +1,656 @@
1
+ import { getDb } from "../storage/SQLiteStorage.ts";
2
+ import { decryptApiKey } from "../storage/crypto.ts";
3
+ import { logger } from "../utils/logger.ts";
4
+
5
/**
 * Voice settings of a single channel.
 *
 * Each field mirrors one column of the `channels` table
 * (`voice_enabled`, `tts_enabled`, `stt_provider`, `tts_provider`,
 * `tts_voice_id`); the two flags are stored as 0/1 and exposed as booleans.
 */
export interface VoiceConfig {
  /** True when the channel's `voice_enabled` column is 1. */
  voiceEnabled: boolean;
  /** True when the channel's `tts_enabled` column is 1. */
  ttsEnabled: boolean;
  /** Value of `stt_provider`, or null when unset. */
  sttProvider: string | null;
  /** Value of `tts_provider`, or null when unset. */
  ttsProvider: string | null;
  /** Value of `tts_voice_id`, or null when unset. */
  ttsVoiceId: string | null;
}
12
+
13
/**
 * Audio handed to the transcription methods.
 *
 * `type` tells how `data` must be read: `"buffer"` carries the raw bytes,
 * `"base64"` carries a base64 string, and `"url"` carries an address the
 * audio is downloaded from before transcription.
 */
export interface AudioInput {
  /** Discriminates the representation stored in `data`. */
  type: "buffer" | "url" | "base64";
  /** Raw bytes for `"buffer"`; a string for `"url"` and `"base64"`. */
  data: Buffer | string;
  /** MIME type of the audio; each transcriber applies its own default when omitted. */
  mimeType?: string;
}
18
+
19
/**
 * Synthesized speech returned by the text-to-speech methods.
 */
export interface AudioOutput {
  /** Representation of `data`: raw bytes or a base64 string. */
  type: "buffer" | "base64";
  /** The audio payload, in the representation named by `type`. */
  data: Buffer | string;
  /** MIME type of the payload, e.g. `audio/mpeg` or `audio/wav`. */
  mimeType: string;
}
24
+
25
// Child logger used by everything in this module, created with the name "voice".
const log = logger.child("voice");
26
+
27
/**
 * Prepares text for speech synthesis (TTS).
 *
 * Strips Markdown formatting, pictographic emoji and invisible characters
 * that speech engines either read aloud literally or stumble over, then
 * collapses all whitespace so the result is a single line.
 *
 * Digits, `#` and `*` that are part of the actual text are preserved: they
 * carry the Unicode `Emoji` property, so matching on `\p{Emoji}` would erase
 * every number from the spoken output.
 *
 * @param text - Raw text, possibly containing Markdown.
 * @returns Plain single-line text; an empty string for empty input.
 */
export function cleanTextForTTS(text: string): string {
  if (!text) return "";

  return text
    // Fenced code blocks (``` ... ```) are dropped entirely.
    .replace(/```[\s\S]*?```/g, " ")
    // Inline code (`text`) keeps its content.
    .replace(/`([^`]+)`/g, "$1")
    // Images ![alt](url) -> alt. Must run before links, otherwise the link
    // pattern consumes "[alt](url)" and leaves a stray "!".
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1")
    // Links [text](url) -> text.
    .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
    // Line-level markers go before inline emphasis so that a "* " bullet is
    // not mistaken for the opening of an italic span.
    // Headings: "# text" -> "text".
    .replace(/^#+\s+/gm, "")
    // Bullet lists: "- text" / "* text" -> "text".
    .replace(/^[\-\*]\s+/gm, "")
    // Numbered lists: "1. text" -> "text".
    .replace(/^\d+\.\s+/gm, "")
    // Block quotes: "> text" -> "text".
    .replace(/^>\s+/gm, "")
    // Emphasis, longest delimiter first: ***text***, **text**, *text*, _text_.
    .replace(/\*\*\*([^*]+)\*\*\*/g, "$1")
    .replace(/\*\*([^*]+)\*\*/g, "$1")
    .replace(/\*([^*]+)\*/g, "$1")
    .replace(/_([^_]+)_/g, "$1")
    // Strikethrough ~~text~~ -> text.
    .replace(/~~([^~]+)~~/g, "$1")
    // Emoji: pictographs plus the pieces that only exist to build emoji
    // sequences (regional indicators, skin-tone modifiers, variation
    // selector-16, combining keycap, tag characters).
    .replace(
      /[\p{Extended_Pictographic}\u{1F1E6}-\u{1F1FF}\u{1F3FB}-\u{1F3FF}\u{FE0F}\u{20E3}\u{E0020}-\u{E007F}]/gu,
      "",
    )
    // Zero-width characters and the byte-order mark.
    .replace(/[\u200B-\u200D\uFEFF]/g, "")
    // Collapse runs of whitespace (including newlines) into one space.
    .replace(/\s+/g, " ")
    .trim();
}
69
+
70
/**
 * Speech-to-text and text-to-speech gateway.
 *
 * Transcription goes through Groq or OpenAI Whisper. Synthesis goes through a
 * local Piper server, ElevenLabs, OpenAI, Gemini or Qwen (DashScope). The
 * provider is chosen from the model id passed by the caller. API keys are
 * read from the `providers` table first and from environment variables as a
 * fallback.
 *
 * The class is a process-wide singleton; obtain it with {@link getInstance}.
 */
class VoiceService {
  private static instance: VoiceService;

  private constructor() {}

  /** Returns the shared instance, creating it on first use. */
  static getInstance(): VoiceService {
    if (!VoiceService.instance) {
      VoiceService.instance = new VoiceService();
    }
    return VoiceService.instance;
  }

  /**
   * Reads the voice settings of a channel from the `channels` table.
   *
   * @param channelId - Primary key of the channel row.
   * @returns The stored settings, or an all-disabled configuration when the
   *   channel does not exist.
   */
  getChannelVoiceConfig(channelId: string): VoiceConfig {
    const db = getDb();
    const result = db.query(`
      SELECT voice_enabled, tts_enabled, stt_provider, tts_provider, tts_voice_id
      FROM channels WHERE id = ?
    `).get(channelId) as {
      voice_enabled: number;
      tts_enabled: number;
      stt_provider: string | null;
      tts_provider: string | null;
      tts_voice_id: string | null;
    } | undefined;

    if (!result) {
      return {
        voiceEnabled: false,
        ttsEnabled: false,
        sttProvider: null,
        ttsProvider: null,
        ttsVoiceId: null,
      };
    }

    return {
      voiceEnabled: result.voice_enabled === 1,
      ttsEnabled: result.tts_enabled === 1,
      sttProvider: result.stt_provider,
      ttsProvider: result.tts_provider,
      ttsVoiceId: result.tts_voice_id,
    };
  }

  /**
   * Transcribes audio to text.
   *
   * `whisper-1` is routed to OpenAI; any other id starting with `whisper` is
   * routed to Groq. Unknown ids fall back to Groq `whisper-large-v3-turbo`
   * after logging a warning.
   *
   * @param audio - Audio to transcribe.
   * @param modelId - Speech-to-text model id.
   * @returns The transcribed text.
   * @throws Error when no API key is configured, the audio cannot be
   *   obtained, or the provider rejects the request.
   */
  async transcribe(audio: AudioInput, modelId: string): Promise<string> {
    // "whisper-1" also starts with "whisper", so the exact OpenAI match has to
    // be tested before the Groq prefix or it can never be reached.
    if (modelId === "whisper-1") {
      return this.transcribeWithOpenAIWhisper(audio);
    }
    if (modelId.startsWith("whisper")) {
      return this.transcribeWithGroq(audio, modelId);
    }

    log.warn(`Unknown STT provider ${modelId}, defaulting to Groq Whisper`);
    return this.transcribeWithGroq(audio, "whisper-large-v3-turbo");
  }

  /**
   * Loads and decrypts the API key stored for a provider.
   *
   * @returns The decrypted key, or null when the provider has no stored key
   *   or decryption fails (the failure is logged).
   */
  private async getProviderApiKey(providerId: string): Promise<string | null> {
    const db = getDb();
    const provider = db.query(`
      SELECT api_key_encrypted, api_key_iv FROM providers WHERE id = ?
    `).get(providerId) as { api_key_encrypted: string; api_key_iv: string } | undefined;

    if (!provider?.api_key_encrypted) {
      return null;
    }

    try {
      return await decryptApiKey(provider.api_key_encrypted, provider.api_key_iv);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      log.error(`Failed to decrypt API key for provider ${providerId}: ${reason}`);
      return null;
    }
  }

  /**
   * Resolves an API key: the first stored key among `providerIds`, otherwise
   * the supplied environment value.
   */
  private async resolveApiKey(
    providerIds: readonly string[],
    envValue: string | undefined,
  ): Promise<string | undefined> {
    for (const providerId of providerIds) {
      const stored = await this.getProviderApiKey(providerId);
      if (stored) return stored;
    }
    return envValue || undefined;
  }

  /** Materializes the bytes of an {@link AudioInput}, downloading them when it is a URL. */
  private async loadAudioBytes(audio: AudioInput): Promise<Uint8Array> {
    switch (audio.type) {
      case "buffer":
        return new Uint8Array(audio.data as Buffer);
      case "base64":
        return new Uint8Array(Buffer.from(audio.data as string, "base64"));
      case "url": {
        const response = await fetch(audio.data as string);
        if (!response.ok) {
          // The URL is left out of the message on purpose: it may embed credentials.
          throw new Error(`Failed to download audio: HTTP ${response.status}`);
        }
        return new Uint8Array(await response.arrayBuffer());
      }
      default:
        throw new Error("Invalid audio input type");
    }
  }

  /** Maps a MIME type to the file extension used for the upload name; defaults to `ogg`. */
  private static audioFileExtension(mime: string): string {
    if (mime.includes("webm")) return "webm";
    if (mime.includes("mp4") || mime.includes("m4a")) return "m4a";
    if (mime.includes("mp3") || mime.includes("mpeg")) return "mp3";
    if (mime.includes("wav")) return "wav";
    if (mime.includes("flac")) return "flac";
    return "ogg";
  }

  /** Prepends a 44-byte RIFF/WAVE header to raw PCM samples. */
  private static pcmToWav(pcm: Buffer, sampleRate: number, channels = 1, bitsPerSample = 16): Buffer {
    const blockAlign = (channels * bitsPerSample) / 8;
    const header = Buffer.alloc(44);
    header.write("RIFF", 0, "ascii");
    header.writeUInt32LE(36 + pcm.length, 4);
    header.write("WAVE", 8, "ascii");
    header.write("fmt ", 12, "ascii");
    header.writeUInt32LE(16, 16); // size of the fmt chunk
    header.writeUInt16LE(1, 20); // format tag 1 = uncompressed PCM
    header.writeUInt16LE(channels, 22);
    header.writeUInt32LE(sampleRate, 24);
    header.writeUInt32LE(sampleRate * blockAlign, 28); // byte rate
    header.writeUInt16LE(blockAlign, 32);
    header.writeUInt16LE(bitsPerSample, 34);
    header.write("data", 36, "ascii");
    header.writeUInt32LE(pcm.length, 40);
    return Buffer.concat([header, pcm]);
  }

  /**
   * Uploads audio to an OpenAI-compatible `/audio/transcriptions` endpoint.
   * The upload name carries the extension matching the MIME type because the
   * file name is what tells the server which container it is receiving.
   */
  private async requestTranscription(options: {
    endpoint: string;
    key: string;
    audio: AudioInput;
    modelId: string;
    defaultMime: string;
    failureLabel: string;
  }): Promise<string> {
    const { endpoint, key, audio, modelId, defaultMime, failureLabel } = options;

    const bytes = await this.loadAudioBytes(audio);
    const mime = audio.mimeType || defaultMime;
    const extension = VoiceService.audioFileExtension(mime);

    const formData = new FormData();
    formData.append("file", new Blob([bytes as BlobPart], { type: mime }), `audio.${extension}`);
    formData.append("model", modelId);
    formData.append("response_format", "json");
    formData.append("language", "es");

    const result = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${key}`,
      },
      body: formData,
    });

    if (!result.ok) {
      const error = await result.text();
      throw new Error(`${failureLabel} transcription failed: ${error}`);
    }

    const data = await result.json() as { text: string };
    return data.text;
  }

  /** Transcribes with Groq's hosted Whisper models. */
  private async transcribeWithGroq(audio: AudioInput, modelId: string): Promise<string> {
    const key = await this.resolveApiKey(["groq"], process.env.GROQ_API_KEY);
    if (!key) {
      throw new Error("GROQ_API_KEY not configured. Configúrala en Proveedores o en las variables de entorno.");
    }

    return this.requestTranscription({
      endpoint: "https://api.groq.com/openai/v1/audio/transcriptions",
      key,
      audio,
      modelId,
      defaultMime: "audio/ogg",
      failureLabel: "Groq Whisper",
    });
  }

  /** Transcribes with OpenAI `whisper-1`. */
  private async transcribeWithOpenAIWhisper(audio: AudioInput): Promise<string> {
    const key = await this.resolveApiKey(["openai"], process.env.OPENAI_API_KEY);
    if (!key) {
      throw new Error("OPENAI_API_KEY not configured. Configúrala en Proveedores o en las variables de entorno.");
    }

    return this.requestTranscription({
      endpoint: "https://api.openai.com/v1/audio/transcriptions",
      key,
      audio,
      modelId: "whisper-1",
      defaultMime: "audio/webm",
      failureLabel: "OpenAI Whisper",
    });
  }

  /**
   * Synthesizes speech for `text`.
   *
   * The provider is chosen from `modelId`: `piper` / `piper-local`, then the
   * prefixes `eleven`, `tts-` / `gpt-` (OpenAI), `gemini` and `qwen`. Unknown
   * ids fall back to ElevenLabs `eleven_flash_v2_5` after logging a warning.
   *
   * @param text - Text to speak.
   * @param modelId - Text-to-speech model id.
   * @param voiceId - Provider-specific voice; each provider has a default.
   * @returns The synthesized audio.
   * @throws Error when no API key is configured or the provider fails.
   */
  async speak(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
    if (modelId === "piper" || modelId === "piper-local") {
      return this.speakWithPiper(text, voiceId);
    }
    if (modelId.startsWith("eleven")) {
      return this.speakWithElevenLabs(text, modelId, voiceId);
    }
    if (modelId.startsWith("tts-") || modelId.startsWith("gpt-")) {
      return this.speakWithOpenAI(text, modelId, voiceId);
    }
    if (modelId.startsWith("gemini")) {
      return this.speakWithGemini(text, modelId, voiceId);
    }
    if (modelId.startsWith("qwen")) {
      return this.speakWithQwen(text, modelId, voiceId);
    }

    log.warn(`Unknown TTS provider ${modelId}, defaulting to ElevenLabs Flash`);
    return this.speakWithElevenLabs(text, "eleven_flash_v2_5", voiceId);
  }

  /**
   * Synthesizes with the local Piper HTTP server on `TTS_PORT` (default 5500).
   * The text is cleaned with `cleanTextForTTS` first; the request is aborted
   * after 15 seconds.
   */
  private async speakWithPiper(text: string, voiceId?: string): Promise<AudioOutput> {
    const cleanText = cleanTextForTTS(text);
    const port = Number(process.env.TTS_PORT ?? 5500);
    const res = await fetch(`http://localhost:${port}/tts`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text: cleanText, voice: voiceId }),
      signal: AbortSignal.timeout(15_000),
    });
    if (!res.ok) {
      throw new Error(`Piper TTS error ${res.status}. ¿Está el servidor TTS corriendo? (Ajustes → Voz)`);
    }
    const wav = await res.arrayBuffer();
    return {
      type: "buffer",
      data: Buffer.from(wav),
      mimeType: "audio/wav",
    };
  }

  /** Synthesizes with ElevenLabs; the default voice id is `21m00Tcm4TlvDq8ikWAM`. */
  private async speakWithElevenLabs(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
    const key = await this.resolveApiKey(["elevenlabs"], process.env.ELEVENLABS_API_KEY);

    if (!key) {
      throw new Error("ELEVENLABS_API_KEY not configured");
    }

    const voice = voiceId || "21m00Tcm4TlvDq8ikWAM";

    // The voice id becomes a path segment, so it is escaped.
    const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voice)}`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "xi-api-key": key,
      },
      body: JSON.stringify({
        text,
        model_id: modelId,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
        },
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`ElevenLabs TTS failed: ${error}`);
    }

    const buffer = await response.arrayBuffer();
    return {
      type: "buffer",
      data: Buffer.from(buffer),
      mimeType: "audio/mpeg",
    };
  }

  /**
   * Synthesizes with OpenAI; the default voice is `alloy`.
   * The key is taken from the `openai-tts` provider row, then from `openai`
   * (the id that `getConfiguredVoiceProviders` reports on), then from
   * `OPENAI_API_KEY`.
   */
  private async speakWithOpenAI(text: string, modelId: string = "gpt-4o-mini-tts", voiceId?: string): Promise<AudioOutput> {
    const key = await this.resolveApiKey(["openai-tts", "openai"], process.env.OPENAI_API_KEY);

    if (!key) {
      throw new Error("OPENAI_API_KEY not configured");
    }

    const voice = voiceId || "alloy";

    const response = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${key}`,
      },
      body: JSON.stringify({
        model: modelId,
        voice,
        input: text,
        response_format: "mp3",
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`OpenAI TTS failed: ${error}`);
    }

    const buffer = await response.arrayBuffer();
    return {
      type: "buffer",
      data: Buffer.from(buffer),
      mimeType: "audio/mpeg",
    };
  }

  /**
   * Synthesizes with Gemini; the default voice is `Aoede`.
   *
   * The key is sent in the `x-goog-api-key` header rather than the query
   * string so it does not end up in URLs that get logged. When the response
   * declares raw PCM (`audio/L16` / `audio/pcm`), the samples are wrapped in
   * a WAV container and returned as `audio/wav`; otherwise the declared MIME
   * type is passed through, with `audio/mpeg` kept as the fallback when none
   * is declared.
   */
  private async speakWithGemini(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
    const key = await this.resolveApiKey(["gemini"], process.env.GEMINI_API_KEY);

    if (!key) {
      throw new Error("GEMINI_API_KEY not configured");
    }

    const voiceName = voiceId || "Aoede";

    const response = await fetch(
      `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(modelId)}:generateContent`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-goog-api-key": key,
        },
        body: JSON.stringify({
          contents: [{
            parts: [{
              text: `Genera audio de este texto: ${text}`,
            }]
          }],
          generationConfig: {
            responseModalities: ["AUDIO"],
            speechConfig: {
              languageCode: "es-ES",
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName,
                },
              },
            },
          },
        }),
      },
    );

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Gemini TTS failed: ${error}`);
    }

    const data = await response.json() as {
      candidates?: Array<{ content?: { parts?: Array<{ inlineData?: { data?: string; mimeType?: string } }> } }>;
    };
    // Take the first part that actually carries audio instead of assuming it is part 0.
    const inline = data.candidates?.[0]?.content?.parts?.find(part => part.inlineData?.data)?.inlineData;

    if (!inline?.data) {
      throw new Error("No audio returned from Gemini");
    }

    const audioBytes = Buffer.from(inline.data, "base64");
    const reportedMime = inline.mimeType ?? "";

    if (/^audio\/(l16|pcm)\b/i.test(reportedMime)) {
      // NOTE(review): assumes 16-bit mono little-endian samples, 24 kHz when
      // the MIME type carries no rate parameter; confirm against the Gemini docs.
      const sampleRate = Number(/rate=(\d+)/i.exec(reportedMime)?.[1] ?? 24000);
      return {
        type: "buffer",
        data: VoiceService.pcmToWav(audioBytes, sampleRate),
        mimeType: "audio/wav",
      };
    }

    return {
      type: "buffer",
      data: audioBytes,
      mimeType: reportedMime || "audio/mpeg",
    };
  }

  /**
   * Synthesizes with Qwen through DashScope; the default voice is `ruoxi`.
   * The key is taken from the `qwen` provider row, then from
   * `DASHSCOPE_API_KEY`.
   */
  private async speakWithQwen(text: string, modelId: string, voiceId?: string): Promise<AudioOutput> {
    const key = await this.resolveApiKey(["qwen"], process.env.DASHSCOPE_API_KEY);

    if (!key) {
      throw new Error("DASHSCOPE_API_KEY not configured");
    }

    const voice = voiceId || "ruoxi";

    const response = await fetch("https://dashscope.aliyuncs.com/api/v1/services/audio/t2a/generation", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${key}`,
      },
      body: JSON.stringify({
        model: modelId,
        input: {
          text,
        },
        parameters: {
          voice,
          format: "mp3",
        },
      }),
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Qwen TTS failed: ${error}`);
    }

    // NOTE(review): treats `output.audio` as a base64 string; confirm against
    // the DashScope response schema.
    const data = await response.json() as { output?: { audio?: string } };
    const audioData = data.output?.audio;

    if (!audioData) {
      throw new Error("No audio returned from Qwen");
    }

    const buffer = Buffer.from(audioData, "base64");
    return {
      type: "buffer",
      data: buffer,
      mimeType: "audio/mpeg",
    };
  }

  /**
   * Reports which voice providers have an API key available, either stored
   * in the `providers` table or set in the environment.
   */
  getConfiguredVoiceProviders(): { groq: boolean; elevenlabs: boolean; openai: boolean; gemini: boolean; qwen: boolean } {
    const db = getDb();
    const hasDbKey = (providerId: string): boolean => {
      const row = db.query(
        `SELECT api_key_encrypted FROM providers WHERE id = ? AND api_key_encrypted IS NOT NULL AND api_key_encrypted != ''`
      ).get(providerId) as { api_key_encrypted: string } | undefined;
      return !!row;
    };

    return {
      groq: hasDbKey("groq") || !!(process.env.GROQ_API_KEY),
      elevenlabs: hasDbKey("elevenlabs") || !!(process.env.ELEVENLABS_API_KEY),
      // OpenAI speech also accepts a key stored under "openai-tts".
      openai: hasDbKey("openai") || hasDbKey("openai-tts") || !!(process.env.OPENAI_API_KEY),
      gemini: hasDbKey("gemini") || !!(process.env.GEMINI_API_KEY),
      qwen: hasDbKey("qwen") || !!(process.env.DASHSCOPE_API_KEY),
    };
  }

  /** Static list of the OpenAI voices offered by this service. */
  getOpenAIVoices(): Array<{ id: string; name: string }> {
    return [
      { id: "alloy", name: "Alloy" },
      { id: "echo", name: "Echo" },
      { id: "fable", name: "Fable" },
      { id: "onyx", name: "Onyx" },
      { id: "nova", name: "Nova" },
      { id: "shimmer", name: "Shimmer" },
      { id: "ash", name: "Ash" },
      { id: "ballad", name: "Ballad" },
      { id: "coral", name: "Coral" },
      { id: "sage", name: "Sage" },
      { id: "verse", name: "Verse" },
    ];
  }

  /** Static list of the Gemini prebuilt voices offered by this service. */
  getGeminiVoices(): Array<{ id: string; name: string }> {
    return [
      { id: "Puck", name: "Puck" },
      { id: "Charon", name: "Charon" },
      { id: "Kore", name: "Kore" },
      { id: "Fenrir", name: "Fenrir" },
      { id: "Aoede", name: "Aoede" },
      { id: "Orbit", name: "Orbit" },
      { id: "Zephyr", name: "Zephyr" },
      { id: "Autonoe", name: "Autonoe" },
      { id: "Enceladus", name: "Enceladus" },
      { id: "Iapetus", name: "Iapetus" },
      { id: "Umbriel", name: "Umbriel" },
      { id: "Algieba", name: "Algieba" },
      { id: "Despina", name: "Despina" },
      { id: "Erinome", name: "Erinome" },
      { id: "Laomedeia", name: "Laomedeia" },
      { id: "Achernar", name: "Achernar" },
      { id: "Rasalgethi", name: "Rasalgethi" },
      { id: "Schedar", name: "Schedar" },
      { id: "Sulafat", name: "Sulafat" },
      { id: "Vindemiatrix", name: "Vindemiatrix" },
      { id: "Zubenelgenubi", name: "Zubenelgenubi" },
      { id: "Pulcherrima", name: "Pulcherrima" },
      { id: "Achird", name: "Achird" },
      { id: "Zubeneschamali", name: "Zubeneschamali" },
      { id: "Sadachbia", name: "Sadachbia" },
      { id: "Sadaltager", name: "Sadaltager" },
      { id: "Sheratan", name: "Sheratan" },
    ];
  }

  /** Static list of the Qwen voices offered by this service. */
  getQwenVoices(): Array<{ id: string; name: string }> {
    return [
      { id: "ruoxi", name: "Ruoxi (F, Chinese)" },
      { id: "longhua", name: "Longhua (M, Chinese)" },
      { id: "lingli", name: "Lingli (F, Chinese)" },
      { id: "zhiyan", name: "Zhiyan (F, Chinese)" },
      { id: "aicheng", name: "Aicheng (F, Chinese)" },
      { id: "aida", name: "Aida (F, Chinese)" },
      { id: "yucheng", name: "Yucheng (M, Chinese)" },
      { id: "yijia", name: "Yijia (F, Chinese)" },
      { id: "yinan", name: "Yinan (M, Chinese)" },
      { id: "sijia", name: "Sijia (F, Chinese)" },
      { id: "sicheng", name: "Sicheng (M, Chinese)" },
      { id: "siqi", name: "Siqi (F, Chinese)" },
      { id: "aixia", name: "Aixia (F, Chinese)" },
    ];
  }

  /**
   * Fetches the voices available to the configured ElevenLabs account.
   *
   * @throws Error when no API key is configured or the request fails.
   */
  async getElevenLabsVoices(): Promise<Array<{ id: string; name: string; category: string }>> {
    const key = await this.resolveApiKey(["elevenlabs"], process.env.ELEVENLABS_API_KEY);

    if (!key) {
      throw new Error("ELEVENLABS_API_KEY not configured");
    }

    const response = await fetch("https://api.elevenlabs.io/v1/voices", {
      headers: {
        "xi-api-key": key,
      },
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Failed to fetch ElevenLabs voices: ${error}`);
    }

    const data = await response.json() as { voices: Array<{ voice_id: string; name: string; category: string }> };
    return data.voices.map(v => ({
      id: v.voice_id,
      name: v.name,
      category: v.category,
    }));
  }

  /**
   * Converts a channel-specific audio payload into an {@link AudioInput}.
   *
   * @param channelType - `telegram`, `discord`, `whatsapp`, `slack` or `webchat`.
   * @param audioData - Payload as delivered by the channel adapter.
   * @throws Error for an unknown channel type or a payload without usable audio.
   */
  normalizeAudioFromChannel(channelType: string, audioData: unknown): AudioInput {
    switch (channelType) {
      case "telegram":
        return this.normalizeTelegramAudio(audioData);
      case "discord":
        return this.normalizeDiscordAudio(audioData);
      case "whatsapp":
        return this.normalizeWhatsAppAudio(audioData);
      case "slack":
        return this.normalizeSlackAudio(audioData);
      case "webchat":
        return this.normalizeWebChatAudio(audioData);
      default:
        throw new Error(`Unknown channel type: ${channelType}`);
    }
  }

  /** Telegram: buffer preferred over URL; always labelled `audio/ogg`. */
  private normalizeTelegramAudio(audioData: unknown): AudioInput {
    // `?? {}` turns a null/undefined payload into the descriptive error below
    // instead of a bare TypeError.
    const data = (audioData ?? {}) as { fileId?: string; buffer?: Buffer; url?: string };

    if (data.buffer) {
      return { type: "buffer", data: data.buffer, mimeType: "audio/ogg" };
    }
    if (data.url) {
      return { type: "url", data: data.url, mimeType: "audio/ogg" };
    }
    throw new Error("Telegram audio missing buffer or URL");
  }

  /** Discord: buffer preferred over URL; MIME type defaults to `audio/webm`. */
  private normalizeDiscordAudio(audioData: unknown): AudioInput {
    const data = (audioData ?? {}) as { buffer?: Buffer; url?: string; mimeType?: string };

    if (data.buffer) {
      return { type: "buffer", data: data.buffer, mimeType: data.mimeType || "audio/webm" };
    }
    if (data.url) {
      return { type: "url", data: data.url, mimeType: data.mimeType || "audio/webm" };
    }
    throw new Error("Discord audio missing buffer or URL");
  }

  /** WhatsApp: buffer, then base64, then URL; always labelled `audio/ogg`. */
  private normalizeWhatsAppAudio(audioData: unknown): AudioInput {
    const data = (audioData ?? {}) as { buffer?: Buffer; url?: string; base64?: string };

    if (data.buffer) {
      return { type: "buffer", data: data.buffer, mimeType: "audio/ogg" };
    }
    if (data.base64) {
      return { type: "base64", data: data.base64, mimeType: "audio/ogg" };
    }
    if (data.url) {
      return { type: "url", data: data.url, mimeType: "audio/ogg" };
    }
    throw new Error("WhatsApp audio: buffer not available — download may have failed");
  }

  /** Slack: buffer preferred over URL; MIME type defaults to `audio/webm`. */
  private normalizeSlackAudio(audioData: unknown): AudioInput {
    const data = (audioData ?? {}) as { buffer?: Buffer; url?: string; mimeType?: string };

    if (data.buffer) {
      return { type: "buffer", data: data.buffer, mimeType: data.mimeType || "audio/webm" };
    }
    if (data.url) {
      return { type: "url", data: data.url, mimeType: data.mimeType || "audio/webm" };
    }
    throw new Error("Slack audio missing buffer or URL");
  }

  /** WebChat: base64 preferred over buffer; always labelled `audio/webm`. */
  private normalizeWebChatAudio(audioData: unknown): AudioInput {
    const data = (audioData ?? {}) as { base64?: string; buffer?: Buffer };

    if (data.base64) {
      return { type: "base64", data: data.base64, mimeType: "audio/webm" };
    }
    if (data.buffer) {
      return { type: "buffer", data: data.buffer, mimeType: "audio/webm" };
    }
    throw new Error("WebChat audio missing base64 or buffer");
  }
}
655
+
656
/** Shared `VoiceService` singleton for use by the rest of the application. */
export const voiceService = VoiceService.getInstance();