aiden-runtime 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +465 -0
  3. package/config/devos.config.json +186 -0
  4. package/config/hardware.json +9 -0
  5. package/config/model-selection.json +7 -0
  6. package/config/setup-complete.json +20 -0
  7. package/dist/api/routes/computerUse.js +112 -0
  8. package/dist/api/server.js +6870 -0
  9. package/dist/bin/npx-init.js +71 -0
  10. package/dist/coordination/commandGate.js +115 -0
  11. package/dist/coordination/livePulse.js +127 -0
  12. package/dist/core/agentLoop.js +2718 -0
  13. package/dist/core/agentShield.js +231 -0
  14. package/dist/core/aidenIdentity.js +215 -0
  15. package/dist/core/aidenPersonality.js +166 -0
  16. package/dist/core/aidenSdk.js +374 -0
  17. package/dist/core/asyncTasks.js +82 -0
  18. package/dist/core/auditTrail.js +61 -0
  19. package/dist/core/auxiliaryClient.js +114 -0
  20. package/dist/core/bgLLM.js +108 -0
  21. package/dist/core/bm25.js +68 -0
  22. package/dist/core/callbackSystem.js +64 -0
  23. package/dist/core/channels/adapter.js +6 -0
  24. package/dist/core/channels/discord.js +173 -0
  25. package/dist/core/channels/email.js +253 -0
  26. package/dist/core/channels/imessage.js +164 -0
  27. package/dist/core/channels/manager.js +96 -0
  28. package/dist/core/channels/signal.js +140 -0
  29. package/dist/core/channels/slack.js +139 -0
  30. package/dist/core/channels/twilio.js +144 -0
  31. package/dist/core/channels/webhook.js +186 -0
  32. package/dist/core/channels/whatsapp.js +185 -0
  33. package/dist/core/clarifyBus.js +75 -0
  34. package/dist/core/codeInterpreter.js +82 -0
  35. package/dist/core/computerControl.js +439 -0
  36. package/dist/core/conversationMemory.js +334 -0
  37. package/dist/core/costTracker.js +221 -0
  38. package/dist/core/cronManager.js +217 -0
  39. package/dist/core/deepKB.js +77 -0
  40. package/dist/core/doctor.js +279 -0
  41. package/dist/core/dreamEngine.js +334 -0
  42. package/dist/core/entityGraph.js +169 -0
  43. package/dist/core/eventBus.js +16 -0
  44. package/dist/core/evolutionAnalyzer.js +153 -0
  45. package/dist/core/executionLoop.js +309 -0
  46. package/dist/core/executor.js +224 -0
  47. package/dist/core/failureAnalyzer.js +166 -0
  48. package/dist/core/fastPathExpansion.js +82 -0
  49. package/dist/core/faultEngine.js +106 -0
  50. package/dist/core/featureGates.js +70 -0
  51. package/dist/core/fileIngestion.js +113 -0
  52. package/dist/core/gateway.js +97 -0
  53. package/dist/core/goalTracker.js +75 -0
  54. package/dist/core/growthEngine.js +168 -0
  55. package/dist/core/hardwareDetector.js +98 -0
  56. package/dist/core/hooks.js +45 -0
  57. package/dist/core/httpKeepalive.js +46 -0
  58. package/dist/core/hybridSearch.js +101 -0
  59. package/dist/core/importers.js +164 -0
  60. package/dist/core/instinctSystem.js +223 -0
  61. package/dist/core/knowledgeBase.js +351 -0
  62. package/dist/core/learningMemory.js +121 -0
  63. package/dist/core/lessonsBrowser.js +125 -0
  64. package/dist/core/licenseManager.js +399 -0
  65. package/dist/core/logBuffer.js +85 -0
  66. package/dist/core/machineId.js +87 -0
  67. package/dist/core/mcpClient.js +442 -0
  68. package/dist/core/memoryDistiller.js +165 -0
  69. package/dist/core/memoryExtractor.js +212 -0
  70. package/dist/core/memoryIds.js +213 -0
  71. package/dist/core/memoryPreamble.js +113 -0
  72. package/dist/core/memoryQuery.js +136 -0
  73. package/dist/core/memoryRecall.js +140 -0
  74. package/dist/core/memoryStrategy.js +201 -0
  75. package/dist/core/messageValidator.js +85 -0
  76. package/dist/core/modelDiscovery.js +108 -0
  77. package/dist/core/modelRouter.js +118 -0
  78. package/dist/core/morningBriefing.js +203 -0
  79. package/dist/core/multiGoalValidator.js +51 -0
  80. package/dist/core/parallelExecutor.js +43 -0
  81. package/dist/core/passiveSkillObserver.js +204 -0
  82. package/dist/core/paths.js +57 -0
  83. package/dist/core/patternDetector.js +83 -0
  84. package/dist/core/planResponseRepair.js +64 -0
  85. package/dist/core/planTool.js +111 -0
  86. package/dist/core/playwrightBridge.js +356 -0
  87. package/dist/core/pluginSystem.js +121 -0
  88. package/dist/core/privateMode.js +85 -0
  89. package/dist/core/reactLoop.js +156 -0
  90. package/dist/core/recipeEngine.js +166 -0
  91. package/dist/core/responseCache.js +128 -0
  92. package/dist/core/runSandbox.js +132 -0
  93. package/dist/core/sandboxRunner.js +200 -0
  94. package/dist/core/scheduler.js +543 -0
  95. package/dist/core/secretScanner.js +49 -0
  96. package/dist/core/semanticMemory.js +223 -0
  97. package/dist/core/sessionMemory.js +259 -0
  98. package/dist/core/sessionRouter.js +91 -0
  99. package/dist/core/sessionSearch.js +163 -0
  100. package/dist/core/setupWizard.js +225 -0
  101. package/dist/core/skillImporter.js +303 -0
  102. package/dist/core/skillLibrary.js +144 -0
  103. package/dist/core/skillLoader.js +471 -0
  104. package/dist/core/skillTeacher.js +352 -0
  105. package/dist/core/skillValidator.js +210 -0
  106. package/dist/core/skillWriter.js +384 -0
  107. package/dist/core/slashAsTool.js +226 -0
  108. package/dist/core/spawnManager.js +197 -0
  109. package/dist/core/statusVerbs.js +43 -0
  110. package/dist/core/swarmManager.js +109 -0
  111. package/dist/core/taskQueue.js +119 -0
  112. package/dist/core/taskRecovery.js +128 -0
  113. package/dist/core/taskState.js +168 -0
  114. package/dist/core/telegramBot.js +152 -0
  115. package/dist/core/todoManager.js +70 -0
  116. package/dist/core/toolNameRepair.js +71 -0
  117. package/dist/core/toolRegistry.js +2730 -0
  118. package/dist/core/tools/calendarTool.js +98 -0
  119. package/dist/core/tools/companyFilingsTool.js +98 -0
  120. package/dist/core/tools/gmailTool.js +87 -0
  121. package/dist/core/tools/marketDataTool.js +135 -0
  122. package/dist/core/tools/socialResearchTool.js +121 -0
  123. package/dist/core/truthCheck.js +57 -0
  124. package/dist/core/updateChecker.js +74 -0
  125. package/dist/core/userCognitionProfile.js +238 -0
  126. package/dist/core/userProfile.js +341 -0
  127. package/dist/core/version.js +5 -0
  128. package/dist/core/visionAnalyze.js +161 -0
  129. package/dist/core/voice/audio.js +187 -0
  130. package/dist/core/voice/stt.js +226 -0
  131. package/dist/core/voice/tts.js +310 -0
  132. package/dist/core/voiceInput.js +118 -0
  133. package/dist/core/voiceOutput.js +130 -0
  134. package/dist/core/webSearch.js +326 -0
  135. package/dist/core/workflowTracker.js +72 -0
  136. package/dist/core/workspaceMemory.js +54 -0
  137. package/dist/core/youtubeTranscript.js +224 -0
  138. package/dist/integrations/computerUse/apiRegistry.js +113 -0
  139. package/dist/integrations/computerUse/screenAgent.js +203 -0
  140. package/dist/integrations/computerUse/visionLoop.js +296 -0
  141. package/dist/memory/memoryLayers.js +143 -0
  142. package/dist/providers/boa.js +93 -0
  143. package/dist/providers/cerebras.js +70 -0
  144. package/dist/providers/custom.js +89 -0
  145. package/dist/providers/gemini.js +82 -0
  146. package/dist/providers/groq.js +92 -0
  147. package/dist/providers/index.js +149 -0
  148. package/dist/providers/nvidia.js +70 -0
  149. package/dist/providers/ollama.js +99 -0
  150. package/dist/providers/openrouter.js +74 -0
  151. package/dist/providers/router.js +497 -0
  152. package/dist/providers/types.js +6 -0
  153. package/dist/security/browserVault.js +129 -0
  154. package/dist/security/dataGuard.js +89 -0
  155. package/dist/tools/eonetTool.js +72 -0
  156. package/dist/types/computerUse.js +2 -0
  157. package/dist/types/executor.js +2 -0
  158. package/dist-bundle/cli.js +357859 -0
  159. package/package.json +256 -0
@@ -0,0 +1,310 @@
1
+ "use strict";
2
+ // ============================================================
3
+ // DevOS — Autonomous AI Execution System
4
+ // Copyright (c) 2026 Shiva Deore. All rights reserved.
5
+ // ============================================================
6
+ var __importDefault = (this && this.__importDefault) || function (mod) {
7
+ return (mod && mod.__esModule) ? mod : { "default": mod };
8
+ };
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.cleanForTTS = cleanForTTS;
11
+ exports.synthesize = synthesize;
12
+ exports.getTtsProviders = getTtsProviders;
13
+ // core/voice/tts.ts — Text-to-Speech with four-provider fallback chain.
14
+ //
15
+ // Priority order (auto-selected at runtime):
16
+ // 0. VoxCPM2 (USE_VOXCPM=1, Python subprocess, Apache-2.0) — voice clone/design
17
+ // 1. Edge TTS (edge-tts Python pkg, free) — best quality, needs network access (calls Microsoft's online speech service)
18
+ // 2. ElevenLabs (ELEVENLABS_API_KEY) — premium voices, REST API
19
+ // 3. Windows SAPI (System.Speech assembly) — always available on Windows
20
+ //
21
+ // VoxCPM2 is opt-in via USE_VOXCPM=1 env var and is always tried first when enabled.
22
+ // Never throws — returns TtsResult; callers check result.error.
23
+ const fs_1 = __importDefault(require("fs"));
24
+ const path_1 = __importDefault(require("path"));
25
+ const child_process_1 = require("child_process");
26
+ const util_1 = require("util");
27
+ const axios_1 = __importDefault(require("axios"));
28
+ const execAsync = (0, util_1.promisify)(child_process_1.exec);
29
+ // ── Constants ─────────────────────────────────────────────────────────────────
30
+ const DEFAULT_VOICE = 'en-US-AriaNeural';
31
+ const WORKSPACE = path_1.default.join(process.cwd(), 'workspace');
32
+ const ELEVENLABS_API_URL = 'https://api.elevenlabs.io/v1/text-to-speech';
33
+ const VOXCPM_RUNNER_PATH = path_1.default.join(__dirname, 'voxcpm_runner.py');
34
+ const VOXCPM_TIMEOUT_MS = 120000;
35
/** Makes sure the WORKSPACE directory exists, creating missing parent folders as needed. */
function ensureWorkspace() {
    const alreadyThere = fs_1.default.existsSync(WORKSPACE);
    if (alreadyThere) {
        return;
    }
    fs_1.default.mkdirSync(WORKSPACE, { recursive: true });
}
39
+ // ── Text cleaner ──────────────────────────────────────────────────────────────
40
/**
 * Turns markdown-flavoured text into a short plain string that reads well aloud.
 *
 * Fenced code becomes the words "code block.", inline code is dropped, bold/italic/heading
 * markers and link targets are removed, newlines become sentence breaks, a fixed set of
 * status emojis is stripped, whitespace is collapsed and the result is capped at 500
 * UTF-16 code units.
 *
 * @param {string} text - Raw text; any non-string value yields an empty string.
 * @returns {string} Cleaned text, at most 500 code units, never ending in half a surrogate pair.
 */
function cleanForTTS(text) {
    if (typeof text !== 'string') {
        return '';
    }
    const capped = text
        .replace(/```[\s\S]*?```/g, 'code block.')
        .replace(/`[^`]+`/g, '')
        .replace(/\*\*([^*]+)\*\*/g, '$1')
        .replace(/\*([^*]+)\*/g, '$1')
        .replace(/#{1,6}\s+/g, '')
        .replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1')
        .replace(/\n+/g, '. ')
        // The `u` flag makes the class match whole code points. Without it the class holds
        // individual surrogate halves and mangles every other astral character.
        .replace(/[❌✅⚡🔧📋🔍🎤🔊]/gu, '')
        .replace(/\s+/g, ' ')
        .trim()
        .slice(0, 500);
    // The cut above may land between the two halves of a surrogate pair; drop a dangling half.
    return capped.replace(/[\uD800-\uDBFF]$/, '');
}
54
+ // ── Provider 0 — VoxCPM2 (opt-in via USE_VOXCPM=1) ───────────────────────────
55
/**
 * Synthesizes speech through the VoxCPM2 Python runner and starts playback of the result.
 *
 * The request is written to the runner's stdin as one JSON object; the runner is expected
 * to print one JSON object on stdout with an `ok` flag and an optional `error` string.
 * Mode selection: `opts.referenceAudioPath` selects "clone", `opts.voiceDesignPrompt`
 * selects "design", a leading "design:" line in `text` also selects "design" (the rest of
 * that line is the voice description, the following lines are spoken), otherwise "tts".
 *
 * Subprocess failures (spawn error, timeout, runner error, unparsable output) are reported
 * through the `error` field of the resolved value instead of rejecting.
 *
 * @param {string} text - Text to speak, optionally carrying the inline "design:" prefix.
 * @param {object} opts - Reads `timeoutMs`, `referenceAudioPath` and `voiceDesignPrompt`.
 * @returns {Promise<{provider: string, durationMs: number, error?: string}>}
 */
async function synthesizeVoxCPM(text, opts) {
    const t0 = Date.now();
    const timeout = opts.timeoutMs ?? VOXCPM_TIMEOUT_MS;
    ensureWorkspace();
    const outputPath = path_1.default.join(WORKSPACE, `tts_voxcpm_${Date.now()}.wav`);
    const outputFwd = outputPath.replace(/\\/g, '/');
    // Detect mode: clone / design / standard
    let mode = 'tts';
    let voiceDescription;
    let speechText = text;
    if (opts.referenceAudioPath) {
        mode = 'clone';
    }
    else if (opts.voiceDesignPrompt) {
        mode = 'design';
        voiceDescription = opts.voiceDesignPrompt;
    }
    else if (text.startsWith('design:')) {
        // Inline design prefix: first line is the description, the rest is the text to speak.
        const newline = text.indexOf('\n');
        if (newline !== -1) {
            mode = 'design';
            voiceDescription = text.slice('design:'.length, newline).trim();
            speechText = text.slice(newline + 1).trim();
        }
    }
    const payload = {
        text: speechText,
        output_path: outputFwd,
        mode,
        reference_audio: opts.referenceAudioPath?.replace(/\\/g, '/') ?? null,
        voice_description: voiceDescription ?? null,
        language: 'en',
    };
    return new Promise((resolve) => {
        let settled = false;
        let timer = null;
        // Resolve at most once; later events (e.g. 'close' after a timeout) become no-ops.
        const finish = (result) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            resolve(result);
        };
        // Best-effort removal of the generated WAV.
        const removeOutput = () => {
            try {
                fs_1.default.unlinkSync(outputPath);
            }
            catch { /* ignore */ }
        };
        const child = (0, child_process_1.spawn)('python', [VOXCPM_RUNNER_PATH], { stdio: ['pipe', 'pipe', 'pipe'] });
        let stdout = '';
        let stderr = '';
        child.stdout.on('data', (d) => { stdout += d.toString(); });
        child.stderr.on('data', (d) => { stderr += d.toString(); });
        // If the process never started or exits early, the stdin write fails with a stream
        // 'error' event; without a listener that event would crash the whole process.
        child.stdin.on('error', () => { });
        timer = setTimeout(() => {
            child.kill();
            finish({ provider: 'voxcpm', durationMs: Date.now() - t0, error: `VoxCPM timed out after ${timeout}ms` });
        }, timeout);
        child.on('error', (err) => {
            finish({ provider: 'voxcpm', durationMs: Date.now() - t0, error: `VoxCPM spawn error: ${err.message}` });
        });
        child.on('close', () => {
            if (settled) {
                // Already reported a timeout or spawn error: do not play, just discard leftovers.
                removeOutput();
                return;
            }
            try {
                const result = JSON.parse(stdout.trim());
                if (!result.ok) {
                    // Surface well-known errors for upstream handling
                    removeOutput();
                    finish({ provider: 'voxcpm', durationMs: Date.now() - t0, error: result.error ?? 'VoxCPM failed' });
                    return;
                }
                // Play the generated WAV. Single quotes are doubled so the path stays one
                // PowerShell single-quoted literal.
                const escaped = outputPath.replace(/\\/g, '\\\\').replace(/'/g, "''");
                (0, child_process_1.exec)(`powershell -Command "Add-Type -AssemblyName presentationCore; $mp = New-Object System.Windows.Media.MediaPlayer; $mp.Open([uri]'${escaped}'); $mp.Play(); Start-Sleep -Seconds 10; $mp.Stop(); $mp.Close()"`, { timeout: 15000 }, (err) => {
                    if (err)
                        (0, child_process_1.exec)(`powershell -Command "Start-Process '${escaped}'"`);
                });
                // Playback is capped at 10s above, so the file can go once that window has passed.
                setTimeout(removeOutput, 15000);
                finish({ provider: 'voxcpm', durationMs: Date.now() - t0 });
            }
            catch {
                removeOutput();
                finish({ provider: 'voxcpm', durationMs: Date.now() - t0, error: `VoxCPM invalid output: ${stdout} | stderr: ${stderr}` });
            }
        });
        child.stdin.write(JSON.stringify(payload));
        child.stdin.end();
    });
}
132
+ // ── Provider 1 — Edge TTS ─────────────────────────────────────────────────────
133
/**
 * Synthesizes speech with the edge-tts Python package and starts playback of the MP3.
 *
 * A throw-away Python script is written into WORKSPACE, run with `python`, and removed
 * again. The resulting MP3 is played fire-and-forget through PowerShell and deleted 15s later.
 *
 * @param {string} text - Text to speak.
 * @param {object} opts - Reads `voice` (default DEFAULT_VOICE) and `timeoutMs` (default 20000).
 * @returns {Promise<{provider: string, durationMs: number}>}
 * @throws {Error} When the script cannot be written, Python fails or times out, or no
 *   audio file was produced.
 */
async function synthesizeEdge(text, opts) {
    ensureWorkspace();
    const t0 = Date.now();
    const voice = opts.voice ?? DEFAULT_VOICE;
    const audioPath = path_1.default.join(WORKSPACE, `tts_edge_${Date.now()}.mp3`);
    const audioFwd = audioPath.replace(/\\/g, '/');
    const timeout = opts.timeoutMs ?? 20000;
    // JSON.stringify yields a double-quoted literal with quotes, backslashes and control
    // characters escaped. Python reads that form as a string literal too, so neither the
    // text nor the voice name can break out of the string and inject code.
    const script = [
        'import asyncio, os, sys',
        // os.devnull is 'nul' on Windows and '/dev/null' elsewhere.
        "sys.stderr = open(os.devnull, 'w')",
        'import edge_tts',
        'async def main():',
        `    communicate = edge_tts.Communicate(${JSON.stringify(String(text))}, ${JSON.stringify(String(voice))})`,
        `    await communicate.save(${JSON.stringify(audioFwd)})`,
        'asyncio.run(main())',
    ].join('\n');
    const tmpPy = path_1.default.join(WORKSPACE, `tts_edge_gen_${Date.now()}.py`);
    try {
        fs_1.default.writeFileSync(tmpPy, script);
        await execAsync(`python "${tmpPy}"`, { timeout });
        if (!fs_1.default.existsSync(audioPath))
            throw new Error('edge-tts produced no audio file');
        // Play via Windows Media Player (fire-and-forget). Single quotes are doubled so the
        // path stays one PowerShell single-quoted literal.
        const escaped_path = audioPath.replace(/\\/g, '\\\\').replace(/'/g, "''");
        execAsync(`powershell -Command "Add-Type -AssemblyName presentationCore; $mp = New-Object System.Windows.Media.MediaPlayer; $mp.Open([uri]'${escaped_path}'); $mp.Play(); Start-Sleep -Seconds 8; $mp.Stop(); $mp.Close()"`, { timeout: 15000 }).catch(() => {
            execAsync(`powershell -Command "Start-Process '${escaped_path}'"`)
                .catch(() => { });
        });
        setTimeout(() => {
            try {
                fs_1.default.unlinkSync(audioPath);
            }
            catch { /* ignore */ }
        }, 15000);
        return { provider: 'edge', durationMs: Date.now() - t0 };
    }
    finally {
        try {
            fs_1.default.unlinkSync(tmpPy);
        }
        catch { /* ignore */ }
    }
}
175
+ // ── Provider 2 — ElevenLabs ───────────────────────────────────────────────────
176
/**
 * Synthesizes speech through the ElevenLabs REST API and starts playback of the MP3.
 *
 * The audio is written into WORKSPACE, played fire-and-forget through PowerShell and
 * deleted 15s later.
 *
 * @param {string} text - Text to speak.
 * @param {object} opts - Reads `voice` (an ElevenLabs voice id) and `timeoutMs` (default 20000).
 * @returns {Promise<{provider: string, durationMs: number}>}
 * @throws {Error} When ELEVENLABS_API_KEY is not set, the HTTP request fails, or the
 *   audio file cannot be written.
 */
async function synthesizeElevenLabs(text, opts) {
    const apiKey = process.env.ELEVENLABS_API_KEY;
    if (!apiKey)
        throw new Error('ELEVENLABS_API_KEY not set');
    ensureWorkspace();
    const t0 = Date.now();
    const voiceId = opts.voice ?? 'EXAVITQu4vr4xnSDxMaL'; // Sarah (default public voice)
    const timeout = opts.timeoutMs ?? 20000;
    // The voice id comes from the caller; encode it so it cannot add path segments or a
    // query string to the request URL.
    const endpoint = `${ELEVENLABS_API_URL}/${encodeURIComponent(voiceId)}`;
    const res = await axios_1.default.post(endpoint, {
        text,
        model_id: 'eleven_monolingual_v1',
        voice_settings: { stability: 0.5, similarity_boost: 0.75 },
    }, {
        headers: {
            'xi-api-key': apiKey,
            'Content-Type': 'application/json',
            Accept: 'audio/mpeg',
        },
        responseType: 'arraybuffer',
        timeout,
    });
    const audioPath = path_1.default.join(WORKSPACE, `tts_eleven_${Date.now()}.mp3`);
    // Single quotes are doubled so the path stays one PowerShell single-quoted literal.
    const escaped_path = audioPath.replace(/\\/g, '\\\\').replace(/'/g, "''");
    fs_1.default.writeFileSync(audioPath, Buffer.from(res.data));
    execAsync(`powershell -Command "Add-Type -AssemblyName presentationCore; $mp = New-Object System.Windows.Media.MediaPlayer; $mp.Open([uri]'${escaped_path}'); $mp.Play(); Start-Sleep -Seconds 8; $mp.Stop(); $mp.Close()"`, { timeout: 15000 }).catch(() => {
        execAsync(`powershell -Command "Start-Process '${escaped_path}'"`)
            .catch(() => { });
    });
    setTimeout(() => {
        try {
            fs_1.default.unlinkSync(audioPath);
        }
        catch { /* ignore */ }
    }, 15000);
    return { provider: 'elevenlabs', durationMs: Date.now() - t0 };
}
210
+ // ── Provider 3 — Windows SAPI ─────────────────────────────────────────────────
211
/**
 * Speaks text synchronously through Windows SAPI (System.Speech) via PowerShell.
 *
 * The PowerShell script is passed with -EncodedCommand (Base64 of the UTF-16LE script), so
 * cmd.exe never sees the text and cannot expand %VAR% sequences or trip over quotes in it.
 *
 * @param {string} text - Text to speak.
 * @param {object} opts - Reads `rate` (multiplier, default 1.0), `volume` (0-100, default
 *   100) and `timeoutMs` (default 30000).
 * @returns {Promise<{provider: string, durationMs: number}>}
 * @throws {Error} When PowerShell fails or the timeout elapses.
 */
async function synthesizeSAPI(text, opts) {
    const t0 = Date.now();
    // Coerce caller-supplied numbers; anything non-numeric falls back to the default so it
    // can never be interpolated into the script as code.
    const toFinite = (value, fallback) => {
        const n = Number(value ?? fallback);
        return Number.isFinite(n) ? n : fallback;
    };
    const clamp = (n, lo, hi) => Math.min(hi, Math.max(lo, n));
    // Multiplier to SAPI rate: 0.5 gives -2, 1.0 gives 0, 2.0 gives 5. SAPI accepts -10..10.
    const rate = clamp(Math.round((toFinite(opts.rate, 1.0) - 1.0) * 5), -10, 10);
    const volume = clamp(Math.round(toFinite(opts.volume, 100)), 0, 100);
    const timeout = opts.timeoutMs ?? 30000;
    // PowerShell treats typographic single quotes as string delimiters too, so double all
    // of them, not only the ASCII apostrophe.
    const literal = String(text).replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');
    const script = [
        'Add-Type -AssemblyName System.Speech',
        '$s = New-Object System.Speech.Synthesis.SpeechSynthesizer',
        `$s.Rate = ${rate}`,
        `$s.Volume = ${volume}`,
        `$s.Speak('${literal}')`,
    ].join('; ');
    const encoded = Buffer.from(script, 'utf16le').toString('base64');
    await execAsync(`powershell -EncodedCommand ${encoded}`, { timeout });
    return { provider: 'sapi', durationMs: Date.now() - t0 };
}
220
+ // ── Main exported function ────────────────────────────────────────────────────
221
/**
 * Synthesize text using the first available provider.
 *
 * With `options.provider` set to a known name only that provider is used. Otherwise the
 * chain is VoxCPM2 (when USE_VOXCPM=1, or a reference audio / voice design prompt is
 * given), then Edge TTS, then ElevenLabs, then Windows SAPI. An unknown provider name
 * falls through to the automatic chain.
 *
 * Never throws — always returns a TtsResult; check result.error on failure. Missing or
 * non-string text yields `{ provider: 'none', durationMs: 0 }`.
 *
 * @param {object} options - Reads `text`, `provider`, `referenceAudioPath` and
 *   `voiceDesignPrompt`; the whole object is forwarded to the selected provider.
 * @returns {Promise<{provider: string, durationMs: number, error?: string}>}
 */
async function synthesize(options) {
    const t0 = Date.now();
    // Guard before cleaning: a missing options object or non-string text must not throw.
    const rawText = options?.text;
    const text = typeof rawText === 'string' ? cleanForTTS(rawText) : '';
    if (!text)
        return { provider: 'none', durationMs: 0 };
    const errors = [];
    const describe = (e) => e?.message ?? String(e);
    // Explicit provider override
    if (options.provider) {
        const forced = new Map([
            ['voxcpm', synthesizeVoxCPM],
            ['edge', synthesizeEdge],
            ['elevenlabs', synthesizeElevenLabs],
            ['sapi', synthesizeSAPI],
        ]).get(options.provider);
        if (forced) {
            try {
                return await forced(text, options);
            }
            catch (e) {
                return { provider: options.provider, durationMs: Date.now() - t0, error: describe(e) };
            }
        }
    }
    // Provider 0 — VoxCPM2 (opt-in via USE_VOXCPM=1)
    const voxCpmEnabled = process.env.USE_VOXCPM === '1';
    if (voxCpmEnabled || options.referenceAudioPath || options.voiceDesignPrompt) {
        let r;
        try {
            r = await synthesizeVoxCPM(text, options);
        }
        catch (e) {
            // Setup failures (e.g. the workspace cannot be created) must not escape either.
            r = { provider: 'voxcpm', durationMs: Date.now() - t0, error: describe(e) };
        }
        if (!r.error) {
            console.log(`[TTS] VoxCPM2: ${r.durationMs}ms`);
            return r;
        }
        const reason = String(r.error);
        // Python reports the module name in quotes ("No module named 'voxcpm'"); accept the
        // quoted and the bare form. NOTE(review): the runner's exact wording is not visible here.
        const isNotInstalled = /No module named ['"]?voxcpm/.test(reason);
        const isOOM = reason.includes('CUDA out of memory');
        if (isOOM) {
            console.warn(`[TTS] VoxCPM OOM — falling through to next provider`);
        }
        else if (isNotInstalled && !voxCpmEnabled) {
            // clone/design requested but VoxCPM not installed — surface error immediately
            return r;
        }
        errors.push(`voxcpm: ${r.error}`);
    }
    // Providers 1-3 — Edge TTS, ElevenLabs, Windows SAPI, tried in that order.
    const chain = [
        ['edge', 'Edge TTS', synthesizeEdge],
        ['elevenlabs', 'ElevenLabs', synthesizeElevenLabs],
        ['sapi', 'SAPI', synthesizeSAPI],
    ];
    for (const [key, label, run] of chain) {
        try {
            const r = await run(text, options);
            console.log(`[TTS] ${label}: ${r.durationMs}ms`);
            return r;
        }
        catch (e) {
            errors.push(`${key}: ${describe(e)}`);
        }
    }
    // All failed
    const errorMsg = errors.join(' | ');
    console.warn(`[TTS] All providers failed: ${errorMsg}`);
    return { provider: 'none', durationMs: Date.now() - t0, error: errorMsg };
}
298
/**
 * Lists the four TTS providers with a cheap availability guess for each, based only on
 * environment variables and the current platform (nothing is actually probed).
 */
function getTtsProviders() {
    const voxEnabled = process.env.USE_VOXCPM === '1';
    const voxNote = voxEnabled ? 'enabled (USE_VOXCPM=1)' : 'set USE_VOXCPM=1 to enable';
    const hasElevenLabsKey = !!process.env.ELEVENLABS_API_KEY;
    const onWindows = process.platform === 'win32';
    const providers = [];
    providers.push({ name: 'voxcpm', available: voxEnabled, note: voxNote });
    // Reported as available unconditionally; the Python import is only checked when used.
    providers.push({ name: 'edge', available: true });
    providers.push({ name: 'elevenlabs', available: hasElevenLabsKey });
    providers.push({ name: 'sapi', available: onWindows });
    return providers;
}
@@ -0,0 +1,118 @@
1
+ "use strict";
2
+ // ============================================================
3
+ // DevOS — Autonomous AI Execution System
4
+ // Copyright (c) 2026 Shiva Deore. All rights reserved.
5
+ // ============================================================
6
+ var __importDefault = (this && this.__importDefault) || function (mod) {
7
+ return (mod && mod.__esModule) ? mod : { "default": mod };
8
+ };
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.checkVoiceAvailable = checkVoiceAvailable;
11
+ exports.transcribeAudio = transcribeAudio;
12
+ exports.recordAudio = recordAudio;
13
+ // core/voiceInput.ts — Voice input via faster-whisper (Python)
14
+ //
15
+ // Prerequisites (user installs once):
16
+ // pip install faster-whisper
17
+ //
18
+ // Fallback: if faster-whisper is not available, returns error message
19
+ // so the UI can hide the voice button gracefully.
20
+ const child_process_1 = require("child_process");
21
+ const util_1 = require("util");
22
+ const fs_1 = __importDefault(require("fs"));
23
+ const path_1 = __importDefault(require("path"));
24
+ const execAsync = (0, util_1.promisify)(child_process_1.exec);
25
+ const WORKSPACE = path_1.default.join(process.cwd(), 'workspace');
26
/** Creates the WORKSPACE directory, including missing parents, unless it already exists. */
function ensureWorkspace() {
    if (fs_1.default.existsSync(WORKSPACE) === false) {
        fs_1.default.mkdirSync(WORKSPACE, { recursive: true });
    }
}
30
+ // ── Availability check ────────────────────────────────────────
31
/**
 * Reports whether the faster-whisper Python package can be imported.
 * Resolves to true only when the probe prints exactly "ok"; any failure or a timeout
 * (5s) resolves to false.
 */
async function checkVoiceAvailable() {
    const probeOptions = { timeout: 5000 };
    try {
        const probe = await execAsync('python -c "import faster_whisper; print(\'ok\')"', probeOptions);
        const printed = probe.stdout.trim();
        return printed === 'ok';
    }
    catch {
        return false;
    }
}
40
+ // ── Transcribe audio file ─────────────────────────────────────
41
+ // Uses faster-whisper tiny model (CPU, int8) — fast enough for real-time
42
/**
 * Transcribes an audio file with the faster-whisper "tiny" model (CPU, int8).
 *
 * A throw-away Python script is written into WORKSPACE, run with `python` (30s timeout)
 * and removed again.
 *
 * @param {string} audioPath - Path of the audio file to transcribe.
 * @returns {Promise<string>} The trimmed text printed by the script.
 * @throws {Error} "Transcription failed: ..." when Python fails or times out.
 */
async function transcribeAudio(audioPath) {
    ensureWorkspace();
    const normalizedPath = audioPath.replace(/\\/g, '/');
    // JSON.stringify yields a double-quoted literal with quotes and backslashes escaped.
    // Python reads that form as a string literal too, so a path containing a quote cannot
    // end the string early and inject code.
    const pythonScript = [
        'from faster_whisper import WhisperModel',
        'model = WhisperModel("tiny", device="cpu", compute_type="int8")',
        `segments, info = model.transcribe(${JSON.stringify(normalizedPath)}, beam_size=5)`,
        'text = " ".join([segment.text for segment in segments])',
        'print(text.strip())',
    ].join('\n');
    const tmpScript = path_1.default.join(WORKSPACE, `whisper_${Date.now()}.py`);
    fs_1.default.writeFileSync(tmpScript, pythonScript);
    try {
        const { stdout } = await execAsync(`python "${tmpScript}"`, { timeout: 30000 });
        return stdout.trim();
    }
    catch (e) {
        // Keep the original failure attached for callers that inspect `cause`.
        throw new Error(`Transcription failed: ${e.message}`, { cause: e });
    }
    finally {
        try {
            fs_1.default.unlinkSync(tmpScript);
        }
        catch { /* best-effort cleanup */ }
    }
}
68
+ // ── Record audio from microphone ──────────────────────────────
69
+ // Uses Windows MCI (mciSendString) via P/Invoke — no third-party dep
70
/**
 * Records from the default microphone into a WAV file (mono, 16-bit, 16 kHz) using the
 * Windows MCI command interface, driven from a temporary PowerShell script.
 *
 * @param {number} [durationMs=5000] - Recording length in milliseconds.
 * @returns {Promise<string>} Path of the recorded WAV file inside WORKSPACE.
 * @throws {Error} "Recording failed: ..." when the duration is not a non-negative finite
 *   number, PowerShell fails or times out, or no file was produced.
 */
async function recordAudio(durationMs = 5000) {
    // The duration is interpolated into the script, so it must be a plain integer.
    const ms = Math.trunc(Number(durationMs));
    if (!Number.isFinite(ms) || ms < 0) {
        throw new Error('Recording failed: durationMs must be a non-negative finite number');
    }
    ensureWorkspace();
    const outputPath = path_1.default.join(WORKSPACE, `recording_${Date.now()}.wav`);
    // Single-quoted PowerShell literal: nothing is expanded inside it and backslashes are
    // ordinary characters, so only the quote itself needs doubling.
    const pathLiteral = `'${outputPath.replace(/'/g, "''")}'`;
    const psScript = [
        'Add-Type -TypeDefinition @"',
        'using System;',
        'using System.Threading;',
        'using System.Runtime.InteropServices;',
        '',
        'public class AudioRecorder {',
        '    [DllImport("winmm.dll")]',
        '    private static extern int mciSendString(',
        '        string command,',
        '        System.Text.StringBuilder returnValue,',
        '        int returnLength,',
        '        IntPtr winHandle',
        '    );',
        '',
        '    public static void Record(string outputPath, int durationMs) {',
        '        mciSendString("open new Type waveaudio Alias recsound", null, 0, IntPtr.Zero);',
        '        mciSendString("set recsound channels 1 bitspersample 16 samplespersec 16000", null, 0, IntPtr.Zero);',
        '        mciSendString("record recsound", null, 0, IntPtr.Zero);',
        '        Thread.Sleep(durationMs);',
        '        mciSendString("stop recsound", null, 0, IntPtr.Zero);',
        // MCI splits command strings on spaces; the file name must be quoted or any path
        // containing a space is cut short.
        '        mciSendString("save recsound \\"" + outputPath + "\\"", null, 0, IntPtr.Zero);',
        '        mciSendString("close recsound", null, 0, IntPtr.Zero);',
        '    }',
        '}',
        '"@',
        `[AudioRecorder]::Record(${pathLiteral}, ${ms})`,
        `Write-Output ${pathLiteral}`,
    ].join('\n');
    const psFile = path_1.default.join(WORKSPACE, `record_${Date.now()}.ps1`);
    // A UTF-8 byte-order mark makes Windows PowerShell read the script as UTF-8 rather
    // than the ANSI code page, which matters when the path has non-ASCII characters.
    fs_1.default.writeFileSync(psFile, `\uFEFF${psScript}`);
    try {
        await execAsync(`powershell.exe -ExecutionPolicy Bypass -File "${psFile}"`, { timeout: ms + 8000 });
        // The MCI return codes are not checked inside the script, so verify the result here.
        if (!fs_1.default.existsSync(outputPath)) {
            throw new Error('no audio file was produced');
        }
        return outputPath;
    }
    catch (e) {
        throw new Error(`Recording failed: ${e.message}`, { cause: e });
    }
    finally {
        try {
            fs_1.default.unlinkSync(psFile);
        }
        catch { /* best-effort cleanup */ }
    }
}
@@ -0,0 +1,130 @@
1
+ "use strict";
2
+ // ============================================================
3
+ // DevOS — Autonomous AI Execution System
4
+ // Copyright (c) 2026 Shiva Deore. All rights reserved.
5
+ // ============================================================
6
+ var __importDefault = (this && this.__importDefault) || function (mod) {
7
+ return (mod && mod.__esModule) ? mod : { "default": mod };
8
+ };
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.checkTTSAvailable = checkTTSAvailable;
11
+ exports.speak = speak;
12
+ // core/voiceOutput.ts — Voice output via edge-tts (Python) with SAPI fallback
13
+ //
14
+ // Prerequisites for best quality (user installs once):
15
+ // pip install edge-tts
16
+ //
17
+ // Fallback: Windows SAPI (System.Speech) — always available on Windows,
18
+ // no install required but lower quality than edge-tts/Aria.
19
+ const child_process_1 = require("child_process");
20
+ const util_1 = require("util");
21
+ const fs_1 = __importDefault(require("fs"));
22
+ const path_1 = __importDefault(require("path"));
23
+ const execAsync = (0, util_1.promisify)(child_process_1.exec);
24
+ const WORKSPACE = path_1.default.join(process.cwd(), 'workspace');
25
+ const DEFAULT_VOICE = 'en-US-AriaNeural';
26
/** Ensures the WORKSPACE directory is present, creating it (with parents) when missing. */
function ensureWorkspace() {
    const missing = !fs_1.default.existsSync(WORKSPACE);
    if (missing) {
        fs_1.default.mkdirSync(WORKSPACE, { recursive: true });
    }
}
30
+ // ── Availability check ────────────────────────────────────────
31
/**
 * Reports whether any speech backend responds.
 * First probes the edge-tts Python package (must print "ok" within 5s); if that does not
 * succeed, probes the System.Speech assembly through PowerShell (3s). Resolves to false
 * only when both probes fail.
 */
async function checkTTSAvailable() {
    // Probe 1: edge-tts importable from Python
    try {
        const edgeProbe = await execAsync('python -c "import edge_tts; print(\'ok\')"', { timeout: 5000 });
        const printed = edgeProbe.stdout.trim();
        if (printed === 'ok') {
            return true;
        }
    }
    catch { /* fall through to the SAPI probe */ }
    // Probe 2: Windows SAPI; success of the command is enough, its output is not inspected
    const sapiResponds = await execAsync('powershell -Command "Add-Type -AssemblyName System.Speech; Write-Output ok"', { timeout: 3000 })
        .then(() => true, () => false);
    return sapiResponds;
}
47
+ // ── Clean text for TTS ────────────────────────────────────────
48
+ // Strips markdown, code blocks, symbols — leaves clean spoken text
49
/**
 * Strips markdown, code and a fixed set of status emojis so the text can be spoken.
 *
 * Fenced code is replaced by the words "code block.", inline code is removed, emphasis and
 * heading markers are dropped, links keep only their label, newlines become ". ",
 * whitespace is collapsed and the result is limited to 500 UTF-16 code units.
 *
 * @param {string} text - Raw text; any non-string value yields an empty string.
 * @returns {string} Speakable text that never ends in half a surrogate pair.
 */
function cleanForTTS(text) {
    if (typeof text !== 'string') {
        return '';
    }
    const limited = text
        .replace(/```[\s\S]*?```/g, 'code block.')
        .replace(/`[^`]+`/g, '')
        .replace(/\*\*([^*]+)\*\*/g, '$1')
        .replace(/\*([^*]+)\*/g, '$1')
        .replace(/#{1,6}\s+/g, '')
        .replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1') // links → text
        .replace(/\n+/g, '. ')
        // `u` flag: match whole emoji code points. Without it the class contains bare
        // surrogate halves and tears apart unrelated emojis that share one.
        .replace(/[❌✅⚡🔧📋🔍🎤🔊]/gu, '')
        .replace(/\s+/g, ' ')
        .trim()
        .slice(0, 500); // max 500 chars for TTS
    // Remove a high surrogate left dangling by the cut above.
    return limited.replace(/[\uD800-\uDBFF]$/, '');
}
63
+ // ── Speak via edge-tts (Python) ───────────────────────────────
64
/**
 * Speaks text through the edge-tts Python package.
 *
 * Writes a throw-away Python script into WORKSPACE, runs it (15s timeout), then plays the
 * resulting MP3 fire-and-forget through PowerShell and deletes it after 12s.
 *
 * @param {string} text - Text to speak.
 * @param {string} voice - edge-tts voice name.
 * @returns {Promise<boolean>} true when audio was generated and playback was started;
 *   false on any failure, so the caller can fall back to another backend.
 */
async function speakEdgeTTS(text, voice) {
    const audioPath = path_1.default.join(WORKSPACE, `tts_${Date.now()}.mp3`);
    const audioFwd = audioPath.replace(/\\/g, '/');
    // JSON.stringify yields a double-quoted literal with quotes, backslashes and control
    // characters escaped. Python reads that form as a string literal too, so neither the
    // text nor the voice name can break out of the string and inject code.
    const scriptContent = [
        'import asyncio, os, sys',
        // os.devnull is 'nul' on Windows and '/dev/null' elsewhere.
        "sys.stderr = open(os.devnull, 'w')",
        'import edge_tts',
        'async def main():',
        `    communicate = edge_tts.Communicate(${JSON.stringify(String(text))}, ${JSON.stringify(String(voice))})`,
        `    await communicate.save(${JSON.stringify(audioFwd)})`,
        'asyncio.run(main())',
    ].join('\n');
    const tmpScript = path_1.default.join(WORKSPACE, `tts_gen_${Date.now()}.py`);
    try {
        // Inside the try so that a workspace or write failure also reports false.
        ensureWorkspace();
        fs_1.default.writeFileSync(tmpScript, scriptContent);
        await execAsync(`python "${tmpScript}"`, { timeout: 15000 });
        if (!fs_1.default.existsSync(audioPath))
            return false;
        // Single quotes are doubled so the path stays one PowerShell single-quoted literal.
        const playPath = audioPath.replace(/\\/g, '\\\\').replace(/'/g, "''");
        // Play the MP3 via Windows Media Player (async — don't block the response)
        execAsync(`powershell -Command "Add-Type -AssemblyName presentationCore; $mp = New-Object System.Windows.Media.MediaPlayer; $mp.Open([uri]'${playPath}'); $mp.Play(); Start-Sleep -Seconds 6; $mp.Stop(); $mp.Close()"`, { timeout: 15000 }).catch(() => {
            // Fallback: just Start-Process (opens system default player)
            execAsync(`powershell -Command "Start-Process '${playPath}'"`).catch(() => { });
        });
        // Clean up after 12s
        setTimeout(() => {
            try {
                fs_1.default.unlinkSync(audioPath);
            }
            catch { /* ignore */ }
        }, 12000);
        return true;
    }
    catch {
        return false;
    }
    finally {
        try {
            fs_1.default.unlinkSync(tmpScript);
        }
        catch { /* ignore */ }
    }
}
106
+ // ── Speak via Windows SAPI (always available) ─────────────────
107
/**
 * Speaks text synchronously through Windows SAPI (System.Speech) at rate 1, volume 100.
 *
 * The PowerShell script is passed with -EncodedCommand (Base64 of the UTF-16LE script), so
 * cmd.exe never sees the text and cannot expand %VAR% sequences or trip over quotes in it.
 *
 * @param {string} text - Text to speak.
 * @returns {Promise<void>}
 * @throws {Error} When PowerShell fails or the 30s timeout elapses.
 */
async function speakSAPI(text) {
    // PowerShell treats typographic single quotes as string delimiters too, so double all
    // of them, not only the ASCII apostrophe.
    const literal = String(text).replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');
    const script = [
        'Add-Type -AssemblyName System.Speech',
        '$s = New-Object System.Speech.Synthesis.SpeechSynthesizer',
        '$s.Rate = 1',
        '$s.Volume = 100',
        `$s.Speak('${literal}')`,
    ].join('; ');
    const encoded = Buffer.from(script, 'utf16le').toString('base64');
    await execAsync(`powershell -EncodedCommand ${encoded}`, { timeout: 30000 });
}
112
+ // ── Main exported function ────────────────────────────────────
113
/**
 * Speaks text aloud, preferring edge-tts and falling back to Windows SAPI.
 * Blank input, or input that is empty after cleaning, is ignored. A SAPI failure is
 * logged to the console rather than thrown.
 */
async function speak(text, voice = DEFAULT_VOICE) {
    const hasContent = Boolean(text?.trim());
    if (!hasContent) {
        return;
    }
    const spoken = cleanForTTS(text);
    if (spoken.length === 0) {
        return;
    }
    // Preferred backend: edge-tts
    const handledByEdge = await speakEdgeTTS(spoken, voice);
    if (handledByEdge) {
        return;
    }
    // Last resort: Windows SAPI
    try {
        await speakSAPI(spoken);
    }
    catch (e) {
        console.error('[TTS] Both methods failed:', e.message);
    }
}