aiden-runtime 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/LICENSE +661 -0
  2. package/README.md +465 -0
  3. package/config/devos.config.json +186 -0
  4. package/config/hardware.json +9 -0
  5. package/config/model-selection.json +7 -0
  6. package/config/setup-complete.json +20 -0
  7. package/dist/api/routes/computerUse.js +112 -0
  8. package/dist/api/server.js +6870 -0
  9. package/dist/bin/npx-init.js +71 -0
  10. package/dist/coordination/commandGate.js +115 -0
  11. package/dist/coordination/livePulse.js +127 -0
  12. package/dist/core/agentLoop.js +2718 -0
  13. package/dist/core/agentShield.js +231 -0
  14. package/dist/core/aidenIdentity.js +215 -0
  15. package/dist/core/aidenPersonality.js +166 -0
  16. package/dist/core/aidenSdk.js +374 -0
  17. package/dist/core/asyncTasks.js +82 -0
  18. package/dist/core/auditTrail.js +61 -0
  19. package/dist/core/auxiliaryClient.js +114 -0
  20. package/dist/core/bgLLM.js +108 -0
  21. package/dist/core/bm25.js +68 -0
  22. package/dist/core/callbackSystem.js +64 -0
  23. package/dist/core/channels/adapter.js +6 -0
  24. package/dist/core/channels/discord.js +173 -0
  25. package/dist/core/channels/email.js +253 -0
  26. package/dist/core/channels/imessage.js +164 -0
  27. package/dist/core/channels/manager.js +96 -0
  28. package/dist/core/channels/signal.js +140 -0
  29. package/dist/core/channels/slack.js +139 -0
  30. package/dist/core/channels/twilio.js +144 -0
  31. package/dist/core/channels/webhook.js +186 -0
  32. package/dist/core/channels/whatsapp.js +185 -0
  33. package/dist/core/clarifyBus.js +75 -0
  34. package/dist/core/codeInterpreter.js +82 -0
  35. package/dist/core/computerControl.js +439 -0
  36. package/dist/core/conversationMemory.js +334 -0
  37. package/dist/core/costTracker.js +221 -0
  38. package/dist/core/cronManager.js +217 -0
  39. package/dist/core/deepKB.js +77 -0
  40. package/dist/core/doctor.js +279 -0
  41. package/dist/core/dreamEngine.js +334 -0
  42. package/dist/core/entityGraph.js +169 -0
  43. package/dist/core/eventBus.js +16 -0
  44. package/dist/core/evolutionAnalyzer.js +153 -0
  45. package/dist/core/executionLoop.js +309 -0
  46. package/dist/core/executor.js +224 -0
  47. package/dist/core/failureAnalyzer.js +166 -0
  48. package/dist/core/fastPathExpansion.js +82 -0
  49. package/dist/core/faultEngine.js +106 -0
  50. package/dist/core/featureGates.js +70 -0
  51. package/dist/core/fileIngestion.js +113 -0
  52. package/dist/core/gateway.js +97 -0
  53. package/dist/core/goalTracker.js +75 -0
  54. package/dist/core/growthEngine.js +168 -0
  55. package/dist/core/hardwareDetector.js +98 -0
  56. package/dist/core/hooks.js +45 -0
  57. package/dist/core/httpKeepalive.js +46 -0
  58. package/dist/core/hybridSearch.js +101 -0
  59. package/dist/core/importers.js +164 -0
  60. package/dist/core/instinctSystem.js +223 -0
  61. package/dist/core/knowledgeBase.js +351 -0
  62. package/dist/core/learningMemory.js +121 -0
  63. package/dist/core/lessonsBrowser.js +125 -0
  64. package/dist/core/licenseManager.js +399 -0
  65. package/dist/core/logBuffer.js +85 -0
  66. package/dist/core/machineId.js +87 -0
  67. package/dist/core/mcpClient.js +442 -0
  68. package/dist/core/memoryDistiller.js +165 -0
  69. package/dist/core/memoryExtractor.js +212 -0
  70. package/dist/core/memoryIds.js +213 -0
  71. package/dist/core/memoryPreamble.js +113 -0
  72. package/dist/core/memoryQuery.js +136 -0
  73. package/dist/core/memoryRecall.js +140 -0
  74. package/dist/core/memoryStrategy.js +201 -0
  75. package/dist/core/messageValidator.js +85 -0
  76. package/dist/core/modelDiscovery.js +108 -0
  77. package/dist/core/modelRouter.js +118 -0
  78. package/dist/core/morningBriefing.js +203 -0
  79. package/dist/core/multiGoalValidator.js +51 -0
  80. package/dist/core/parallelExecutor.js +43 -0
  81. package/dist/core/passiveSkillObserver.js +204 -0
  82. package/dist/core/paths.js +57 -0
  83. package/dist/core/patternDetector.js +83 -0
  84. package/dist/core/planResponseRepair.js +64 -0
  85. package/dist/core/planTool.js +111 -0
  86. package/dist/core/playwrightBridge.js +356 -0
  87. package/dist/core/pluginSystem.js +121 -0
  88. package/dist/core/privateMode.js +85 -0
  89. package/dist/core/reactLoop.js +156 -0
  90. package/dist/core/recipeEngine.js +166 -0
  91. package/dist/core/responseCache.js +128 -0
  92. package/dist/core/runSandbox.js +132 -0
  93. package/dist/core/sandboxRunner.js +200 -0
  94. package/dist/core/scheduler.js +543 -0
  95. package/dist/core/secretScanner.js +49 -0
  96. package/dist/core/semanticMemory.js +223 -0
  97. package/dist/core/sessionMemory.js +259 -0
  98. package/dist/core/sessionRouter.js +91 -0
  99. package/dist/core/sessionSearch.js +163 -0
  100. package/dist/core/setupWizard.js +225 -0
  101. package/dist/core/skillImporter.js +303 -0
  102. package/dist/core/skillLibrary.js +144 -0
  103. package/dist/core/skillLoader.js +471 -0
  104. package/dist/core/skillTeacher.js +352 -0
  105. package/dist/core/skillValidator.js +210 -0
  106. package/dist/core/skillWriter.js +384 -0
  107. package/dist/core/slashAsTool.js +226 -0
  108. package/dist/core/spawnManager.js +197 -0
  109. package/dist/core/statusVerbs.js +43 -0
  110. package/dist/core/swarmManager.js +109 -0
  111. package/dist/core/taskQueue.js +119 -0
  112. package/dist/core/taskRecovery.js +128 -0
  113. package/dist/core/taskState.js +168 -0
  114. package/dist/core/telegramBot.js +152 -0
  115. package/dist/core/todoManager.js +70 -0
  116. package/dist/core/toolNameRepair.js +71 -0
  117. package/dist/core/toolRegistry.js +2730 -0
  118. package/dist/core/tools/calendarTool.js +98 -0
  119. package/dist/core/tools/companyFilingsTool.js +98 -0
  120. package/dist/core/tools/gmailTool.js +87 -0
  121. package/dist/core/tools/marketDataTool.js +135 -0
  122. package/dist/core/tools/socialResearchTool.js +121 -0
  123. package/dist/core/truthCheck.js +57 -0
  124. package/dist/core/updateChecker.js +74 -0
  125. package/dist/core/userCognitionProfile.js +238 -0
  126. package/dist/core/userProfile.js +341 -0
  127. package/dist/core/version.js +5 -0
  128. package/dist/core/visionAnalyze.js +161 -0
  129. package/dist/core/voice/audio.js +187 -0
  130. package/dist/core/voice/stt.js +226 -0
  131. package/dist/core/voice/tts.js +310 -0
  132. package/dist/core/voiceInput.js +118 -0
  133. package/dist/core/voiceOutput.js +130 -0
  134. package/dist/core/webSearch.js +326 -0
  135. package/dist/core/workflowTracker.js +72 -0
  136. package/dist/core/workspaceMemory.js +54 -0
  137. package/dist/core/youtubeTranscript.js +224 -0
  138. package/dist/integrations/computerUse/apiRegistry.js +113 -0
  139. package/dist/integrations/computerUse/screenAgent.js +203 -0
  140. package/dist/integrations/computerUse/visionLoop.js +296 -0
  141. package/dist/memory/memoryLayers.js +143 -0
  142. package/dist/providers/boa.js +93 -0
  143. package/dist/providers/cerebras.js +70 -0
  144. package/dist/providers/custom.js +89 -0
  145. package/dist/providers/gemini.js +82 -0
  146. package/dist/providers/groq.js +92 -0
  147. package/dist/providers/index.js +149 -0
  148. package/dist/providers/nvidia.js +70 -0
  149. package/dist/providers/ollama.js +99 -0
  150. package/dist/providers/openrouter.js +74 -0
  151. package/dist/providers/router.js +497 -0
  152. package/dist/providers/types.js +6 -0
  153. package/dist/security/browserVault.js +129 -0
  154. package/dist/security/dataGuard.js +89 -0
  155. package/dist/tools/eonetTool.js +72 -0
  156. package/dist/types/computerUse.js +2 -0
  157. package/dist/types/executor.js +2 -0
  158. package/dist-bundle/cli.js +357859 -0
  159. package/package.json +256 -0
@@ -0,0 +1,161 @@
1
+ "use strict";
2
+ // ============================================================
3
+ // DevOS — Autonomous AI Execution System
4
+ // Copyright (c) 2026 Shiva Deore. All rights reserved.
5
+ // ============================================================
6
+ //
7
+ // core/visionAnalyze.ts — Image analysis via vision-capable providers.
8
+ //
9
+ // Provider chain (first available wins):
10
+ // 1. Anthropic claude-3-5-sonnet (ANTHROPIC_API_KEY)
11
+ // 2. OpenAI gpt-4o (OPENAI_API_KEY)
12
+ // 3. Ollama llava (local, no key needed)
13
+ //
14
+ // Accepts local file paths (→ base64) or HTTP/HTTPS URLs.
15
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ var desc = Object.getOwnPropertyDescriptor(m, k);
18
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
19
+ desc = { enumerable: true, get: function() { return m[k]; } };
20
+ }
21
+ Object.defineProperty(o, k2, desc);
22
+ }) : (function(o, m, k, k2) {
23
+ if (k2 === undefined) k2 = k;
24
+ o[k2] = m[k];
25
+ }));
26
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
27
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
28
+ }) : function(o, v) {
29
+ o["default"] = v;
30
+ });
31
+ var __importStar = (this && this.__importStar) || (function () {
32
+ var ownKeys = function(o) {
33
+ ownKeys = Object.getOwnPropertyNames || function (o) {
34
+ var ar = [];
35
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
36
+ return ar;
37
+ };
38
+ return ownKeys(o);
39
+ };
40
+ return function (mod) {
41
+ if (mod && mod.__esModule) return mod;
42
+ var result = {};
43
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
44
+ __setModuleDefault(result, mod);
45
+ return result;
46
+ };
47
+ })();
48
+ var __importDefault = (this && this.__importDefault) || function (mod) {
49
+ return (mod && mod.__esModule) ? mod : { "default": mod };
50
+ };
51
+ Object.defineProperty(exports, "__esModule", { value: true });
52
+ exports.analyzeImage = analyzeImage;
53
+ const fs = __importStar(require("fs"));
54
+ const path = __importStar(require("path"));
55
+ const axios_1 = __importDefault(require("axios"));
56
+ // ── Media type resolver ───────────────────────────────────────────────────────
57
/**
 * Map a file extension to the image MIME type sent to the vision providers.
 *
 * @param ext  Extension with or without a leading dot, any case (e.g. ".PNG", "jpg").
 * @returns    The matching MIME type, or 'image/jpeg' when the extension is
 *             unknown, empty, or missing.
 */
function extToMediaType(ext) {
    // A Map (not an object literal) so that inherited keys such as "constructor"
    // or "toString" are never returned as if they were MIME types.
    const mediaTypes = new Map([
        ['jpg', 'image/jpeg'],
        ['jpeg', 'image/jpeg'],
        ['png', 'image/png'],
        ['gif', 'image/gif'],
        ['webp', 'image/webp'],
        ['bmp', 'image/bmp'],
    ]);
    const key = String(ext ?? '').toLowerCase().replace(/^\./, '');
    return mediaTypes.get(key) ?? 'image/jpeg';
}
64
+ // ── Core function ─────────────────────────────────────────────────────────────
65
/**
 * Analyze an image using the first available vision-capable provider.
 *
 * Providers are tried in order: Anthropic (only if ANTHROPIC_API_KEY is set),
 * OpenAI (only if OPENAI_API_KEY is set), then Ollama `llava` at
 * OLLAMA_BASE_URL (default http://127.0.0.1:11434). A cloud provider that
 * errors or answers with an empty description is skipped; its failure reason is
 * remembered and included in the final error if nothing succeeds.
 *
 * @param imageSource  File path (absolute or relative to cwd) or HTTP(S) URL.
 * @param prompt       Instruction prompt (default: describe the image).
 * @returns            VisionResult: { description, provider, modelUsed, durationMs }.
 * @throws {TypeError} If imageSource is not a non-empty string.
 * @throws {Error}     If a local file cannot be read, or every provider failed.
 */
async function analyzeImage(imageSource, prompt = 'Describe this image in detail.') {
    if (typeof imageSource !== 'string' || imageSource.length === 0) {
        throw new TypeError('vision_analyze: imageSource must be a non-empty file path or URL');
    }
    const start = Date.now();
    // Reasons collected from every provider that was tried and did not deliver.
    const failures = [];
    const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
    // Resolve image data. Local files are read once and shared by all providers.
    const isUrl = /^https?:\/\//i.test(imageSource);
    let base64Data = '';
    let mediaType = 'image/jpeg';
    if (!isUrl) {
        const absPath = path.resolve(imageSource);
        // Async read: this function is awaited by callers, so do not block the event loop.
        base64Data = (await fs.promises.readFile(absPath)).toString('base64');
        mediaType = extToMediaType(path.extname(absPath));
    }
    // ── Provider 1: Anthropic ─────────────────────────────────────────────────
    const anthropicKey = process.env.ANTHROPIC_API_KEY;
    if (anthropicKey) {
        const model = 'claude-3-5-sonnet-20241022';
        try {
            const imageBlock = isUrl
                ? { type: 'image', source: { type: 'url', url: imageSource } }
                : { type: 'image', source: { type: 'base64', media_type: mediaType, data: base64Data } };
            const res = await axios_1.default.post('https://api.anthropic.com/v1/messages', {
                model,
                max_tokens: 1024,
                messages: [{ role: 'user', content: [imageBlock, { type: 'text', text: prompt }] }],
            }, {
                headers: {
                    'x-api-key': anthropicKey,
                    'anthropic-version': '2023-06-01',
                    'content-type': 'application/json',
                },
                timeout: 30000,
            });
            const description = (res.data?.content?.[0]?.text ?? '').trim();
            if (description) {
                return { description, provider: 'anthropic', modelUsed: model, durationMs: Date.now() - start };
            }
            failures.push('anthropic: empty response');
        }
        catch (e) {
            // Fall through to the next provider, but keep the reason for the final error.
            failures.push(`anthropic: ${reasonOf(e)}`);
        }
    }
    // ── Provider 2: OpenAI ────────────────────────────────────────────────────
    const openaiKey = process.env.OPENAI_API_KEY;
    if (openaiKey) {
        const model = 'gpt-4o';
        try {
            const imageUrl = isUrl
                ? imageSource
                : `data:${mediaType};base64,${base64Data}`;
            const res = await axios_1.default.post('https://api.openai.com/v1/chat/completions', {
                model,
                max_tokens: 1024,
                messages: [{
                        role: 'user',
                        content: [
                            { type: 'image_url', image_url: { url: imageUrl } },
                            { type: 'text', text: prompt },
                        ],
                    }],
            }, {
                headers: { Authorization: `Bearer ${openaiKey}`, 'content-type': 'application/json' },
                timeout: 30000,
            });
            const description = (res.data?.choices?.[0]?.message?.content ?? '').trim();
            if (description) {
                return { description, provider: 'openai', modelUsed: model, durationMs: Date.now() - start };
            }
            failures.push('openai: empty response');
        }
        catch (e) {
            failures.push(`openai: ${reasonOf(e)}`);
        }
    }
    // ── Provider 3: Ollama llava ──────────────────────────────────────────────
    const ollamaBase = (process.env.OLLAMA_BASE_URL ?? 'http://127.0.0.1:11434').replace(/\/$/, '');
    // Ollama only takes base64 image data, so a URL source has to be downloaded first.
    let ollamaBase64 = base64Data;
    if (isUrl) {
        try {
            const imgRes = await axios_1.default.get(imageSource, { responseType: 'arraybuffer', timeout: 15000 });
            ollamaBase64 = Buffer.from(imgRes.data).toString('base64');
        }
        catch (e) {
            failures.push(`ollama: ${reasonOf(e)}`);
            throw new Error(`vision_analyze: all providers failed (could not download URL for Ollama). ${failures.join(' | ')}`, { cause: e });
        }
    }
    try {
        const res = await axios_1.default.post(`${ollamaBase}/api/generate`, { model: 'llava', prompt, images: [ollamaBase64], stream: false }, { timeout: 60000 });
        // Last provider in the chain: its answer is returned even when empty.
        const description = (res.data?.response ?? '').trim();
        return { description, provider: 'ollama', modelUsed: 'llava', durationMs: Date.now() - start };
    }
    catch (e) {
        failures.push(`ollama: ${reasonOf(e)}`);
        throw new Error(`vision_analyze: all providers exhausted. ${failures.join(' | ')}`, { cause: e });
    }
}
@@ -0,0 +1,187 @@
1
+ "use strict";
2
+ // ============================================================
3
+ // DevOS — Autonomous AI Execution System
4
+ // Copyright (c) 2026 Shiva Deore. All rights reserved.
5
+ // ============================================================
6
+ var __importDefault = (this && this.__importDefault) || function (mod) {
7
+ return (mod && mod.__esModule) ? mod : { "default": mod };
8
+ };
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.recordAudio = recordAudio;
11
+ exports.playAudio = playAudio;
12
+ exports.checkAudioAvailable = checkAudioAvailable;
13
+ // core/voice/audio.ts — Platform audio I/O: recording + playback.
14
+ //
15
+ // Recording: Windows MCI (P/Invoke via PowerShell, no third-party dep)
16
+ // Playback: Windows Media Player (presentationCore) → Start-Process fallback
17
+ //
18
+ // Cross-platform note: recording falls back to arecord/sox on Linux/macOS.
19
+ // Playback falls back to afplay (macOS) / paplay (Linux).
20
+ const fs_1 = __importDefault(require("fs"));
21
+ const path_1 = __importDefault(require("path"));
22
+ const child_process_1 = require("child_process");
23
+ const util_1 = require("util");
24
+ const execAsync = (0, util_1.promisify)(child_process_1.exec);
25
+ const WORKSPACE = path_1.default.join(process.cwd(), 'workspace');
26
/** Create the WORKSPACE directory (with any missing parents) unless it already exists. */
function ensureWorkspace() {
    const alreadyThere = fs_1.default.existsSync(WORKSPACE);
    if (alreadyThere) {
        return;
    }
    fs_1.default.mkdirSync(WORKSPACE, { recursive: true });
}
30
+ // ── Record audio from microphone ──────────────────────────────────────────────
31
/**
 * Record audio from the default microphone.
 *
 * Dispatches to the Windows recorder on win32 and to the Unix recorder on
 * every other platform.
 *
 * @param durationSeconds  Recording length in seconds (default 5). Must be a finite number > 0.
 * @param outputPath       Where to save the .wav file. Defaults to a timestamped file in workspace/.
 * @returns                Resolved path to the recorded file.
 * @throws {RangeError}    If durationSeconds is not a finite positive number.
 */
async function recordAudio(durationSeconds = 5, outputPath) {
    // Validate before touching the filesystem: NaN / Infinity / negative values
    // would otherwise end up inside the recorder command line and the exec timeout.
    const seconds = Number(durationSeconds);
    if (!Number.isFinite(seconds) || seconds <= 0) {
        throw new RangeError(`[Audio] durationSeconds must be a positive finite number, got ${String(durationSeconds)}`);
    }
    ensureWorkspace();
    const outPath = outputPath ?? path_1.default.join(WORKSPACE, `recording_${Date.now()}.wav`);
    const durationMs = Math.round(seconds * 1000);
    if (process.platform === 'win32') {
        return _recordWindows(outPath, durationMs);
    }
    return _recordUnix(outPath, durationMs);
}
49
/**
 * Record from the default microphone on Windows through the MCI string API
 * (winmm.dll), driven by a temporary PowerShell script written to WORKSPACE.
 *
 * The output path is passed to PowerShell as a single-quoted literal (quotes
 * doubled) so `$`, backticks and quotes in the path are never interpreted, and
 * it is wrapped in double quotes inside the MCI `save` command so paths
 * containing spaces are saved correctly.
 *
 * @param outputPath  Destination .wav path.
 * @param durationMs  Recording length in milliseconds.
 * @returns           outputPath, once the script has finished.
 * @throws {Error}    If the arguments are unusable or the PowerShell run fails.
 */
async function _recordWindows(outputPath, durationMs) {
    const millis = Math.round(Number(durationMs));
    if (!Number.isFinite(millis) || millis < 0) {
        throw new Error(`[Audio] Recording failed: invalid duration ${String(durationMs)}`);
    }
    // A double quote or line break cannot be part of a Windows path and would
    // break out of the quoted MCI filename, so refuse it outright.
    if (typeof outputPath !== 'string' || outputPath.length === 0 || /["\r\n]/.test(outputPath)) {
        throw new Error('[Audio] Recording failed: invalid output path');
    }
    // PowerShell single-quoted literal; it also treats the typographic quotes
    // U+2018/2019/201A/201B as string delimiters, so double those as well.
    const psPath = `'${outputPath.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&')}'`;
    const psScript = [
        // Single-quoted here-string: the C# source is taken verbatim, no expansion.
        "Add-Type -TypeDefinition @'",
        'using System;',
        'using System.Threading;',
        'using System.Runtime.InteropServices;',
        '',
        'public class AudioRecorder {',
        '    [DllImport("winmm.dll")]',
        '    private static extern int mciSendString(',
        '        string command,',
        '        System.Text.StringBuilder returnValue,',
        '        int returnLength,',
        '        IntPtr winHandle',
        '    );',
        '',
        '    public static void Record(string outputPath, int durationMs) {',
        '        mciSendString("open new Type waveaudio Alias recsound", null, 0, IntPtr.Zero);',
        '        mciSendString("set recsound channels 1 bitspersample 16 samplespersec 16000", null, 0, IntPtr.Zero);',
        '        mciSendString("record recsound", null, 0, IntPtr.Zero);',
        '        Thread.Sleep(durationMs);',
        '        mciSendString("stop recsound", null, 0, IntPtr.Zero);',
        '        mciSendString("save recsound \\"" + outputPath + "\\"", null, 0, IntPtr.Zero);',
        '        mciSendString("close recsound", null, 0, IntPtr.Zero);',
        '    }',
        '}',
        // The here-string terminator must sit at the very start of its line.
        "'@",
        `[AudioRecorder]::Record(${psPath}, ${millis})`,
        `Write-Output ${psPath}`,
    ].join('\n');
    // Random suffix so two recordings started in the same millisecond do not share a script.
    const scriptName = `record_${Date.now()}_${Math.random().toString(36).slice(2, 10)}.ps1`;
    const psFile = path_1.default.join(WORKSPACE, scriptName);
    // Leading BOM so Windows PowerShell reads the script as UTF-8 rather than the ANSI code page.
    fs_1.default.writeFileSync(psFile, `\uFEFF${psScript}`, 'utf8');
    try {
        // Allow the recording time plus headroom for compiling the C# helper.
        await execAsync(`powershell.exe -NoProfile -ExecutionPolicy Bypass -File "${psFile}"`, { timeout: millis + 8000 });
        return outputPath;
    }
    catch (e) {
        throw new Error(`[Audio] Recording failed: ${e.message}`, { cause: e });
    }
    finally {
        try {
            fs_1.default.unlinkSync(psFile);
        }
        catch { /* best-effort cleanup of the temp script */ }
    }
}
96
/**
 * Record from the default input device on Linux/macOS, trying `sox` first and
 * falling back to `arecord`.
 *
 * The tools are started with an argument vector (no shell), so characters such
 * as quotes, `$` or backticks in outputPath are passed through literally.
 *
 * @param outputPath  Destination .wav path.
 * @param durationMs  Recording length in milliseconds (rounded up to whole seconds).
 * @returns           outputPath, once one of the tools has finished.
 * @throws {Error}    If both sox and arecord fail; the message carries both reasons.
 */
async function _recordUnix(outputPath, durationMs) {
    const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
    const seconds = String(Math.ceil(durationMs / 1000));
    const options = { timeout: durationMs + 5000 };
    // Absolute path so a name starting with "-" cannot be mistaken for an option.
    const target = path_1.default.resolve(outputPath);
    try {
        await execFileAsync('sox', ['-d', '-t', 'wav', target, 'trim', '0', seconds], options);
    }
    catch (soxError) {
        try {
            await execFileAsync('arecord', ['-d', seconds, '-f', 'S16_LE', '-r', '16000', '-c', '1', target], options);
        }
        catch (arecordError) {
            throw new Error(`[Audio] Recording failed: sox: ${soxError.message} | arecord: ${arecordError.message}`, { cause: arecordError });
        }
    }
    return outputPath;
}
107
+ // ── Play audio ────────────────────────────────────────────────────────────────
108
/**
 * Play an audio file, or raw audio bytes, through the platform player.
 *
 * A Buffer is first written to a timestamped `playback_*.wav` file in
 * WORKSPACE; that temporary file is removed roughly ten seconds after the
 * player call settles (success or failure). The call resolves when the
 * platform-specific player helper resolves.
 *
 * @param audioSource  Path to an audio file, or a Buffer holding the audio data.
 * @throws {Error}     If the file to play does not exist.
 */
async function playAudio(audioSource) {
    ensureWorkspace();
    const fromBuffer = Buffer.isBuffer(audioSource);
    const target = fromBuffer
        ? path_1.default.join(WORKSPACE, `playback_${Date.now()}.wav`)
        : audioSource;
    if (fromBuffer) {
        fs_1.default.writeFileSync(target, audioSource);
    }
    if (!fs_1.default.existsSync(target)) {
        throw new Error(`[Audio] File not found: ${target}`);
    }
    const player = process.platform === 'win32' ? _playWindows : _playUnix;
    try {
        await player(target);
    }
    finally {
        if (fromBuffer) {
            // Deferred removal of the temp file; a failed delete is ignored.
            const removeTemp = () => {
                try {
                    fs_1.default.unlinkSync(target);
                }
                catch { /* ignore */ }
            };
            setTimeout(removeTemp, 10000);
        }
    }
}
146
/**
 * Play a file on Windows with System.Windows.Media.MediaPlayer, falling back
 * to opening it with the default associated application.
 *
 * PowerShell is started with an argument vector (no cmd.exe string), and the
 * path is embedded as a single-quoted PowerShell literal with quotes doubled,
 * so quotes or `$` in the path cannot alter the command.
 *
 * The MediaPlayer script plays for a fixed 10 seconds and then stops.
 * Failures are logged, never thrown.
 *
 * @param filePath  Path of the audio file to play.
 */
async function _playWindows(filePath) {
    const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
    // PowerShell also accepts U+2018/2019/201A/201B as single-quote delimiters.
    const psPath = `'${String(filePath).replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&')}'`;
    const playScript = [
        'Add-Type -AssemblyName presentationCore',
        '$mp = New-Object System.Windows.Media.MediaPlayer',
        `$mp.Open([uri]${psPath})`,
        '$mp.Play()',
        'Start-Sleep -Seconds 10',
        '$mp.Stop()',
        '$mp.Close()',
    ].join('; ');
    try {
        await execFileAsync('powershell.exe', ['-NoProfile', '-Command', playScript], { timeout: 30000 });
    }
    catch (playerError) {
        // Fallback: system default media player.
        try {
            await execFileAsync('powershell.exe', ['-NoProfile', '-Command', `Start-Process -FilePath ${psPath}`], { timeout: 5000 });
        }
        catch (fallbackError) {
            // Playback stays best-effort, but leave a trace instead of failing silently.
            console.warn(`[Audio] Playback failed: ${playerError.message} | fallback: ${fallbackError.message}`);
        }
    }
}
154
/**
 * Play a file on macOS (`afplay`) or Linux (`paplay`, then `aplay`).
 *
 * The players are started with an argument vector (no shell), so quotes, `$`
 * or backticks in filePath are passed through literally.
 *
 * @param filePath  Path of the audio file to play.
 * @throws {Error}  If the player fails (on Linux: if the `aplay` fallback fails).
 */
async function _playUnix(filePath) {
    const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
    const options = { timeout: 30000 };
    // Absolute path so a name starting with "-" cannot be mistaken for an option.
    const target = path_1.default.resolve(filePath);
    if (process.platform === 'darwin') {
        await execFileAsync('afplay', [target], options);
        return;
    }
    try {
        await execFileAsync('paplay', [target], options);
    }
    catch {
        // paplay missing or failed: fall back to ALSA's aplay.
        await execFileAsync('aplay', [target], options);
    }
}
167
+ // ── Availability check ────────────────────────────────────────────────────────
168
/**
 * Probe whether audio recording is likely possible on this system.
 *
 * On win32 the probe is a PowerShell call that loads System.Speech; elsewhere
 * it looks for `arecord` or `sox` with `which`.
 *
 * @returns true when the probe command succeeds, false when it fails or times out.
 */
async function checkAudioAvailable() {
    const probe = process.platform === 'win32'
        ? { command: 'powershell -Command "Add-Type -AssemblyName System.Speech; Write-Output ok"', timeout: 3000 }
        : { command: 'which arecord || which sox', timeout: 2000 };
    try {
        await execAsync(probe.command, { timeout: probe.timeout });
    }
    catch {
        return false;
    }
    return true;
}
@@ -0,0 +1,226 @@
1
+ "use strict";
2
+ // ============================================================
3
+ // DevOS — Autonomous AI Execution System
4
+ // Copyright (c) 2026 Shiva Deore. All rights reserved.
5
+ // ============================================================
6
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
7
+ if (k2 === undefined) k2 = k;
8
+ var desc = Object.getOwnPropertyDescriptor(m, k);
9
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
10
+ desc = { enumerable: true, get: function() { return m[k]; } };
11
+ }
12
+ Object.defineProperty(o, k2, desc);
13
+ }) : (function(o, m, k, k2) {
14
+ if (k2 === undefined) k2 = k;
15
+ o[k2] = m[k];
16
+ }));
17
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
18
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
19
+ }) : function(o, v) {
20
+ o["default"] = v;
21
+ });
22
+ var __importStar = (this && this.__importStar) || (function () {
23
+ var ownKeys = function(o) {
24
+ ownKeys = Object.getOwnPropertyNames || function (o) {
25
+ var ar = [];
26
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
27
+ return ar;
28
+ };
29
+ return ownKeys(o);
30
+ };
31
+ return function (mod) {
32
+ if (mod && mod.__esModule) return mod;
33
+ var result = {};
34
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
35
+ __setModuleDefault(result, mod);
36
+ return result;
37
+ };
38
+ })();
39
+ var __importDefault = (this && this.__importDefault) || function (mod) {
40
+ return (mod && mod.__esModule) ? mod : { "default": mod };
41
+ };
42
+ Object.defineProperty(exports, "__esModule", { value: true });
43
+ exports.transcribe = transcribe;
44
+ exports.getSttProviders = getSttProviders;
45
+ // core/voice/stt.ts — Speech-to-Text with three-provider fallback chain.
46
+ //
47
+ // Priority order (auto-selected at runtime):
48
+ // 1. Groq Whisper API (GROQ_API_KEY) — fastest, cloud
49
+ // 2. OpenAI Whisper API (OPENAI_API_KEY) — reliable, cloud
50
+ // 3. Local Whisper.cpp (WHISPER_MODEL_PATH) — offline, no API key
51
+ //
52
+ // If all providers fail: returns { text: '', provider: 'none', error }
53
+ // — never throws; callers check result.text.
54
+ const fs_1 = __importDefault(require("fs"));
55
+ const path_1 = __importDefault(require("path"));
56
+ const child_process_1 = require("child_process");
57
+ const util_1 = require("util");
58
+ const axios_1 = __importDefault(require("axios"));
59
+ const execAsync = (0, util_1.promisify)(child_process_1.exec);
60
+ // ── Internal helpers ──────────────────────────────────────────────────────────
61
+ const WORKSPACE = path_1.default.join(process.cwd(), 'workspace');
62
/** Make sure the WORKSPACE directory exists, creating missing parents as needed. */
function ensureWorkspace() {
    if (fs_1.default.existsSync(WORKSPACE) === false) {
        fs_1.default.mkdirSync(WORKSPACE, { recursive: true });
    }
}
66
/**
 * Resolve the audio file to transcribe.
 *
 * Returns `opts.audioFilePath` when given; otherwise writes `opts.audioBuffer`
 * to a fresh `stt_input_*.wav` file in WORKSPACE and returns that path.
 *
 * @param opts      SttOptions carrying `audioFilePath` or `audioBuffer`.
 * @returns         Path of the audio file on disk.
 * @throws {Error}  If neither audioFilePath nor audioBuffer is provided.
 */
function resolveAudioPath(opts) {
    if (opts?.audioFilePath)
        return opts.audioFilePath;
    if (opts?.audioBuffer) {
        ensureWorkspace();
        // Timestamp alone collides when two transcriptions start in the same
        // millisecond; add the pid and a random suffix to keep inputs apart.
        const unique = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;
        const tmp = path_1.default.join(WORKSPACE, `stt_input_${unique}.wav`);
        // 'wx' refuses to overwrite, so an unexpected clash fails loudly
        // instead of silently replacing another request's audio.
        fs_1.default.writeFileSync(tmp, opts.audioBuffer, { flag: 'wx' });
        return tmp;
    }
    throw new Error('SttOptions: provide audioFilePath or audioBuffer');
}
78
+ // ── Provider 1 — Groq Whisper ─────────────────────────────────────────────────
79
/**
 * Transcribe an audio file with Groq's Whisper endpoint (`whisper-large-v3`).
 *
 * @param audioPath  Path of the audio file to upload.
 * @param opts       SttOptions; `language` and `timeoutMs` (default 30000) are read.
 * @returns          { text, provider: 'groq', durationMs }.
 * @throws {Error}   If GROQ_API_KEY is not set, or the request fails.
 */
async function transcribeGroq(audioPath, opts) {
    const apiKey = process.env.GROQ_API_KEY;
    if (!apiKey) {
        throw new Error('GROQ_API_KEY not set');
    }
    const requestTimeout = opts.timeoutMs ?? 30000;
    const startedAt = Date.now();
    // form-data is loaded lazily, only when this provider actually runs.
    const formDataModule = await Promise.resolve().then(() => __importStar(require('form-data')));
    const FormData = formDataModule.default;
    const upload = new FormData();
    upload.append('file', fs_1.default.createReadStream(audioPath), path_1.default.basename(audioPath));
    upload.append('model', 'whisper-large-v3');
    if (opts.language) {
        upload.append('language', opts.language);
    }
    upload.append('response_format', 'json');
    const requestConfig = {
        headers: { ...upload.getHeaders(), Authorization: `Bearer ${apiKey}` },
        timeout: requestTimeout,
    };
    const response = await axios_1.default.post('https://api.groq.com/openai/v1/audio/transcriptions', upload, requestConfig);
    const transcript = (response.data.text ?? '').trim();
    return { text: transcript, provider: 'groq', durationMs: Date.now() - startedAt };
}
102
+ // ── Provider 2 — OpenAI Whisper ───────────────────────────────────────────────
103
/**
 * Transcribe an audio file with OpenAI's Whisper endpoint (`whisper-1`).
 *
 * @param audioPath  Path of the audio file to upload.
 * @param opts       SttOptions; `language` and `timeoutMs` (default 30000) are read.
 * @returns          { text, provider: 'openai', durationMs }.
 * @throws {Error}   If OPENAI_API_KEY is not set, or the request fails.
 */
async function transcribeOpenAI(audioPath, opts) {
    const { OPENAI_API_KEY: apiKey } = process.env;
    if (!apiKey) {
        throw new Error('OPENAI_API_KEY not set');
    }
    const timeout = opts.timeoutMs ?? 30000;
    const begin = Date.now();
    // form-data is loaded lazily, only when this provider actually runs.
    const { default: FormData } = await Promise.resolve().then(() => __importStar(require('form-data')));
    const body = new FormData();
    const fileName = path_1.default.basename(audioPath);
    body.append('file', fs_1.default.createReadStream(audioPath), fileName);
    body.append('model', 'whisper-1');
    if (opts.language) {
        body.append('language', opts.language);
    }
    body.append('response_format', 'json');
    const headers = { ...body.getHeaders(), Authorization: `Bearer ${apiKey}` };
    const res = await axios_1.default.post('https://api.openai.com/v1/audio/transcriptions', body, { headers, timeout });
    const text = (res.data.text ?? '').trim();
    const durationMs = Date.now() - begin;
    return { text, provider: 'openai', durationMs };
}
126
+ // ── Provider 3 — Local Whisper.cpp ────────────────────────────────────────────
127
/**
 * Transcribe an audio file with a local `whisper-cli` binary.
 *
 * The binary is looked up on PATH, then in `<cwd>/bin`, then in `<cwd>`; the
 * first candidate that answers `--version` is used. The CLI is started with an
 * argument vector (no shell), so `audioPath`, WHISPER_MODEL_PATH and
 * `opts.language` are passed literally and cannot inject shell commands.
 *
 * @param audioPath  Path of the audio file to transcribe.
 * @param opts       SttOptions; `language` and `timeoutMs` (default 60000) are read.
 * @returns          { text, provider: 'local', durationMs }.
 * @throws {Error}   If the CLI fails or does not produce `<audioPath>.txt`.
 */
async function transcribeLocal(audioPath, opts) {
    const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
    const modelPath = process.env.WHISPER_MODEL_PATH;
    const t0 = Date.now();
    const timeout = opts.timeoutMs ?? 60000;
    // whisper-cli binary: try PATH first, then common install locations
    const binaryName = process.platform === 'win32' ? 'whisper-cli.exe' : 'whisper-cli';
    const binaryCandidates = [
        binaryName,
        path_1.default.join(process.cwd(), 'bin', binaryName),
        path_1.default.join(process.cwd(), binaryName),
    ];
    // If no candidate responds, fall back to the bare name and let the real
    // invocation below report the failure.
    let binary = binaryName;
    for (const candidate of binaryCandidates) {
        try {
            await execFileAsync(candidate, ['--version'], { timeout: 3000 });
            binary = candidate;
            break;
        }
        catch { /* try next */ }
    }
    const args = [];
    if (modelPath)
        args.push('-m', modelPath);
    if (opts.language)
        args.push('-l', String(opts.language));
    args.push('-f', audioPath, '--output-txt');
    await execFileAsync(binary, args, { timeout });
    // whisper-cli writes <audioPath>.txt
    const txtPath = `${audioPath}.txt`;
    if (!fs_1.default.existsSync(txtPath))
        throw new Error('whisper-cli produced no output file');
    try {
        const text = fs_1.default.readFileSync(txtPath, 'utf-8').trim();
        return { text, provider: 'local', durationMs: Date.now() - t0 };
    }
    finally {
        // Remove the transcript file even when reading it failed.
        try {
            fs_1.default.unlinkSync(txtPath);
        }
        catch { /* ignore */ }
    }
}
162
+ // ── Main exported function ────────────────────────────────────────────────────
163
/**
 * Transcribe audio by walking the provider chain (Groq, OpenAI, local
 * Whisper.cpp) and returning the first successful result.
 *
 * Never throws: on failure the result has `text: ''`, `provider: 'none'` and
 * an `error` string joining each provider's failure reason. A temp file
 * created for `options.audioBuffer` is deleted before returning.
 *
 * @param options  SttOptions with `audioFilePath` or `audioBuffer`.
 * @returns        SttResult from the first provider that succeeds, or the failure result.
 */
async function transcribe(options) {
    const startedAt = Date.now();
    const failures = [];
    let tempInput = '';
    try {
        const audioPath = resolveAudioPath(options);
        // Only a buffer-backed input produced a temp file that needs cleaning up.
        if (!options.audioFilePath && options.audioBuffer) {
            tempInput = audioPath;
        }
        const chain = [
            { tag: 'groq', label: 'Groq Whisper', run: transcribeGroq },
            { tag: 'openai', label: 'OpenAI Whisper', run: transcribeOpenAI },
            { tag: 'local', label: 'Local Whisper.cpp', run: transcribeLocal },
        ];
        for (const { tag, label, run } of chain) {
            try {
                const result = await run(audioPath, options);
                console.log(`[STT] ${label}: "${result.text.slice(0, 60)}" (${result.durationMs}ms)`);
                return result;
            }
            catch (e) {
                failures.push(`${tag}: ${e.message}`);
            }
        }
        // All failed
        const errorMsg = failures.join(' | ');
        console.warn(`[STT] All providers failed: ${errorMsg}`);
        return { text: '', provider: 'none', durationMs: Date.now() - startedAt, error: errorMsg };
    }
    catch (outer) {
        return { text: '', provider: 'none', durationMs: Date.now() - startedAt, error: outer.message };
    }
    finally {
        if (tempInput) {
            try {
                fs_1.default.unlinkSync(tempInput);
            }
            catch { /* ignore */ }
        }
    }
}
219
/**
 * List the STT providers with an availability flag for each.
 *
 * Availability is only an environment check: GROQ_API_KEY for groq,
 * OPENAI_API_KEY for openai, WHISPER_MODEL_PATH for local.
 *
 * @returns Array of { name, available } in chain order.
 */
function getSttProviders() {
    const envByProvider = [
        ['groq', 'GROQ_API_KEY'],
        ['openai', 'OPENAI_API_KEY'],
        ['local', 'WHISPER_MODEL_PATH'],
    ];
    return envByProvider.map(([name, envVar]) => ({
        name,
        available: Boolean(process.env[envVar]),
    }));
}