aiden-runtime 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +465 -0
- package/config/devos.config.json +186 -0
- package/config/hardware.json +9 -0
- package/config/model-selection.json +7 -0
- package/config/setup-complete.json +20 -0
- package/dist/api/routes/computerUse.js +112 -0
- package/dist/api/server.js +6870 -0
- package/dist/bin/npx-init.js +71 -0
- package/dist/coordination/commandGate.js +115 -0
- package/dist/coordination/livePulse.js +127 -0
- package/dist/core/agentLoop.js +2718 -0
- package/dist/core/agentShield.js +231 -0
- package/dist/core/aidenIdentity.js +215 -0
- package/dist/core/aidenPersonality.js +166 -0
- package/dist/core/aidenSdk.js +374 -0
- package/dist/core/asyncTasks.js +82 -0
- package/dist/core/auditTrail.js +61 -0
- package/dist/core/auxiliaryClient.js +114 -0
- package/dist/core/bgLLM.js +108 -0
- package/dist/core/bm25.js +68 -0
- package/dist/core/callbackSystem.js +64 -0
- package/dist/core/channels/adapter.js +6 -0
- package/dist/core/channels/discord.js +173 -0
- package/dist/core/channels/email.js +253 -0
- package/dist/core/channels/imessage.js +164 -0
- package/dist/core/channels/manager.js +96 -0
- package/dist/core/channels/signal.js +140 -0
- package/dist/core/channels/slack.js +139 -0
- package/dist/core/channels/twilio.js +144 -0
- package/dist/core/channels/webhook.js +186 -0
- package/dist/core/channels/whatsapp.js +185 -0
- package/dist/core/clarifyBus.js +75 -0
- package/dist/core/codeInterpreter.js +82 -0
- package/dist/core/computerControl.js +439 -0
- package/dist/core/conversationMemory.js +334 -0
- package/dist/core/costTracker.js +221 -0
- package/dist/core/cronManager.js +217 -0
- package/dist/core/deepKB.js +77 -0
- package/dist/core/doctor.js +279 -0
- package/dist/core/dreamEngine.js +334 -0
- package/dist/core/entityGraph.js +169 -0
- package/dist/core/eventBus.js +16 -0
- package/dist/core/evolutionAnalyzer.js +153 -0
- package/dist/core/executionLoop.js +309 -0
- package/dist/core/executor.js +224 -0
- package/dist/core/failureAnalyzer.js +166 -0
- package/dist/core/fastPathExpansion.js +82 -0
- package/dist/core/faultEngine.js +106 -0
- package/dist/core/featureGates.js +70 -0
- package/dist/core/fileIngestion.js +113 -0
- package/dist/core/gateway.js +97 -0
- package/dist/core/goalTracker.js +75 -0
- package/dist/core/growthEngine.js +168 -0
- package/dist/core/hardwareDetector.js +98 -0
- package/dist/core/hooks.js +45 -0
- package/dist/core/httpKeepalive.js +46 -0
- package/dist/core/hybridSearch.js +101 -0
- package/dist/core/importers.js +164 -0
- package/dist/core/instinctSystem.js +223 -0
- package/dist/core/knowledgeBase.js +351 -0
- package/dist/core/learningMemory.js +121 -0
- package/dist/core/lessonsBrowser.js +125 -0
- package/dist/core/licenseManager.js +399 -0
- package/dist/core/logBuffer.js +85 -0
- package/dist/core/machineId.js +87 -0
- package/dist/core/mcpClient.js +442 -0
- package/dist/core/memoryDistiller.js +165 -0
- package/dist/core/memoryExtractor.js +212 -0
- package/dist/core/memoryIds.js +213 -0
- package/dist/core/memoryPreamble.js +113 -0
- package/dist/core/memoryQuery.js +136 -0
- package/dist/core/memoryRecall.js +140 -0
- package/dist/core/memoryStrategy.js +201 -0
- package/dist/core/messageValidator.js +85 -0
- package/dist/core/modelDiscovery.js +108 -0
- package/dist/core/modelRouter.js +118 -0
- package/dist/core/morningBriefing.js +203 -0
- package/dist/core/multiGoalValidator.js +51 -0
- package/dist/core/parallelExecutor.js +43 -0
- package/dist/core/passiveSkillObserver.js +204 -0
- package/dist/core/paths.js +57 -0
- package/dist/core/patternDetector.js +83 -0
- package/dist/core/planResponseRepair.js +64 -0
- package/dist/core/planTool.js +111 -0
- package/dist/core/playwrightBridge.js +356 -0
- package/dist/core/pluginSystem.js +121 -0
- package/dist/core/privateMode.js +85 -0
- package/dist/core/reactLoop.js +156 -0
- package/dist/core/recipeEngine.js +166 -0
- package/dist/core/responseCache.js +128 -0
- package/dist/core/runSandbox.js +132 -0
- package/dist/core/sandboxRunner.js +200 -0
- package/dist/core/scheduler.js +543 -0
- package/dist/core/secretScanner.js +49 -0
- package/dist/core/semanticMemory.js +223 -0
- package/dist/core/sessionMemory.js +259 -0
- package/dist/core/sessionRouter.js +91 -0
- package/dist/core/sessionSearch.js +163 -0
- package/dist/core/setupWizard.js +225 -0
- package/dist/core/skillImporter.js +303 -0
- package/dist/core/skillLibrary.js +144 -0
- package/dist/core/skillLoader.js +471 -0
- package/dist/core/skillTeacher.js +352 -0
- package/dist/core/skillValidator.js +210 -0
- package/dist/core/skillWriter.js +384 -0
- package/dist/core/slashAsTool.js +226 -0
- package/dist/core/spawnManager.js +197 -0
- package/dist/core/statusVerbs.js +43 -0
- package/dist/core/swarmManager.js +109 -0
- package/dist/core/taskQueue.js +119 -0
- package/dist/core/taskRecovery.js +128 -0
- package/dist/core/taskState.js +168 -0
- package/dist/core/telegramBot.js +152 -0
- package/dist/core/todoManager.js +70 -0
- package/dist/core/toolNameRepair.js +71 -0
- package/dist/core/toolRegistry.js +2730 -0
- package/dist/core/tools/calendarTool.js +98 -0
- package/dist/core/tools/companyFilingsTool.js +98 -0
- package/dist/core/tools/gmailTool.js +87 -0
- package/dist/core/tools/marketDataTool.js +135 -0
- package/dist/core/tools/socialResearchTool.js +121 -0
- package/dist/core/truthCheck.js +57 -0
- package/dist/core/updateChecker.js +74 -0
- package/dist/core/userCognitionProfile.js +238 -0
- package/dist/core/userProfile.js +341 -0
- package/dist/core/version.js +5 -0
- package/dist/core/visionAnalyze.js +161 -0
- package/dist/core/voice/audio.js +187 -0
- package/dist/core/voice/stt.js +226 -0
- package/dist/core/voice/tts.js +310 -0
- package/dist/core/voiceInput.js +118 -0
- package/dist/core/voiceOutput.js +130 -0
- package/dist/core/webSearch.js +326 -0
- package/dist/core/workflowTracker.js +72 -0
- package/dist/core/workspaceMemory.js +54 -0
- package/dist/core/youtubeTranscript.js +224 -0
- package/dist/integrations/computerUse/apiRegistry.js +113 -0
- package/dist/integrations/computerUse/screenAgent.js +203 -0
- package/dist/integrations/computerUse/visionLoop.js +296 -0
- package/dist/memory/memoryLayers.js +143 -0
- package/dist/providers/boa.js +93 -0
- package/dist/providers/cerebras.js +70 -0
- package/dist/providers/custom.js +89 -0
- package/dist/providers/gemini.js +82 -0
- package/dist/providers/groq.js +92 -0
- package/dist/providers/index.js +149 -0
- package/dist/providers/nvidia.js +70 -0
- package/dist/providers/ollama.js +99 -0
- package/dist/providers/openrouter.js +74 -0
- package/dist/providers/router.js +497 -0
- package/dist/providers/types.js +6 -0
- package/dist/security/browserVault.js +129 -0
- package/dist/security/dataGuard.js +89 -0
- package/dist/tools/eonetTool.js +72 -0
- package/dist/types/computerUse.js +2 -0
- package/dist/types/executor.js +2 -0
- package/dist-bundle/cli.js +357859 -0
- package/package.json +256 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// DevOS — Autonomous AI Execution System
|
|
4
|
+
// Copyright (c) 2026 Shiva Deore. All rights reserved.
|
|
5
|
+
// ============================================================
|
|
6
|
+
//
|
|
7
|
+
// core/visionAnalyze.ts — Image analysis via vision-capable providers.
|
|
8
|
+
//
|
|
9
|
+
// Provider chain (first available wins):
|
|
10
|
+
// 1. Anthropic claude-3-5-sonnet (ANTHROPIC_API_KEY)
|
|
11
|
+
// 2. OpenAI gpt-4o (OPENAI_API_KEY)
|
|
12
|
+
// 3. Ollama llava (local, no key needed)
|
|
13
|
+
//
|
|
14
|
+
// Accepts local file paths (→ base64) or HTTP/HTTPS URLs.
|
|
15
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
18
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
19
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
20
|
+
}
|
|
21
|
+
Object.defineProperty(o, k2, desc);
|
|
22
|
+
}) : (function(o, m, k, k2) {
|
|
23
|
+
if (k2 === undefined) k2 = k;
|
|
24
|
+
o[k2] = m[k];
|
|
25
|
+
}));
|
|
26
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
27
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
28
|
+
}) : function(o, v) {
|
|
29
|
+
o["default"] = v;
|
|
30
|
+
});
|
|
31
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
32
|
+
var ownKeys = function(o) {
|
|
33
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
34
|
+
var ar = [];
|
|
35
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
36
|
+
return ar;
|
|
37
|
+
};
|
|
38
|
+
return ownKeys(o);
|
|
39
|
+
};
|
|
40
|
+
return function (mod) {
|
|
41
|
+
if (mod && mod.__esModule) return mod;
|
|
42
|
+
var result = {};
|
|
43
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
44
|
+
__setModuleDefault(result, mod);
|
|
45
|
+
return result;
|
|
46
|
+
};
|
|
47
|
+
})();
|
|
48
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
49
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
50
|
+
};
|
|
51
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
52
|
+
exports.analyzeImage = analyzeImage;
|
|
53
|
+
const fs = __importStar(require("fs"));
|
|
54
|
+
const path = __importStar(require("path"));
|
|
55
|
+
const axios_1 = __importDefault(require("axios"));
|
|
56
|
+
// ── Media type resolver ───────────────────────────────────────────────────────
|
|
57
|
+
/**
 * Map an image file extension to its MIME type.
 *
 * @param ext Extension with or without a leading dot, in any letter case.
 * @returns The MIME type for the extension, or 'image/jpeg' when it is not recognised.
 */
function extToMediaType(ext) {
    const map = {
        jpg: 'image/jpeg', jpeg: 'image/jpeg', png: 'image/png',
        gif: 'image/gif', webp: 'image/webp', bmp: 'image/bmp',
    };
    const key = ext.toLowerCase().replace(/^\./, '');
    // Own-property check: a bare lookup would hand back inherited members
    // (e.g. "constructor", "__proto__") instead of falling back to the default.
    return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : 'image/jpeg';
}
|
|
64
|
+
// ── Core function ─────────────────────────────────────────────────────────────
|
|
65
|
+
/**
 * Analyze an image using the first vision-capable provider that yields a description.
 *
 * Providers are tried in order: Anthropic (only when ANTHROPIC_API_KEY is set),
 * OpenAI (only when OPENAI_API_KEY is set), then Ollama llava at OLLAMA_BASE_URL
 * (default http://127.0.0.1:11434). A cloud provider that errors or returns an
 * empty description is skipped; the reason is appended to the final error
 * message if Ollama fails as well. Ollama's result is returned even when its
 * description is empty.
 *
 * @param imageSource File path (absolute, or relative to process.cwd()) or HTTP(S) URL.
 * @param prompt      Instruction prompt (default: describe the image).
 * @returns Object with description, provider, modelUsed and durationMs.
 * @throws Error when a local file cannot be read, or when no provider succeeds.
 */
async function analyzeImage(imageSource, prompt = 'Describe this image in detail.') {
    const start = Date.now();
    // Scheme match is case-insensitive so "HTTPS://…" is not mistaken for a file path.
    const isUrl = /^https?:\/\//i.test(imageSource);
    let base64Data = '';
    let mediaType = 'image/jpeg';
    if (!isUrl) {
        const absPath = path.isAbsolute(imageSource)
            ? imageSource
            : path.resolve(process.cwd(), imageSource);
        // Asynchronous read: does not block the event loop while the image loads.
        const buf = await fs.promises.readFile(absPath);
        base64Data = buf.toString('base64');
        mediaType = extToMediaType(path.extname(absPath));
    }
    // Reasons the cloud providers were skipped; reported only if every provider fails.
    const skipped = [];
    const withSkipped = (message) => (skipped.length > 0
        ? `${message} [earlier: ${skipped.join(' | ')}]`
        : message);
    // ── Provider 1: Anthropic ─────────────────────────────────────────────────
    const anthropicKey = process.env.ANTHROPIC_API_KEY;
    if (anthropicKey) {
        try {
            const imageBlock = isUrl
                ? { type: 'image', source: { type: 'url', url: imageSource } }
                : { type: 'image', source: { type: 'base64', media_type: mediaType, data: base64Data } };
            const res = await axios_1.default.post('https://api.anthropic.com/v1/messages', {
                model: 'claude-3-5-sonnet-20241022',
                max_tokens: 1024,
                messages: [{ role: 'user', content: [imageBlock, { type: 'text', text: prompt }] }],
            }, {
                headers: {
                    'x-api-key': anthropicKey,
                    'anthropic-version': '2023-06-01',
                    'content-type': 'application/json',
                },
                timeout: 30000,
            });
            const description = (res.data?.content?.[0]?.text ?? '').trim();
            if (description) {
                return { description, provider: 'anthropic', modelUsed: 'claude-3-5-sonnet-20241022', durationMs: Date.now() - start };
            }
            skipped.push('anthropic: empty description');
        }
        catch (e) {
            // Fall through to the next provider, but remember why this one failed.
            skipped.push(`anthropic: ${e?.message ?? e}`);
        }
    }
    // ── Provider 2: OpenAI ────────────────────────────────────────────────────
    const openaiKey = process.env.OPENAI_API_KEY;
    if (openaiKey) {
        try {
            // Local files are sent inline as a data: URL.
            const imageUrl = isUrl
                ? imageSource
                : `data:${mediaType};base64,${base64Data}`;
            const res = await axios_1.default.post('https://api.openai.com/v1/chat/completions', {
                model: 'gpt-4o',
                max_tokens: 1024,
                messages: [{
                        role: 'user',
                        content: [
                            { type: 'image_url', image_url: { url: imageUrl } },
                            { type: 'text', text: prompt },
                        ],
                    }],
            }, {
                headers: { Authorization: `Bearer ${openaiKey}`, 'content-type': 'application/json' },
                timeout: 30000,
            });
            const description = (res.data?.choices?.[0]?.message?.content ?? '').trim();
            if (description) {
                return { description, provider: 'openai', modelUsed: 'gpt-4o', durationMs: Date.now() - start };
            }
            skipped.push('openai: empty description');
        }
        catch (e) {
            skipped.push(`openai: ${e?.message ?? e}`);
        }
    }
    // ── Provider 3: Ollama llava ──────────────────────────────────────────────
    const ollamaBase = (process.env.OLLAMA_BASE_URL ?? 'http://127.0.0.1:11434').replace(/\/$/, '');
    // Ollama is sent base64 image data, so a remote image is downloaded first.
    let ollamaBase64 = base64Data;
    if (isUrl) {
        try {
            const imgRes = await axios_1.default.get(imageSource, { responseType: 'arraybuffer', timeout: 15000 });
            ollamaBase64 = Buffer.from(imgRes.data).toString('base64');
        }
        catch (e) {
            throw new Error(withSkipped(`vision_analyze: all providers failed (could not download URL for Ollama). ${e.message}`), { cause: e });
        }
    }
    try {
        const res = await axios_1.default.post(`${ollamaBase}/api/generate`, { model: 'llava', prompt, images: [ollamaBase64], stream: false }, { timeout: 60000 });
        const description = (res.data?.response ?? '').trim();
        return { description, provider: 'ollama', modelUsed: 'llava', durationMs: Date.now() - start };
    }
    catch (e) {
        throw new Error(withSkipped(`vision_analyze: all providers exhausted. ${e.message}`), { cause: e });
    }
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// DevOS — Autonomous AI Execution System
|
|
4
|
+
// Copyright (c) 2026 Shiva Deore. All rights reserved.
|
|
5
|
+
// ============================================================
|
|
6
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
7
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
8
|
+
};
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.recordAudio = recordAudio;
|
|
11
|
+
exports.playAudio = playAudio;
|
|
12
|
+
exports.checkAudioAvailable = checkAudioAvailable;
|
|
13
|
+
// core/voice/audio.ts — Platform audio I/O: recording + playback.
|
|
14
|
+
//
|
|
15
|
+
// Recording: Windows MCI (P/Invoke via PowerShell, no third-party dep)
|
|
16
|
+
// Playback: Windows Media Player (presentationCore) → Start-Process fallback
|
|
17
|
+
//
|
|
18
|
+
// Cross-platform note: on Linux/macOS, recording tries sox first and then arecord.
|
|
19
|
+
// Playback uses afplay on macOS; on Linux it tries paplay, then aplay.
|
|
20
|
+
const fs_1 = __importDefault(require("fs"));
|
|
21
|
+
const path_1 = __importDefault(require("path"));
|
|
22
|
+
const child_process_1 = require("child_process");
|
|
23
|
+
const util_1 = require("util");
|
|
24
|
+
const execAsync = (0, util_1.promisify)(child_process_1.exec);
|
|
25
|
+
const WORKSPACE = path_1.default.join(process.cwd(), 'workspace');
|
|
26
|
+
/** Create the WORKSPACE directory (including missing parents) unless it already exists. */
function ensureWorkspace() {
    const alreadyPresent = fs_1.default.existsSync(WORKSPACE);
    if (alreadyPresent) {
        return;
    }
    fs_1.default.mkdirSync(WORKSPACE, { recursive: true });
}
|
|
30
|
+
// ── Record audio from microphone ──────────────────────────────────────────────
|
|
31
|
+
/**
 * Capture audio from the default microphone into a .wav file.
 *
 * @param durationSeconds How long to record, in seconds (default 5).
 * @param outputPath      Destination file; when omitted, a timestamped
 *                        recording_<ms>.wav inside workspace/ is used.
 * @returns               The path handed back by the platform recorder.
 */
async function recordAudio(durationSeconds = 5, outputPath) {
    ensureWorkspace();
    const target = outputPath ?? path_1.default.join(WORKSPACE, `recording_${Date.now()}.wav`);
    const millis = Math.round(durationSeconds * 1000);
    // Dispatch on platform: win32 has its own recorder, everything else shares one.
    const recorder = process.platform === 'win32' ? _recordWindows : _recordUnix;
    return recorder(target, millis);
}
|
|
49
|
+
/**
 * Record from the default input device on Windows.
 *
 * Writes a temporary PowerShell script into workspace/ that compiles a small
 * C# wrapper around winmm.dll's mciSendString, records for durationMs, saves
 * the result to outputPath, and is deleted again afterwards.
 *
 * @param outputPath Destination .wav file.
 * @param durationMs Recording length in milliseconds.
 * @returns outputPath once the PowerShell process exits successfully.
 * @throws Error prefixed with "[Audio] Recording failed:" when PowerShell fails or times out.
 */
async function _recordWindows(outputPath, durationMs) {
    // PowerShell single-quoted literal: nothing inside is expanded, and an
    // embedded single quote is written as two. A double-quoted literal would let
    // "$", backticks or quotes in the path be interpreted as PowerShell code.
    const psPath = `'${outputPath.replace(/'/g, "''")}'`;
    const psScript = `
Add-Type -TypeDefinition @"
using System;
using System.Threading;
using System.Runtime.InteropServices;

public class AudioRecorder {
    [DllImport("winmm.dll")]
    private static extern int mciSendString(
        string command,
        System.Text.StringBuilder returnValue,
        int returnLength,
        IntPtr winHandle
    );

    public static void Record(string outputPath, int durationMs) {
        mciSendString("open new Type waveaudio Alias recsound", null, 0, IntPtr.Zero);
        mciSendString("set recsound channels 1 bitspersample 16 samplespersec 16000", null, 0, IntPtr.Zero);
        mciSendString("record recsound", null, 0, IntPtr.Zero);
        Thread.Sleep(durationMs);
        mciSendString("stop recsound", null, 0, IntPtr.Zero);
        // The file name is quoted so paths containing spaces are saved correctly.
        mciSendString("save recsound \\"" + outputPath + "\\"", null, 0, IntPtr.Zero);
        mciSendString("close recsound", null, 0, IntPtr.Zero);
    }
}
"@
[AudioRecorder]::Record(${psPath}, ${durationMs})
Write-Output ${psPath}
`.trim();
    const psFile = path_1.default.join(WORKSPACE, `record_${Date.now()}.ps1`);
    fs_1.default.writeFileSync(psFile, psScript);
    try {
        // Timeout leaves 8 s on top of the recording itself for PowerShell/Add-Type startup.
        await execAsync(`powershell.exe -ExecutionPolicy Bypass -File "${psFile}"`, { timeout: durationMs + 8000 });
        return outputPath;
    }
    catch (e) {
        throw new Error(`[Audio] Recording failed: ${e.message}`, { cause: e });
    }
    finally {
        // Best-effort removal of the temporary script.
        try {
            fs_1.default.unlinkSync(psFile);
        }
        catch { /* ignore */ }
    }
}
|
|
96
|
+
/**
 * Record from the default input device on non-Windows platforms.
 * Tries sox first and falls back to arecord if sox fails for any reason.
 *
 * @param outputPath Destination .wav file.
 * @param durationMs Recording length in milliseconds; rounded up to whole seconds.
 * @returns outputPath once one of the recorders exits successfully.
 * @throws The arecord failure when both recorders fail.
 */
async function _recordUnix(outputPath, durationMs) {
    const seconds = String(Math.ceil(durationMs / 1000));
    const options = { timeout: durationMs + 5000 };
    // execFile hands arguments straight to the program (no shell), so quotes,
    // "$(...)" or backticks in outputPath can never be executed as commands.
    const run = (0, util_1.promisify)(child_process_1.execFile);
    try {
        await run('sox', ['-d', '-t', 'wav', outputPath, 'trim', '0', seconds], options);
    }
    catch {
        await run('arecord', ['-d', seconds, '-f', 'S16_LE', '-r', '16000', '-c', '1', outputPath], options);
    }
    return outputPath;
}
|
|
107
|
+
// ── Play audio ────────────────────────────────────────────────────────────────
|
|
108
|
+
/**
 * Play audio from a file path or an in-memory Buffer.
 *
 * A Buffer is first written to a temporary playback_<ms>.wav inside workspace/;
 * that file is removed by a timer ten seconds after the platform player returns.
 * The call resolves once the platform player helper has finished.
 *
 * @param audioSource Path to an audio file, or a Buffer holding audio data.
 * @throws Error "[Audio] File not found: …" when the resolved file does not exist;
 *         errors thrown by the platform player helper propagate.
 */
async function playAudio(audioSource) {
    ensureWorkspace();
    const fromBuffer = Buffer.isBuffer(audioSource);
    const target = fromBuffer
        ? path_1.default.join(WORKSPACE, `playback_${Date.now()}.wav`)
        : audioSource;
    if (fromBuffer) {
        fs_1.default.writeFileSync(target, audioSource);
    }
    if (!fs_1.default.existsSync(target)) {
        throw new Error(`[Audio] File not found: ${target}`);
    }
    try {
        const player = process.platform === 'win32' ? _playWindows : _playUnix;
        await player(target);
    }
    finally {
        if (fromBuffer) {
            // Deferred, best-effort cleanup of the temporary file.
            setTimeout(() => {
                try {
                    fs_1.default.unlinkSync(target);
                }
                catch { /* ignore */ }
            }, 10000);
        }
    }
}
|
|
146
|
+
/**
 * Play an audio file on Windows through PowerShell.
 *
 * First attempt: System.Windows.Media.MediaPlayer, which is started, left
 * running for 10 seconds and then stopped. If that command fails, the file is
 * opened with Start-Process instead; a failure of the fallback is ignored, so
 * this function never rejects.
 *
 * @param filePath Path of the audio file to play.
 */
async function _playWindows(filePath) {
    const escaped = filePath
        .replace(/\\/g, '\\\\')
        // The path sits inside a PowerShell single-quoted literal, where an
        // embedded single quote must be doubled; otherwise it ends the literal
        // and the remainder of the path is parsed as PowerShell code.
        .replace(/'/g, "''");
    try {
        await execAsync(`powershell -Command "Add-Type -AssemblyName presentationCore; $mp = New-Object System.Windows.Media.MediaPlayer; $mp.Open([uri]'${escaped}'); $mp.Play(); Start-Sleep -Seconds 10; $mp.Stop(); $mp.Close()"`, { timeout: 30000 });
    }
    catch {
        // Fallback: system default media player
        try {
            await execAsync(`powershell -Command "Start-Process '${escaped}'"`, { timeout: 5000 });
        }
        catch { /* best effort — playback failure is not reported */ }
    }
}
|
|
154
|
+
/**
 * Play an audio file on non-Windows platforms and wait for the player to exit.
 * macOS uses afplay; other platforms try paplay and fall back to aplay.
 *
 * @param filePath Path of the audio file to play.
 * @throws The player's failure (afplay on macOS, otherwise aplay) when playback fails.
 */
async function _playUnix(filePath) {
    // execFile passes filePath as a single argument without a shell, so shell
    // metacharacters in the path are never interpreted.
    const run = (0, util_1.promisify)(child_process_1.execFile);
    const options = { timeout: 30000 };
    if (process.platform === 'darwin') {
        await run('afplay', [filePath], options);
        return;
    }
    try {
        await run('paplay', [filePath], options);
    }
    catch {
        await run('aplay', [filePath], options);
    }
}
|
|
167
|
+
// ── Availability check ────────────────────────────────────────────────────────
|
|
168
|
+
/**
 * Heuristic probe for audio recording support.
 *
 * On win32 it runs a PowerShell command that loads the System.Speech assembly;
 * elsewhere it checks that `which` can find arecord or sox.
 *
 * @returns true when the probe command succeeds, false when it fails or times out.
 */
async function checkAudioAvailable() {
    const onWindows = process.platform === 'win32';
    const probe = onWindows
        ? 'powershell -Command "Add-Type -AssemblyName System.Speech; Write-Output ok"'
        : 'which arecord || which sox';
    const timeout = onWindows ? 3000 : 2000;
    try {
        await execAsync(probe, { timeout });
        return true;
    }
    catch {
        return false;
    }
}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// DevOS — Autonomous AI Execution System
|
|
4
|
+
// Copyright (c) 2026 Shiva Deore. All rights reserved.
|
|
5
|
+
// ============================================================
|
|
6
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
7
|
+
if (k2 === undefined) k2 = k;
|
|
8
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
9
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
10
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
11
|
+
}
|
|
12
|
+
Object.defineProperty(o, k2, desc);
|
|
13
|
+
}) : (function(o, m, k, k2) {
|
|
14
|
+
if (k2 === undefined) k2 = k;
|
|
15
|
+
o[k2] = m[k];
|
|
16
|
+
}));
|
|
17
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
18
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
19
|
+
}) : function(o, v) {
|
|
20
|
+
o["default"] = v;
|
|
21
|
+
});
|
|
22
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
23
|
+
var ownKeys = function(o) {
|
|
24
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
25
|
+
var ar = [];
|
|
26
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
27
|
+
return ar;
|
|
28
|
+
};
|
|
29
|
+
return ownKeys(o);
|
|
30
|
+
};
|
|
31
|
+
return function (mod) {
|
|
32
|
+
if (mod && mod.__esModule) return mod;
|
|
33
|
+
var result = {};
|
|
34
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
35
|
+
__setModuleDefault(result, mod);
|
|
36
|
+
return result;
|
|
37
|
+
};
|
|
38
|
+
})();
|
|
39
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
40
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
41
|
+
};
|
|
42
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
43
|
+
exports.transcribe = transcribe;
|
|
44
|
+
exports.getSttProviders = getSttProviders;
|
|
45
|
+
// core/voice/stt.ts — Speech-to-Text with three-provider fallback chain.
|
|
46
|
+
//
|
|
47
|
+
// Priority order (auto-selected at runtime):
|
|
48
|
+
// 1. Groq Whisper API (GROQ_API_KEY) — fastest, cloud
|
|
49
|
+
// 2. OpenAI Whisper API (OPENAI_API_KEY) — reliable, cloud
|
|
50
|
+
// 3. Local Whisper.cpp (WHISPER_MODEL_PATH) — offline, no API key
|
|
51
|
+
//
|
|
52
|
+
// If all providers fail: returns { text: '', provider: 'none', error }
|
|
53
|
+
// — never throws; callers check result.text.
|
|
54
|
+
const fs_1 = __importDefault(require("fs"));
|
|
55
|
+
const path_1 = __importDefault(require("path"));
|
|
56
|
+
const child_process_1 = require("child_process");
|
|
57
|
+
const util_1 = require("util");
|
|
58
|
+
const axios_1 = __importDefault(require("axios"));
|
|
59
|
+
const execAsync = (0, util_1.promisify)(child_process_1.exec);
|
|
60
|
+
// ── Internal helpers ──────────────────────────────────────────────────────────
|
|
61
|
+
const WORKSPACE = path_1.default.join(process.cwd(), 'workspace');
|
|
62
|
+
/** Make sure the WORKSPACE directory exists, creating it (and missing parents) when absent. */
function ensureWorkspace() {
    const exists = fs_1.default.existsSync(WORKSPACE);
    if (exists) {
        return;
    }
    fs_1.default.mkdirSync(WORKSPACE, { recursive: true });
}
|
|
66
|
+
/**
 * Pick the audio file to transcribe.
 *
 * A truthy opts.audioFilePath is returned as-is. Otherwise opts.audioBuffer is
 * written to a new stt_input_<ms>.wav inside workspace/ and that path is returned.
 *
 * @param opts Options carrying audioFilePath and/or audioBuffer.
 * @returns Path of the audio file.
 * @throws Error when neither audioFilePath nor audioBuffer is provided.
 */
function resolveAudioPath(opts) {
    const givenPath = opts.audioFilePath;
    if (givenPath) {
        return givenPath;
    }
    if (!opts.audioBuffer) {
        throw new Error('SttOptions: provide audioFilePath or audioBuffer');
    }
    ensureWorkspace();
    const tempFile = path_1.default.join(WORKSPACE, `stt_input_${Date.now()}.wav`);
    fs_1.default.writeFileSync(tempFile, opts.audioBuffer);
    return tempFile;
}
|
|
78
|
+
// ── Provider 1 — Groq Whisper ─────────────────────────────────────────────────
|
|
79
|
+
/**
 * Transcribe an audio file with Groq's Whisper endpoint (model whisper-large-v3).
 *
 * @param audioPath Path of the audio file to upload.
 * @param opts      Options; reads timeoutMs (default 30000) and optional language.
 * @returns Object with the trimmed text, provider 'groq' and durationMs.
 * @throws Error "GROQ_API_KEY not set" when the key is missing; request errors propagate.
 */
async function transcribeGroq(audioPath, opts) {
    const apiKey = process.env.GROQ_API_KEY;
    if (!apiKey) {
        throw new Error('GROQ_API_KEY not set');
    }
    const timeout = opts.timeoutMs ?? 30000;
    const startedAt = Date.now();
    // form-data is loaded lazily, only once a request is actually going out.
    const formDataModule = await Promise.resolve().then(() => __importStar(require('form-data')));
    const body = new formDataModule.default();
    body.append('file', fs_1.default.createReadStream(audioPath), path_1.default.basename(audioPath));
    body.append('model', 'whisper-large-v3');
    if (opts.language) {
        body.append('language', opts.language);
    }
    body.append('response_format', 'json');
    const requestConfig = {
        headers: { ...body.getHeaders(), Authorization: `Bearer ${apiKey}` },
        timeout,
    };
    const response = await axios_1.default.post('https://api.groq.com/openai/v1/audio/transcriptions', body, requestConfig);
    const text = (response.data.text ?? '').trim();
    return { text, provider: 'groq', durationMs: Date.now() - startedAt };
}
|
|
102
|
+
// ── Provider 2 — OpenAI Whisper ───────────────────────────────────────────────
|
|
103
|
+
/**
 * Transcribe an audio file with OpenAI's Whisper endpoint (model whisper-1).
 *
 * @param audioPath Path of the audio file to upload.
 * @param opts      Options; reads timeoutMs (default 30000) and optional language.
 * @returns Object with the trimmed text, provider 'openai' and durationMs.
 * @throws Error "OPENAI_API_KEY not set" when the key is missing; request errors propagate.
 */
async function transcribeOpenAI(audioPath, opts) {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
        throw new Error('OPENAI_API_KEY not set');
    }
    const timeout = opts.timeoutMs ?? 30000;
    const startedAt = Date.now();
    // form-data is loaded lazily, only once a request is actually going out.
    const formDataModule = await Promise.resolve().then(() => __importStar(require('form-data')));
    const upload = new formDataModule.default();
    upload.append('file', fs_1.default.createReadStream(audioPath), path_1.default.basename(audioPath));
    upload.append('model', 'whisper-1');
    if (opts.language) {
        upload.append('language', opts.language);
    }
    upload.append('response_format', 'json');
    const requestConfig = {
        headers: { ...upload.getHeaders(), Authorization: `Bearer ${apiKey}` },
        timeout,
    };
    const response = await axios_1.default.post('https://api.openai.com/v1/audio/transcriptions', upload, requestConfig);
    const text = (response.data.text ?? '').trim();
    return { text, provider: 'openai', durationMs: Date.now() - startedAt };
}
|
|
126
|
+
// ── Provider 3 — Local Whisper.cpp ────────────────────────────────────────────
|
|
127
|
+
/**
 * Transcribe an audio file with a local whisper-cli binary.
 *
 * The binary is looked up as a bare command name, then under <cwd>/bin, then in
 * <cwd>; the first candidate that answers `--version` is used, otherwise the
 * bare name. The transcript is read from <audioPath>.txt, which is deleted
 * afterwards.
 *
 * @param audioPath Path of the audio file to transcribe.
 * @param opts      Options; reads timeoutMs (default 60000) and optional language.
 * @returns Object with the trimmed text, provider 'local' and durationMs.
 * @throws Error when whisper-cli fails, times out, or leaves no output file.
 */
async function transcribeLocal(audioPath, opts) {
    const modelPath = process.env.WHISPER_MODEL_PATH;
    const t0 = Date.now();
    const timeout = opts.timeoutMs ?? 60000;
    // execFile passes each argument verbatim (no shell), so the audio path,
    // model path and language can never be interpreted as shell syntax.
    const run = (0, util_1.promisify)(child_process_1.execFile);
    // whisper-cli binary: try PATH first, then common install locations
    const binaryName = process.platform === 'win32' ? 'whisper-cli.exe' : 'whisper-cli';
    const binaryCandidates = [
        binaryName,
        path_1.default.join(process.cwd(), 'bin', binaryName),
        path_1.default.join(process.cwd(), binaryName),
    ];
    let binary = binaryName;
    for (const candidate of binaryCandidates) {
        try {
            await run(candidate, ['--version'], { timeout: 3000 });
            binary = candidate;
            break;
        }
        catch { /* try next */ }
    }
    const args = [];
    if (modelPath) {
        args.push('-m', modelPath);
    }
    if (opts.language) {
        args.push('-l', String(opts.language));
    }
    args.push('-f', audioPath, '--output-txt');
    await run(binary, args, { timeout });
    // whisper-cli writes <audioPath>.txt
    const txtPath = audioPath + '.txt';
    if (!fs_1.default.existsSync(txtPath)) {
        throw new Error('whisper-cli produced no output file');
    }
    const text = fs_1.default.readFileSync(txtPath, 'utf-8').trim();
    try {
        fs_1.default.unlinkSync(txtPath);
    }
    catch { /* ignore */ }
    return { text, provider: 'local', durationMs: Date.now() - t0 };
}
|
|
162
|
+
// ── Main exported function ────────────────────────────────────────────────────
|
|
163
|
+
/**
 * Transcribe audio by walking the provider chain: Groq, then OpenAI, then local Whisper.cpp.
 *
 * The first provider that returns wins. Failures are collected and, when every
 * provider fails, joined with " | " into result.error. This function does not
 * throw: any error is reported as { text: '', provider: 'none', error }.
 * A temp file created from options.audioBuffer is removed before returning.
 *
 * @param options Options with audioFilePath or audioBuffer, passed on to each provider.
 * @returns The winning provider's result, or a 'none' result describing the failure.
 */
async function transcribe(options) {
    const startedAt = Date.now();
    let tempInput = '';
    const failures = [];
    try {
        const audioPath = resolveAudioPath(options);
        // Only a path that was materialised from the buffer is ours to delete.
        if (!options.audioFilePath && options.audioBuffer) {
            tempInput = audioPath;
        }
        const chain = [
            { key: 'groq', label: 'Groq Whisper', run: transcribeGroq },
            { key: 'openai', label: 'OpenAI Whisper', run: transcribeOpenAI },
            { key: 'local', label: 'Local Whisper.cpp', run: transcribeLocal },
        ];
        for (const { key, label, run } of chain) {
            try {
                const result = await run(audioPath, options);
                console.log(`[STT] ${label}: "${result.text.slice(0, 60)}" (${result.durationMs}ms)`);
                return result;
            }
            catch (e) {
                failures.push(`${key}: ${e.message}`);
            }
        }
        // All failed
        const errorMsg = failures.join(' | ');
        console.warn(`[STT] All providers failed: ${errorMsg}`);
        return { text: '', provider: 'none', durationMs: Date.now() - startedAt, error: errorMsg };
    }
    catch (outer) {
        return { text: '', provider: 'none', durationMs: Date.now() - startedAt, error: outer.message };
    }
    finally {
        if (tempInput) {
            try {
                fs_1.default.unlinkSync(tempInput);
            }
            catch { /* ignore */ }
        }
    }
}
|
|
219
|
+
/**
 * List the STT providers with an availability flag for each.
 * Availability only reflects whether the provider's environment variable is
 * set to a non-empty value; nothing is contacted or executed.
 *
 * @returns Array of { name, available } in the order groq, openai, local.
 */
function getSttProviders() {
    const envByProvider = [
        ['groq', 'GROQ_API_KEY'],
        ['openai', 'OPENAI_API_KEY'],
        ['local', 'WHISPER_MODEL_PATH'],
    ];
    return envByProvider.map(([name, envVar]) => ({
        name,
        available: Boolean(process.env[envVar]),
    }));
}
|