aiden-runtime 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +465 -0
- package/config/devos.config.json +186 -0
- package/config/hardware.json +9 -0
- package/config/model-selection.json +7 -0
- package/config/setup-complete.json +20 -0
- package/dist/api/routes/computerUse.js +112 -0
- package/dist/api/server.js +6870 -0
- package/dist/bin/npx-init.js +71 -0
- package/dist/coordination/commandGate.js +115 -0
- package/dist/coordination/livePulse.js +127 -0
- package/dist/core/agentLoop.js +2718 -0
- package/dist/core/agentShield.js +231 -0
- package/dist/core/aidenIdentity.js +215 -0
- package/dist/core/aidenPersonality.js +166 -0
- package/dist/core/aidenSdk.js +374 -0
- package/dist/core/asyncTasks.js +82 -0
- package/dist/core/auditTrail.js +61 -0
- package/dist/core/auxiliaryClient.js +114 -0
- package/dist/core/bgLLM.js +108 -0
- package/dist/core/bm25.js +68 -0
- package/dist/core/callbackSystem.js +64 -0
- package/dist/core/channels/adapter.js +6 -0
- package/dist/core/channels/discord.js +173 -0
- package/dist/core/channels/email.js +253 -0
- package/dist/core/channels/imessage.js +164 -0
- package/dist/core/channels/manager.js +96 -0
- package/dist/core/channels/signal.js +140 -0
- package/dist/core/channels/slack.js +139 -0
- package/dist/core/channels/twilio.js +144 -0
- package/dist/core/channels/webhook.js +186 -0
- package/dist/core/channels/whatsapp.js +185 -0
- package/dist/core/clarifyBus.js +75 -0
- package/dist/core/codeInterpreter.js +82 -0
- package/dist/core/computerControl.js +439 -0
- package/dist/core/conversationMemory.js +334 -0
- package/dist/core/costTracker.js +221 -0
- package/dist/core/cronManager.js +217 -0
- package/dist/core/deepKB.js +77 -0
- package/dist/core/doctor.js +279 -0
- package/dist/core/dreamEngine.js +334 -0
- package/dist/core/entityGraph.js +169 -0
- package/dist/core/eventBus.js +16 -0
- package/dist/core/evolutionAnalyzer.js +153 -0
- package/dist/core/executionLoop.js +309 -0
- package/dist/core/executor.js +224 -0
- package/dist/core/failureAnalyzer.js +166 -0
- package/dist/core/fastPathExpansion.js +82 -0
- package/dist/core/faultEngine.js +106 -0
- package/dist/core/featureGates.js +70 -0
- package/dist/core/fileIngestion.js +113 -0
- package/dist/core/gateway.js +97 -0
- package/dist/core/goalTracker.js +75 -0
- package/dist/core/growthEngine.js +168 -0
- package/dist/core/hardwareDetector.js +98 -0
- package/dist/core/hooks.js +45 -0
- package/dist/core/httpKeepalive.js +46 -0
- package/dist/core/hybridSearch.js +101 -0
- package/dist/core/importers.js +164 -0
- package/dist/core/instinctSystem.js +223 -0
- package/dist/core/knowledgeBase.js +351 -0
- package/dist/core/learningMemory.js +121 -0
- package/dist/core/lessonsBrowser.js +125 -0
- package/dist/core/licenseManager.js +399 -0
- package/dist/core/logBuffer.js +85 -0
- package/dist/core/machineId.js +87 -0
- package/dist/core/mcpClient.js +442 -0
- package/dist/core/memoryDistiller.js +165 -0
- package/dist/core/memoryExtractor.js +212 -0
- package/dist/core/memoryIds.js +213 -0
- package/dist/core/memoryPreamble.js +113 -0
- package/dist/core/memoryQuery.js +136 -0
- package/dist/core/memoryRecall.js +140 -0
- package/dist/core/memoryStrategy.js +201 -0
- package/dist/core/messageValidator.js +85 -0
- package/dist/core/modelDiscovery.js +108 -0
- package/dist/core/modelRouter.js +118 -0
- package/dist/core/morningBriefing.js +203 -0
- package/dist/core/multiGoalValidator.js +51 -0
- package/dist/core/parallelExecutor.js +43 -0
- package/dist/core/passiveSkillObserver.js +204 -0
- package/dist/core/paths.js +57 -0
- package/dist/core/patternDetector.js +83 -0
- package/dist/core/planResponseRepair.js +64 -0
- package/dist/core/planTool.js +111 -0
- package/dist/core/playwrightBridge.js +356 -0
- package/dist/core/pluginSystem.js +121 -0
- package/dist/core/privateMode.js +85 -0
- package/dist/core/reactLoop.js +156 -0
- package/dist/core/recipeEngine.js +166 -0
- package/dist/core/responseCache.js +128 -0
- package/dist/core/runSandbox.js +132 -0
- package/dist/core/sandboxRunner.js +200 -0
- package/dist/core/scheduler.js +543 -0
- package/dist/core/secretScanner.js +49 -0
- package/dist/core/semanticMemory.js +223 -0
- package/dist/core/sessionMemory.js +259 -0
- package/dist/core/sessionRouter.js +91 -0
- package/dist/core/sessionSearch.js +163 -0
- package/dist/core/setupWizard.js +225 -0
- package/dist/core/skillImporter.js +303 -0
- package/dist/core/skillLibrary.js +144 -0
- package/dist/core/skillLoader.js +471 -0
- package/dist/core/skillTeacher.js +352 -0
- package/dist/core/skillValidator.js +210 -0
- package/dist/core/skillWriter.js +384 -0
- package/dist/core/slashAsTool.js +226 -0
- package/dist/core/spawnManager.js +197 -0
- package/dist/core/statusVerbs.js +43 -0
- package/dist/core/swarmManager.js +109 -0
- package/dist/core/taskQueue.js +119 -0
- package/dist/core/taskRecovery.js +128 -0
- package/dist/core/taskState.js +168 -0
- package/dist/core/telegramBot.js +152 -0
- package/dist/core/todoManager.js +70 -0
- package/dist/core/toolNameRepair.js +71 -0
- package/dist/core/toolRegistry.js +2730 -0
- package/dist/core/tools/calendarTool.js +98 -0
- package/dist/core/tools/companyFilingsTool.js +98 -0
- package/dist/core/tools/gmailTool.js +87 -0
- package/dist/core/tools/marketDataTool.js +135 -0
- package/dist/core/tools/socialResearchTool.js +121 -0
- package/dist/core/truthCheck.js +57 -0
- package/dist/core/updateChecker.js +74 -0
- package/dist/core/userCognitionProfile.js +238 -0
- package/dist/core/userProfile.js +341 -0
- package/dist/core/version.js +5 -0
- package/dist/core/visionAnalyze.js +161 -0
- package/dist/core/voice/audio.js +187 -0
- package/dist/core/voice/stt.js +226 -0
- package/dist/core/voice/tts.js +310 -0
- package/dist/core/voiceInput.js +118 -0
- package/dist/core/voiceOutput.js +130 -0
- package/dist/core/webSearch.js +326 -0
- package/dist/core/workflowTracker.js +72 -0
- package/dist/core/workspaceMemory.js +54 -0
- package/dist/core/youtubeTranscript.js +224 -0
- package/dist/integrations/computerUse/apiRegistry.js +113 -0
- package/dist/integrations/computerUse/screenAgent.js +203 -0
- package/dist/integrations/computerUse/visionLoop.js +296 -0
- package/dist/memory/memoryLayers.js +143 -0
- package/dist/providers/boa.js +93 -0
- package/dist/providers/cerebras.js +70 -0
- package/dist/providers/custom.js +89 -0
- package/dist/providers/gemini.js +82 -0
- package/dist/providers/groq.js +92 -0
- package/dist/providers/index.js +149 -0
- package/dist/providers/nvidia.js +70 -0
- package/dist/providers/ollama.js +99 -0
- package/dist/providers/openrouter.js +74 -0
- package/dist/providers/router.js +497 -0
- package/dist/providers/types.js +6 -0
- package/dist/security/browserVault.js +129 -0
- package/dist/security/dataGuard.js +89 -0
- package/dist/tools/eonetTool.js +72 -0
- package/dist/types/computerUse.js +2 -0
- package/dist/types/executor.js +2 -0
- package/dist-bundle/cli.js +357859 -0
- package/package.json +256 -0
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// DevOS — Autonomous AI Execution System
|
|
4
|
+
// Copyright (c) 2026 Shiva Deore. All rights reserved.
|
|
5
|
+
// ============================================================
|
|
6
|
+
// TypeScript interop helper: returns `mod` untouched when it is flagged as an
// ES module, otherwise wraps it so the value is reachable as `.default`.
// A helper already present on `this` is reused instead of redefining it.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.moveMouse = moveMouse;
|
|
11
|
+
exports.clickMouse = clickMouse;
|
|
12
|
+
exports.typeText = typeText;
|
|
13
|
+
exports.pressKey = pressKey;
|
|
14
|
+
exports.takeScreenshot = takeScreenshot;
|
|
15
|
+
exports.readScreen = readScreen;
|
|
16
|
+
exports.openBrowser = openBrowser;
|
|
17
|
+
exports.focusWindow = focusWindow;
|
|
18
|
+
exports.getScreenSize = getScreenSize;
|
|
19
|
+
exports.visionLoop = visionLoop;
|
|
20
|
+
exports.executeWithVisionRetry = executeWithVisionRetry;
|
|
21
|
+
exports.executeWithFallback = executeWithFallback;
|
|
22
|
+
// core/computerControl.ts — Mouse, keyboard, screenshot, and
|
|
23
|
+
// vision-loop computer control.
|
|
24
|
+
// Pure PowerShell implementation — zero native dependencies.
|
|
25
|
+
// Works on any Windows machine without Visual Studio or nut-js.
|
|
26
|
+
const child_process_1 = require("child_process");
|
|
27
|
+
const util_1 = require("util");
|
|
28
|
+
const fs_1 = __importDefault(require("fs"));
|
|
29
|
+
const path_1 = __importDefault(require("path"));
|
|
30
|
+
const auditTrail_1 = require("./auditTrail");
|
|
31
|
+
// Promise-returning wrapper around child_process.exec.
const execAsync = (0, util_1.promisify)(child_process_1.exec);
// Screenshots are written to <cwd>/workspace/screenshots.
const SCREENSHOTS_DIR = path_1.default.join(process.cwd(), 'workspace', 'screenshots');
try {
    fs_1.default.mkdirSync(SCREENSHOTS_DIR, { recursive: true });
}
catch (e) {
    // Best-effort: loading the module must not crash, but a silent failure here
    // would later surface only as "Screenshot failed", so leave a trace.
    console.warn(`[ComputerControl] Could not create ${SCREENSHOTS_DIR}: ${e?.message ?? e}`);
}
|
|
37
|
+
// ── PowerShell helpers ────────────────────────────────────────
|
|
38
|
+
// Run a simple one-liner PowerShell command
|
|
39
|
+
/**
 * Runs a PowerShell one-liner and returns its trimmed stdout.
 *
 * The script is handed to powershell as a single argv entry via execFile, so it
 * never passes through cmd.exe. The previous exec() string relied on `\"`
 * escaping, which cmd.exe does not honour, letting `&`, `|` or `%VAR%` inside
 * the script be interpreted by the shell.
 *
 * @param {string} script - PowerShell source to pass to `-Command`.
 * @returns {Promise<string>} Trimmed stdout, or the error message (possibly
 *   empty) when the process fails or exceeds the 15 s timeout. Never rejects.
 */
async function ps(script) {
    const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
    try {
        const { stdout } = await execFileAsync('powershell', ['-NoProfile', '-NonInteractive', '-Command', script], { timeout: 15000 });
        return stdout.trim();
    }
    catch (e) {
        return e.message || '';
    }
}
|
|
49
|
+
// Write a .ps1 file and execute it — avoids all quoting issues for complex scripts
|
|
50
|
+
/**
 * Writes `script` to a temporary .ps1 file under <cwd>/workspace, runs it with
 * PowerShell and returns the trimmed stdout.
 *
 * The temp file name combines timestamp, pid and a random suffix: a
 * timestamp-only name let two calls in the same millisecond overwrite and
 * delete each other's script. Cleanup happens in `finally`, so the file is
 * removed on both the success and failure paths.
 *
 * @param {string} script - Full PowerShell script text.
 * @returns {Promise<string>} Trimmed stdout, or the error message (possibly
 *   empty) when execution fails or exceeds the 15 s timeout.
 * @throws If the temporary script cannot be written.
 */
async function psFile(script) {
    const workDir = path_1.default.join(process.cwd(), 'workspace');
    const uniqueTag = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;
    const tmpFile = path_1.default.join(workDir, `ps_${uniqueTag}.ps1`);
    // The directory is normally created at module load, but that step is best-effort.
    fs_1.default.mkdirSync(workDir, { recursive: true });
    fs_1.default.writeFileSync(tmpFile, script, 'utf8');
    try {
        const { stdout } = await execAsync(`powershell -NoProfile -NonInteractive -ExecutionPolicy Bypass -File "${tmpFile}"`, { timeout: 15000 });
        return stdout.trim();
    }
    catch (e) {
        return e.message || '';
    }
    finally {
        try {
            fs_1.default.unlinkSync(tmpFile);
        }
        catch {
            // Best-effort cleanup: a leftover temp script must not change the result.
        }
    }
}
|
|
69
|
+
// ── MOUSE ──────────────────────────────────────────────────────
|
|
70
|
+
/**
 * Moves the mouse cursor to the given screen position.
 *
 * Coordinates are coerced to integers before being placed in the PowerShell
 * script; previously the raw values were interpolated, so a non-numeric value
 * produced a broken (or attacker-controlled) script while still reporting
 * success.
 *
 * @param {number|string} x - Horizontal position in pixels.
 * @param {number|string} y - Vertical position in pixels.
 * @returns {Promise<string>} Confirmation message with the coordinates used.
 * @throws {TypeError} If either coordinate is not a finite number.
 */
async function moveMouse(x, y) {
    const px = Math.round(Number(x));
    const py = Math.round(Number(y));
    if (!Number.isFinite(px) || !Number.isFinite(py)) {
        throw new TypeError(`moveMouse: coordinates must be finite numbers, got (${x}, ${y})`);
    }
    await psFile(`
Add-Type -AssemblyName System.Windows.Forms
[System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${px}, ${py})
`);
    return `Mouse moved to (${px}, ${py})`;
}
|
|
77
|
+
/**
 * Moves the cursor to (x, y) and performs a single or double click through
 * user32 `mouse_event`.
 *
 * Coordinates are coerced to integers before being embedded in the script so
 * that a non-numeric value cannot break or inject into the generated
 * PowerShell.
 *
 * @param {number|string} x - Horizontal position in pixels.
 * @param {number|string} y - Vertical position in pixels.
 * @param {string} [button='left'] - 'right' for a right click; any other value
 *   clicks the left button.
 * @param {boolean} [double=false] - Click twice when true.
 * @returns {Promise<string>} Confirmation message.
 * @throws {TypeError} If either coordinate is not a finite number.
 */
async function clickMouse(x, y, button = 'left', double = false) {
    const px = Math.round(Number(x));
    const py = Math.round(Number(y));
    if (!Number.isFinite(px) || !Number.isFinite(py)) {
        throw new TypeError(`clickMouse: coordinates must be finite numbers, got (${x}, ${y})`);
    }
    const clicks = double ? 2 : 1;
    // Selects the MOUSEEVENTF_<side>DOWN / MOUSEEVENTF_<side>UP constant pair.
    const side = button === 'right' ? 'RIGHT' : 'LEFT';
    // NOTE: the here-string terminator "@ must stay at the start of its line.
    await psFile(`
Add-Type @"
using System;
using System.Runtime.InteropServices;
public class MouseClick {
[DllImport("user32.dll")] public static extern bool SetCursorPos(int x, int y);
[DllImport("user32.dll")] public static extern void mouse_event(int dwFlags, int dx, int dy, int cButtons, int dwExtraInfo);
public const int MOUSEEVENTF_LEFTDOWN = 0x02;
public const int MOUSEEVENTF_LEFTUP = 0x04;
public const int MOUSEEVENTF_RIGHTDOWN = 0x08;
public const int MOUSEEVENTF_RIGHTUP = 0x10;
}
"@
[MouseClick]::SetCursorPos(${px}, ${py})
Start-Sleep -Milliseconds 100
for ($i = 0; $i -lt ${clicks}; $i++) {
[MouseClick]::mouse_event([MouseClick]::MOUSEEVENTF_${side}DOWN, 0, 0, 0, 0)
Start-Sleep -Milliseconds 50
[MouseClick]::mouse_event([MouseClick]::MOUSEEVENTF_${side}UP, 0, 0, 0, 0)
Start-Sleep -Milliseconds 80
}
`);
    return `${double ? 'Double-clicked' : 'Clicked'} ${button} at (${px}, ${py})`;
}
|
|
108
|
+
// ── KEYBOARD ───────────────────────────────────────────────────
|
|
109
|
+
/**
 * Types `text` into the focused window using WScript.Shell SendKeys.
 *
 * Escaping applied before the text is embedded in the script:
 *  - `'` is doubled, because the text sits inside a single-quoted PowerShell
 *    string;
 *  - every SendKeys-reserved character (`+ ^ % ~ ( ) { } [ ]`) is wrapped in
 *    braces. `[` and `]` were previously missed although SendKeys requires
 *    them to be brace-enclosed.
 *
 * @param {string} text - Literal text to type.
 * @returns {Promise<string>} Confirmation with the first 50 characters of `text`.
 */
async function typeText(text) {
    // WScript.Shell SendKeys — most reliable for text input
    const safe = text.replace(/'/g, "''").replace(/[+^%~(){}[\]]/g, '{$&}');
    await psFile(`
$wsh = New-Object -ComObject WScript.Shell
Start-Sleep -Milliseconds 200
$wsh.SendKeys('${safe}')
`);
    return `Typed: ${text.slice(0, 50)}${text.length > 50 ? '...' : ''}`;
}
|
|
119
|
+
/**
 * Sends a single key or a known shortcut to the focused window via
 * WScript.Shell SendKeys.
 *
 * Known names (case-insensitive) are translated through `keyMap`; anything
 * else is sent as `{NAME}` in upper case.
 *
 * Fixes over the previous version:
 *  - the table lookup is restricted to own properties, so names such as
 *    "constructor" no longer resolve to members of Object.prototype;
 *  - single quotes in an unmapped key are doubled so the value cannot
 *    terminate the single-quoted PowerShell string it is embedded in.
 *
 * @param {string} key - Key name or shortcut, e.g. "enter", "ctrl+c", "f5".
 * @returns {Promise<string>} Confirmation message echoing `key`.
 */
async function pressKey(key) {
    const keyMap = {
        enter: '{ENTER}',
        tab: '{TAB}',
        escape: '{ESC}',
        esc: '{ESC}',
        backspace: '{BACKSPACE}',
        delete: '{DELETE}',
        up: '{UP}',
        down: '{DOWN}',
        left: '{LEFT}',
        right: '{RIGHT}',
        home: '{HOME}',
        end: '{END}',
        pageup: '{PGUP}',
        pagedown: '{PGDN}',
        f1: '{F1}', f2: '{F2}', f3: '{F3}', f4: '{F4}',
        f5: '{F5}', f6: '{F6}', f7: '{F7}', f8: '{F8}',
        f9: '{F9}', f10: '{F10}',
        f11: '{F11}', f12: '{F12}',
        'ctrl+c': '^c',
        'ctrl+v': '^v',
        'ctrl+a': '^a',
        'ctrl+z': '^z',
        'ctrl+s': '^s',
        'ctrl+t': '^t',
        'ctrl+w': '^w',
        'ctrl+l': '^l',
        'ctrl+r': '^r',
        'ctrl_c': '^c',
        'ctrl_v': '^v',
        'ctrl_a': '^a',
        'ctrl_z': '^z',
        'ctrl_l': '^l',
        'alt+f4': '%{F4}',
        win: '{LWIN}',
    };
    const lookup = key.toLowerCase();
    const mapped = Object.prototype.hasOwnProperty.call(keyMap, lookup)
        ? keyMap[lookup]
        : `{${key.toUpperCase().replace(/'/g, "''")}}`;
    await psFile(`
$wsh = New-Object -ComObject WScript.Shell
Start-Sleep -Milliseconds 100
$wsh.SendKeys('${mapped}')
`);
    return `Pressed: ${key}`;
}
|
|
163
|
+
// ── SCREENSHOT ─────────────────────────────────────────────────
|
|
164
|
+
/**
 * Captures the primary screen to a PNG in SCREENSHOTS_DIR and prunes older
 * captures so that at most 10 `.png` files remain.
 *
 * The target path is embedded in a single-quoted PowerShell string, where the
 * only character that needs escaping is `'` (doubled). The previous code
 * doubled backslashes instead, which corrupts UNC paths (`\\server\share`) and
 * left apostrophes in the path unescaped.
 *
 * @returns {Promise<string>} Absolute path of the new screenshot.
 * @throws {Error} If the file does not exist after the capture script ran.
 */
async function takeScreenshot() {
    const filename = `screenshot_${Date.now()}.png`;
    const filepath = path_1.default.join(SCREENSHOTS_DIR, filename);
    const quotedPath = filepath.replace(/'/g, "''");
    await psFile(`
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
$screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
$bitmap = New-Object System.Drawing.Bitmap($screen.Width, $screen.Height)
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
$graphics.CopyFromScreen($screen.Location, [System.Drawing.Point]::Empty, $screen.Size)
$bitmap.Save('${quotedPath}')
$graphics.Dispose()
$bitmap.Dispose()
`);
    // Trim old screenshots — keep only last 10. Names embed a millisecond
    // timestamp, so the default lexicographic sort lists the oldest first.
    try {
        const files = fs_1.default.readdirSync(SCREENSHOTS_DIR)
            .filter(f => f.endsWith('.png'))
            .sort();
        for (const stale of files.slice(0, Math.max(0, files.length - 10))) {
            try {
                fs_1.default.unlinkSync(path_1.default.join(SCREENSHOTS_DIR, stale));
            }
            catch {
                // Best-effort: a file that cannot be removed is retried on the next capture.
            }
        }
    }
    catch {
        // Pruning is housekeeping only and must not fail the capture itself.
    }
    if (!fs_1.default.existsSync(filepath)) {
        throw new Error('Screenshot failed — file not created');
    }
    return filepath;
}
|
|
198
|
+
/**
 * Captures the screen and reports where the image was stored.
 *
 * @returns {Promise<string>} "Screenshot saved: <path>".
 */
async function readScreen() {
    const savedTo = await takeScreenshot();
    return `Screenshot saved: ${savedTo}`;
}
|
|
202
|
+
// ── BROWSER ────────────────────────────────────────────────────
|
|
203
|
+
/**
 * Opens `url` through PowerShell `Start-Process`, then waits three seconds
 * before resolving.
 *
 * @param {string} url - Address to open; apostrophes are percent-encoded as %27.
 * @returns {Promise<string>} Confirmation message echoing the original `url`.
 */
async function openBrowser(url) {
    const quotedUrl = url.replace(/'/g, '%27');
    const command = `Start-Process '${quotedUrl}'`;
    await psFile(command);
    // Wait for browser to load
    await new Promise((resolve) => {
        setTimeout(resolve, 3000);
    });
    return `Opened browser: ${url}`;
}
|
|
210
|
+
/**
 * Brings the top-level window whose title equals `title` to the foreground
 * (user32 FindWindow + SetForegroundWindow), then waits 500 ms.
 *
 * The title is embedded in a single-quoted PowerShell string with `'` doubled.
 * It was previously placed in a double-quoted string, where `"`, `$(...)`,
 * `$var` and backticks in the title were expanded or executed by PowerShell.
 *
 * @param {string} title - Window title passed to FindWindow.
 * @returns {Promise<string>} Confirmation message; returned whether or not a
 *   matching window was found.
 */
async function focusWindow(title) {
    const quotedTitle = String(title).replace(/'/g, "''");
    // NOTE: the here-string terminator "@ must stay at the start of its line.
    await psFile(`
Add-Type @"
using System;
using System.Runtime.InteropServices;
public class Win32 {
[DllImport("user32.dll")] public static extern bool SetForegroundWindow(IntPtr hWnd);
[DllImport("user32.dll")] public static extern IntPtr FindWindow(string lpClassName, string lpWindowName);
}
"@
$hwnd = [Win32]::FindWindow([NullString]::Value, '${quotedTitle}')
if ($hwnd -ne [IntPtr]::Zero) { [Win32]::SetForegroundWindow($hwnd) }
`);
    await new Promise(r => setTimeout(r, 500));
    return `Focused window: ${title}`;
}
|
|
226
|
+
// ── SCREEN SIZE ────────────────────────────────────────────────
|
|
227
|
+
/**
 * Returns the primary screen resolution reported by PowerShell.
 *
 * The output is accepted only when some line consists solely of
 * `<width>x<height>` with both values positive. The previous `split('x')`
 * parsing could mix one parsed value with one default (for example
 * "warning\n1366x768" gave 1920x768).
 *
 * @returns {Promise<{width: number, height: number}>} Detected size, or
 *   1920x1080 when the output cannot be parsed.
 */
async function getScreenSize() {
    const result = await psFile(`
Add-Type -AssemblyName System.Windows.Forms
$screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
Write-Output "$($screen.Width)x$($screen.Height)"
`);
    const match = /^\s*(\d+)x(\d+)\s*$/m.exec(String(result));
    if (match) {
        const width = Number(match[1]);
        const height = Number(match[2]);
        if (width > 0 && height > 0) {
            return { width, height };
        }
    }
    return { width: 1920, height: 1080 };
}
|
|
236
|
+
// ── VISION LOOP ────────────────────────────────────────────────
|
|
237
|
+
// Iterative see → decide → act loop driven by the active LLM.
|
|
238
|
+
/**
 * Iterative see → decide → act loop.
 *
 * Each step takes a screenshot, asks `callLLM` for the next action as JSON and
 * executes it (click / type / key / scroll). The loop ends when the model
 * reports completion, reports failure, answers with confidence below 0.5, or
 * `maxSteps` is reached.
 *
 * @param {string} goal - Natural-language goal placed in the prompt.
 * @param {number} [maxSteps=10] - Maximum number of iterations.
 * @param {(prompt: string) => Promise<string>} callLLM - Returns the model's
 *   reply; the first `{...}` span in it is parsed as JSON.
 * @returns {Promise<string>} Newline-joined step log, or a single
 *   "Vision loop failed…" / "Vision loop stopped…" message.
 */
async function visionLoop(goal, maxSteps = 10, callLLM) {
    const pause = (ms) => new Promise(r => setTimeout(r, ms));
    const log = [];
    for (let step = 1; step <= maxSteps; step++) {
        // Take screenshot
        let screenshotPath;
        try {
            screenshotPath = await takeScreenshot();
        }
        catch (e) {
            return `Vision loop failed at step ${step}: screenshot error — ${e.message}`;
        }
        const history = log.length > 0
            ? log.map((entry, idx) => `${idx + 1}. ${entry}`).join('\n')
            : 'None yet';
        const prompt = `
You are controlling a Windows computer to achieve this goal: "${goal}"

Steps completed so far:
${history}

Screenshot taken at: ${screenshotPath}

Decide the next action. Respond with ONLY a JSON object:
{
"action": "click|type|key|scroll|done|failed",
"x": 500,
"y": 300,
"text": "text to type if action is type",
"key": "key name if action is key",
"reason": "why this action",
"confidence": 0.85,
"goal_complete": false
}

Rules:
- Use "done" when the goal is fully complete
- Use "failed" if you cannot proceed
- confidence below 0.5 → use "done" and explain in reason
- For browser: click address bar (ctrl+l) → type URL → press enter
`;
        // Any failure while obtaining or parsing the reply consumes the step.
        let decision;
        try {
            const reply = await callLLM(prompt);
            const found = reply.match(/\{[\s\S]*\}/);
            if (!found) {
                throw new Error('No JSON in response');
            }
            decision = JSON.parse(found[0]);
        }
        catch {
            log.push(`Step ${step}: LLM parse error — skipping`);
            continue;
        }
        const { action, reason, confidence } = decision;
        if (decision.goal_complete || action === 'done') {
            log.push(`Goal complete: ${reason}`);
            break;
        }
        if (action === 'failed') {
            return `Vision loop failed: ${reason}`;
        }
        // A missing confidence is treated as full confidence.
        if ((confidence ?? 1) < 0.5) {
            return `Vision loop stopped: low confidence (${confidence}) — ${reason}`;
        }
        // Execute action, then give the UI time to settle.
        let outcome = '';
        if (action === 'click') {
            outcome = await clickMouse(decision.x ?? 0, decision.y ?? 0);
            await pause(800);
        }
        else if (action === 'type') {
            outcome = await typeText(decision.text || '');
            await pause(500);
        }
        else if (action === 'key') {
            outcome = await pressKey(decision.key || 'enter');
            await pause(500);
        }
        else if (action === 'scroll') {
            await psFile(`
$wsh = New-Object -ComObject WScript.Shell
$wsh.SendKeys('{PGDN}')
`);
            outcome = 'Scrolled down';
            await pause(500);
        }
        else {
            outcome = `Unknown action: ${action}`;
        }
        log.push(`Step ${step}: ${action} — ${outcome} (${reason})`);
        console.log(`[VisionLoop] Step ${step}: ${action} — ${reason}`);
    }
    return log.join('\n');
}
|
|
328
|
+
/**
 * runVisionLoop — runs visionLoop once (max 10 steps) and converts its text
 * output into a confidence-scored result.
 *
 * Classification is based on the fixed prefixes visionLoop emits, checked
 * before the completion marker, so a failure whose free-text reason happens to
 * contain "Goal complete" is not reported as success.
 *
 * @param {string} task - Goal handed to visionLoop.
 * @param {(prompt: string) => Promise<string>} [callLLM] - LLM callback. When
 *   omitted, a stub that always answers "failed" is used, so the run reports
 *   failure instead of acting without a model.
 * @returns {Promise<{success: boolean, confidence: number, output?: string, error?: string}>}
 */
async function runVisionLoop(task, callLLM) {
    const llm = typeof callLLM === 'function'
        ? callLLM
        : async () => JSON.stringify({ action: 'failed', reason: 'standalone_run_no_llm', confidence: 0 });
    try {
        const output = await visionLoop(task, 10, llm);
        if (output.startsWith('Vision loop stopped: low confidence')) {
            const match = output.match(/confidence \(([0-9.]+)\)/);
            const parsed = match ? parseFloat(match[1]) : NaN;
            return { success: false, confidence: Number.isFinite(parsed) ? parsed : 0.3, output };
        }
        if (output.startsWith('Vision loop failed')) {
            return { success: false, confidence: 0.0, error: output };
        }
        // Completion is logged by visionLoop as its own line in the step log.
        if (/^Goal complete/m.test(output)) {
            return { success: true, confidence: 1.0, output };
        }
        // Partial output — steps ran but completion is unclear
        return { success: false, confidence: 0.3, output };
    }
    catch (e) {
        return { success: false, confidence: 0.0, error: e.message };
    }
}
/**
 * executeWithVisionRetry — retries the vision loop up to maxAttempts times.
 * Returns early if an attempt succeeds with confidence > 0.7.
 *
 * Waits 1 s, 2 s, … between attempts; there is no wait after the final
 * attempt (the previous version slept once more before reporting failure).
 *
 * @param {string} task - Goal handed to the vision loop.
 * @param {number} [maxAttempts=3] - Maximum number of attempts.
 * @param {(prompt: string) => Promise<string>} [callLLM] - Optional LLM
 *   callback passed to every attempt; without it every attempt fails.
 * @returns {Promise<{success: boolean, confidence: number, output?: string, error?: string}>}
 *   The successful attempt's result, or a generic
 *   `vision_failed_after_retries` failure.
 */
async function executeWithVisionRetry(task, maxAttempts = 3, callLLM) {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        const result = await runVisionLoop(task, callLLM);
        if (result.success && result.confidence > 0.7) {
            return result;
        }
        if (attempt < maxAttempts) {
            await new Promise(r => setTimeout(r, 1000 * attempt));
        }
    }
    return { success: false, confidence: 0, error: 'vision_failed_after_retries' };
}
|
|
372
|
+
/**
 * executePowerShell — runs `task` as a PowerShell script through psFile
 * (Tier 2) and grades the outcome heuristically.
 *
 * The run counts as successful only when the output is non-empty and does not
 * contain the word "error" (case-insensitive).
 *
 * @param {string} task - Script text handed to psFile.
 * @returns {Promise<{success: boolean, output: string}>} Never rejects; a
 *   thrown error is reported as `{ success: false, output: <message> }`.
 */
async function executePowerShell(task) {
    try {
        const output = await psFile(task);
        const mentionsError = output.toLowerCase().includes('error');
        if (mentionsError) {
            return { success: false, output };
        }
        return { success: output.length > 0, output };
    }
    catch (e) {
        return { success: false, output: e.message };
    }
}
|
|
385
|
+
/**
 * executeWithFallback — 4-tier escalation ladder.
 *
 *   Tier 2: PowerShell direct execution
 *   Tier 3: VisionLoop with retries
 *   Tier 4: Log escalation, return clear message for manual intervention
 *
 * (Tier 1 = direct API / native tool call; handled upstream by the tool registry)
 *
 * The Tier 4 audit record now carries the elapsed time of the whole attempt
 * instead of a hard-coded 0.
 *
 * @param {string} task - Task text; run as a PowerShell script first, then
 *   used as the vision-loop goal.
 * @param {(prompt: string) => Promise<string>} [callLLM] - Optional LLM
 *   callback, passed as the third argument to executeWithVisionRetry.
 * @returns {Promise<{success: boolean, tier: number, output?: string, error?: string}>}
 */
async function executeWithFallback(task, callLLM) {
    const startedAt = Date.now();
    // Tier 2 — PowerShell
    try {
        const psResult = await executePowerShell(task);
        if (psResult.success) {
            return { success: true, tier: 2, output: psResult.output };
        }
    }
    catch {
        // Deliberately ignored: any Tier 2 failure simply escalates to Tier 3.
    }
    // Tier 3 — VisionLoop with retries (undefined keeps the callee's default attempt count)
    const visionResult = await executeWithVisionRetry(task, undefined, callLLM);
    if (visionResult.success) {
        return { success: true, tier: 3, output: visionResult.output };
    }
    // Tier 4 — Escalation: log and return a clear message
    auditTrail_1.auditTrail.record({
        action: 'system',
        tool: 'computer_control',
        input: task.slice(0, 200),
        durationMs: Date.now() - startedAt,
        success: false,
        error: 'Escalated to Tier 4 — vision confidence too low',
    });
    return {
        success: false,
        tier: 4,
        error: 'All automated tiers failed. This task requires Claude Computer Use or manual intervention.',
    };
}
|
|
421
|
+
/*
|
|
422
|
+
MANUAL TEST — run in PowerShell from DevOS root:
|
|
423
|
+
node -e "const cc = require('./core/computerControl'); cc.takeScreenshot().then(p => console.log('Screenshot:', p)).catch(console.error);"
|
|
424
|
+
|
|
425
|
+
Expected: Screenshot saved to workspace/screenshots/screenshot_[timestamp].png
|
|
426
|
+
|
|
427
|
+
Full flow test — ask Aiden:
|
|
428
|
+
"open chrome, go to google.com, search for batman, tell me what you see"
|
|
429
|
+
|
|
430
|
+
Expected plan:
|
|
431
|
+
Step 1: open_browser({ url: 'https://www.google.com' })
|
|
432
|
+
Step 2: wait({ ms: 2000 })
|
|
433
|
+
Step 3: keyboard_press({ key: 'ctrl+l' })
|
|
434
|
+
Step 4: keyboard_type({ text: 'batman' })
|
|
435
|
+
Step 5: keyboard_press({ key: 'enter' })
|
|
436
|
+
Step 6: wait({ ms: 1500 })
|
|
437
|
+
Step 7: screenshot()
|
|
438
|
+
Step 8: screen_read() -- describe what's visible
|
|
439
|
+
*/
|