0agent 1.0.68 → 1.0.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/daemon.mjs +1120 -148
- package/package.json +1 -1
package/dist/daemon.mjs
CHANGED
|
@@ -1507,7 +1507,7 @@ var init_EdgeWeightUpdater = __esm({
|
|
|
1507
1507
|
this.weightLog.append(event);
|
|
1508
1508
|
}
|
|
1509
1509
|
sleep(ms) {
|
|
1510
|
-
return new Promise((
|
|
1510
|
+
return new Promise((resolve17) => setTimeout(resolve17, ms));
|
|
1511
1511
|
}
|
|
1512
1512
|
};
|
|
1513
1513
|
}
|
|
@@ -3007,11 +3007,976 @@ var init_MemoryCapability = __esm({
|
|
|
3007
3007
|
}
|
|
3008
3008
|
});
|
|
3009
3009
|
|
|
3010
|
-
// packages/daemon/src/capabilities/
|
|
3011
|
-
import { spawn as spawn3 } from "node:child_process";
|
|
3010
|
+
// packages/daemon/src/capabilities/GUICapability.ts
|
|
3011
|
+
import { spawn as spawn3, spawnSync as spawnSync4 } from "node:child_process";
|
|
3012
3012
|
import { writeFileSync as writeFileSync2, unlinkSync } from "node:fs";
|
|
3013
3013
|
import { resolve as resolve3 } from "node:path";
|
|
3014
|
-
import { tmpdir } from "node:os";
|
|
3014
|
+
import { tmpdir, platform as platform2 } from "node:os";
|
|
3015
|
+
var GUICapability;
|
|
3016
|
+
var init_GUICapability = __esm({
|
|
3017
|
+
"packages/daemon/src/capabilities/GUICapability.ts"() {
|
|
3018
|
+
"use strict";
|
|
3019
|
+
GUICapability = class {
|
|
3020
|
+
name = "gui_automation";
|
|
3021
|
+
description = "Automate desktop GUI \u2014 click, type, screenshot, hotkeys, find text on screen.";
|
|
3022
|
+
// Tool schema advertised to the model for the "gui_automation" capability.
// `action` selects the operation; the remaining properties are optional,
// action-specific arguments. Only `action` is required by the schema — each
// action's own required arguments are checked when its script is built.
toolDefinition = {
  name: "gui_automation",
  description: "GUI automation + comprehensive browser control. BROWSER (no Screen Recording needed): click_text \u2014 click any element by its visible text; type_in \u2014 fill a form field by placeholder/label; get_elements \u2014 list all interactive elements on the page; read_element \u2014 read text of an element by CSS selector; get_media_state \u2014 check if video is playing/paused/current time; scroll_to \u2014 scroll page or scroll to specific element; exec_js \u2014 run arbitrary JavaScript in Chrome tab; browser_state \u2014 get current URL + title; cdp_screenshot \u2014 screenshot via CDP (needs --remote-debugging-port=9222) with OCR. NATIVE APPS: accessibility_click \u2014 click button in macOS app (WhatsApp, Finder) via Accessibility API. NAVIGATION: open_url \u2014 navigate Chrome tab, returns URL+title+video state. MOUSE/KEYBOARD: click, type, hotkey (use app param to target Chrome vs Terminal), screenshot (needs Screen Recording).",
  input_schema: {
    type: "object",
    properties: {
      action: {
        type: "string",
        description: 'Browser (no Screen Recording): "click_text"|"type_in"|"get_elements"|"read_element"|"get_media_state"|"scroll_to"|"exec_js"|"browser_state"|"cdp_screenshot" | Native apps (no Screen Recording): "app_type"|"accessibility_click" | Navigation: "open_url"|"open_app" | Mouse/KB (Screen Recording for screenshots): "screenshot"|"click"|"double_click"|"right_click"|"move"|"type"|"hotkey"|"scroll"|"drag"|"find_and_click"|"get_screen_size"|"get_cursor_pos"|"wait"'
      },
      js: { type: "string", description: `JavaScript to execute in Chrome tab (use with exec_js). Example: "document.querySelector('video').paused"` },
      selector: { type: "string", description: 'CSS selector for read_element, type_in, scroll_to (e.g. "input[type=search]", ".title", "video")' },
      x: { type: "number", description: "X coordinate (pixels from left)" },
      y: { type: "number", description: "Y coordinate (pixels from top)" },
      to_x: { type: "number", description: "End X for drag" },
      to_y: { type: "number", description: "End Y for drag" },
      // `text` is read by type, app_type, find_and_click and click_text.
      text: { type: "string", description: "Text to type (type, app_type), or visible text to search for (find_and_click, click_text)" },
      keys: { type: "string", description: 'Hotkey combo e.g. "cmd+c", "ctrl+z", "alt+tab", "enter"' },
      direction: { type: "string", description: '"up" | "down" | "left" | "right" for scroll' },
      amount: { type: "number", description: "Scroll clicks (default 3)" },
      // `app` is read by open_app, and also by hotkey / app_type to pick the target app.
      app: { type: "string", description: 'App name e.g. "Safari", "Terminal", "Chrome" \u2014 the app to open (open_app), or the app to focus before sending input (hotkey, app_type)' },
      url: { type: "string", description: 'URL to open e.g. "https://example.com" (use with open_url)' },
      seconds: { type: "number", description: "Seconds to wait (use with wait action, default 2)" },
      interval: { type: "number", description: "Seconds to wait between actions (default 0.05)" },
      duration: { type: "number", description: "Seconds for mouse movement animation (default 0.2)" }
    },
    required: ["action"]
  }
};
|
|
3051
|
+
async execute(input, _cwd, signal) {
|
|
3052
|
+
const action = String(input.action ?? "").toLowerCase().trim();
|
|
3053
|
+
const start = Date.now();
|
|
3054
|
+
const script = this._buildScript(action, input);
|
|
3055
|
+
if (!script) {
|
|
3056
|
+
return { success: false, output: `Unknown GUI action: "${action}". Valid: screenshot, click, double_click, right_click, move, type, hotkey, scroll, drag, find_and_click, get_screen_size, get_cursor_pos, wait, open_url, open_app, exec_js, browser_state`, duration_ms: 0 };
|
|
3057
|
+
}
|
|
3058
|
+
if (signal?.aborted) {
|
|
3059
|
+
return { success: false, output: "Cancelled.", duration_ms: 0 };
|
|
3060
|
+
}
|
|
3061
|
+
const tmpFile = resolve3(tmpdir(), `0agent_gui_${Date.now()}.py`);
|
|
3062
|
+
writeFileSync2(tmpFile, script, "utf8");
|
|
3063
|
+
const runPy = (file) => new Promise((res) => {
|
|
3064
|
+
const proc = spawn3("python3", [file], { env: process.env });
|
|
3065
|
+
const out = [];
|
|
3066
|
+
const err = [];
|
|
3067
|
+
let settled = false;
|
|
3068
|
+
const finish = (code) => {
|
|
3069
|
+
if (settled) return;
|
|
3070
|
+
settled = true;
|
|
3071
|
+
signal?.removeEventListener("abort", onAbort);
|
|
3072
|
+
clearTimeout(timer);
|
|
3073
|
+
res({ stdout: out.join(""), stderr: err.join(""), code });
|
|
3074
|
+
};
|
|
3075
|
+
const onAbort = () => {
|
|
3076
|
+
try {
|
|
3077
|
+
proc.kill("SIGKILL");
|
|
3078
|
+
} catch {
|
|
3079
|
+
}
|
|
3080
|
+
finish(null);
|
|
3081
|
+
};
|
|
3082
|
+
signal?.addEventListener("abort", onAbort, { once: true });
|
|
3083
|
+
proc.stdout.on("data", (d) => out.push(d.toString()));
|
|
3084
|
+
proc.stderr.on("data", (d) => err.push(d.toString()));
|
|
3085
|
+
proc.on("exit", finish);
|
|
3086
|
+
proc.on("error", () => finish(-1));
|
|
3087
|
+
const timer = setTimeout(() => {
|
|
3088
|
+
try {
|
|
3089
|
+
proc.kill("SIGKILL");
|
|
3090
|
+
} catch {
|
|
3091
|
+
}
|
|
3092
|
+
finish(null);
|
|
3093
|
+
}, 3e4);
|
|
3094
|
+
});
|
|
3095
|
+
let result = await runPy(tmpFile);
|
|
3096
|
+
try {
|
|
3097
|
+
unlinkSync(tmpFile);
|
|
3098
|
+
} catch {
|
|
3099
|
+
}
|
|
3100
|
+
if (signal?.aborted) {
|
|
3101
|
+
return { success: false, output: "Cancelled.", duration_ms: Date.now() - start };
|
|
3102
|
+
}
|
|
3103
|
+
if (result.code !== 0 && result.code !== null) {
|
|
3104
|
+
const err = result.stderr.trim();
|
|
3105
|
+
if (err.includes("No module named") || err.includes("ModuleNotFoundError")) {
|
|
3106
|
+
const missing = err.includes("pyautogui") ? "pyautogui pillow pytesseract" : err.includes("PIL") ? "pillow" : err.includes("tesseract") ? "pytesseract" : "pyautogui pillow";
|
|
3107
|
+
const install = spawnSync4("pip3", ["install", ...missing.split(" "), "-q"], {
|
|
3108
|
+
timeout: 6e4,
|
|
3109
|
+
encoding: "utf8"
|
|
3110
|
+
});
|
|
3111
|
+
if (install.status !== 0) {
|
|
3112
|
+
return { success: false, output: `Auto-install failed: ${install.stderr?.slice(0, 200)}. Run: pip3 install ${missing}`, duration_ms: Date.now() - start };
|
|
3113
|
+
}
|
|
3114
|
+
writeFileSync2(tmpFile, script, "utf8");
|
|
3115
|
+
result = await runPy(tmpFile);
|
|
3116
|
+
try {
|
|
3117
|
+
unlinkSync(tmpFile);
|
|
3118
|
+
} catch {
|
|
3119
|
+
}
|
|
3120
|
+
if (signal?.aborted) return { success: false, output: "Cancelled.", duration_ms: Date.now() - start };
|
|
3121
|
+
if (result.code === 0) return { success: true, output: result.stdout.trim() || "Done", duration_ms: Date.now() - start };
|
|
3122
|
+
return { success: false, output: result.stderr.trim() || "Unknown error after install", duration_ms: Date.now() - start };
|
|
3123
|
+
}
|
|
3124
|
+
const isScreenRecordingDenied = err.includes("could not create image from display") || err.includes("screen capture failed") || err.includes("screencapture") || err.includes("CGDisplayStream") || err.includes("Operation not permitted") || err.includes("OSError") && err.includes("display") || result.stdout.includes("could not create image from display");
|
|
3125
|
+
if (isScreenRecordingDenied) {
|
|
3126
|
+
if (platform2() === "darwin") {
|
|
3127
|
+
spawnSync4("open", ["x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture"], { timeout: 3e3 });
|
|
3128
|
+
const fallbackScript = `
|
|
3129
|
+
import subprocess
|
|
3130
|
+
as_script = '''tell application "Google Chrome"
|
|
3131
|
+
tell front window
|
|
3132
|
+
tell active tab
|
|
3133
|
+
set tabURL to URL
|
|
3134
|
+
set tabTitle to title
|
|
3135
|
+
set videoSt to execute javascript "try{let v=document.querySelector('video');v?(v.paused?'PAUSED':'PLAYING:'+v.currentTime.toFixed(1)+'s'):'no-video'}catch(e){'?'}"
|
|
3136
|
+
return tabURL & "|||" & tabTitle & "|||" & videoSt
|
|
3137
|
+
end tell
|
|
3138
|
+
end tell
|
|
3139
|
+
end tell'''
|
|
3140
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3141
|
+
out = r.stdout.strip()
|
|
3142
|
+
parts = out.split('|||') if '|||' in out else []
|
|
3143
|
+
if len(parts) >= 3:
|
|
3144
|
+
print(f"[No screenshot \u2014 Screen Recording permission needed]")
|
|
3145
|
+
print(f"Browser URL: {parts[0]}")
|
|
3146
|
+
print(f"Page title: {parts[1]}")
|
|
3147
|
+
print(f"Video state: {parts[2]}")
|
|
3148
|
+
print(f"To enable screenshots: System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable Terminal")
|
|
3149
|
+
else:
|
|
3150
|
+
print("[No screenshot \u2014 Screen Recording permission needed]")
|
|
3151
|
+
print("To fix: System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable Terminal (or iTerm2)")
|
|
3152
|
+
`;
|
|
3153
|
+
const fallbackTmp = resolve3(tmpdir(), `0agent_scfb_${Date.now()}.py`);
|
|
3154
|
+
writeFileSync2(fallbackTmp, fallbackScript, "utf8");
|
|
3155
|
+
const fbResult = await runPy(fallbackTmp);
|
|
3156
|
+
try {
|
|
3157
|
+
unlinkSync(fallbackTmp);
|
|
3158
|
+
} catch {
|
|
3159
|
+
}
|
|
3160
|
+
if (fbResult.code === 0 && fbResult.stdout.trim()) {
|
|
3161
|
+
return { success: false, output: fbResult.stdout.trim(), duration_ms: Date.now() - start };
|
|
3162
|
+
}
|
|
3163
|
+
}
|
|
3164
|
+
return {
|
|
3165
|
+
success: false,
|
|
3166
|
+
output: 'macOS Screen Recording permission required for screenshots.\nSystem Settings opened \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable Terminal/iTerm2.\nFor browser content, use exec_js instead: {action:"exec_js",js:"document.title"} or {action:"browser_state"} \u2014 these work without Screen Recording.',
|
|
3167
|
+
duration_ms: Date.now() - start
|
|
3168
|
+
};
|
|
3169
|
+
}
|
|
3170
|
+
if (err.includes("accessibility") || err.includes("permission") || err.includes("AXIsProcessTrusted")) {
|
|
3171
|
+
if (platform2() === "darwin") {
|
|
3172
|
+
spawnSync4("open", ["x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility"], { timeout: 3e3 });
|
|
3173
|
+
}
|
|
3174
|
+
return {
|
|
3175
|
+
success: false,
|
|
3176
|
+
output: "macOS Accessibility permission required for GUI automation.\n\u2192 System Settings has been opened automatically.\n\u2192 Go to: Privacy & Security \u2192 Accessibility \u2192 enable Terminal (or iTerm2 / the app running 0agent)\n\u2192 Then re-run your task.",
|
|
3177
|
+
duration_ms: Date.now() - start
|
|
3178
|
+
};
|
|
3179
|
+
}
|
|
3180
|
+
return { success: false, output: `GUI error: ${err.slice(0, 300)}`, duration_ms: Date.now() - start };
|
|
3181
|
+
}
|
|
3182
|
+
return { success: true, output: result.stdout.trim() || "Done", duration_ms: Date.now() - start };
|
|
3183
|
+
}
|
|
3184
|
+
_buildScript(action, input) {
|
|
3185
|
+
const x = input.x != null ? Number(input.x) : null;
|
|
3186
|
+
const y = input.y != null ? Number(input.y) : null;
|
|
3187
|
+
const toX = input.to_x != null ? Number(input.to_x) : null;
|
|
3188
|
+
const toY = input.to_y != null ? Number(input.to_y) : null;
|
|
3189
|
+
const text = input.text != null ? String(input.text) : "";
|
|
3190
|
+
const keys = input.keys != null ? String(input.keys) : "";
|
|
3191
|
+
const dir = input.direction != null ? String(input.direction) : "down";
|
|
3192
|
+
const amount = input.amount != null ? Number(input.amount) : 3;
|
|
3193
|
+
const app = input.app != null ? String(input.app) : "";
|
|
3194
|
+
const url = input.url != null ? String(input.url) : "";
|
|
3195
|
+
const seconds = input.seconds != null ? Number(input.seconds) : 2;
|
|
3196
|
+
const interval = input.interval != null ? Number(input.interval) : 0.05;
|
|
3197
|
+
const duration = input.duration != null ? Number(input.duration) : 0.2;
|
|
3198
|
+
const header = `
|
|
3199
|
+
import pyautogui
|
|
3200
|
+
import time
|
|
3201
|
+
import sys
|
|
3202
|
+
pyautogui.FAILSAFE = False
|
|
3203
|
+
pyautogui.PAUSE = ${interval}
|
|
3204
|
+
`;
|
|
3205
|
+
switch (action) {
|
|
3206
|
+
case "get_screen_size":
|
|
3207
|
+
return header + `
|
|
3208
|
+
w, h = pyautogui.size()
|
|
3209
|
+
print(f"Screen size: {w} x {h}")
|
|
3210
|
+
`;
|
|
3211
|
+
case "get_cursor_pos":
|
|
3212
|
+
return header + `
|
|
3213
|
+
x, y = pyautogui.position()
|
|
3214
|
+
print(f"Cursor position: ({x}, {y})")
|
|
3215
|
+
`;
|
|
3216
|
+
case "wait":
|
|
3217
|
+
return header + `
|
|
3218
|
+
time.sleep(${seconds})
|
|
3219
|
+
print(f"Waited ${seconds}s")
|
|
3220
|
+
`;
|
|
3221
|
+
case "screenshot": {
|
|
3222
|
+
return header + `
|
|
3223
|
+
import os, tempfile
|
|
3224
|
+
from PIL import Image
|
|
3225
|
+
|
|
3226
|
+
# Take screenshot
|
|
3227
|
+
shot_path = os.path.join(tempfile.gettempdir(), "0agent_screen.png")
|
|
3228
|
+
img = pyautogui.screenshot(shot_path)
|
|
3229
|
+
|
|
3230
|
+
w, h = img.size
|
|
3231
|
+
print(f"Screen: {w}x{h}")
|
|
3232
|
+
|
|
3233
|
+
# Try OCR with pytesseract
|
|
3234
|
+
try:
|
|
3235
|
+
import pytesseract
|
|
3236
|
+
# Resize for faster OCR if screen is large
|
|
3237
|
+
scale = min(1.0, 1920 / w)
|
|
3238
|
+
small = img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
|
|
3239
|
+
text = pytesseract.image_to_string(small, config='--psm 11')
|
|
3240
|
+
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
|
3241
|
+
print("\\nOn-screen text (OCR):")
|
|
3242
|
+
print("\\n".join(lines[:80]))
|
|
3243
|
+
|
|
3244
|
+
# Also get bounding boxes for clickable text
|
|
3245
|
+
data = pytesseract.image_to_data(small, output_type=pytesseract.Output.DICT)
|
|
3246
|
+
hits = []
|
|
3247
|
+
for i, word in enumerate(data['text']):
|
|
3248
|
+
if word.strip() and int(data['conf'][i]) > 50:
|
|
3249
|
+
bx = int(data['left'][i] / scale)
|
|
3250
|
+
by = int(data['top'][i] / scale)
|
|
3251
|
+
bw = int(data['width'][i] / scale)
|
|
3252
|
+
bh = int(data['height'][i] / scale)
|
|
3253
|
+
hits.append(f" '{word}' at ({bx + bw//2}, {by + bh//2})")
|
|
3254
|
+
if hits:
|
|
3255
|
+
print("\\nClickable words with center coordinates:")
|
|
3256
|
+
print("\\n".join(hits[:40]))
|
|
3257
|
+
except ImportError:
|
|
3258
|
+
print("(pytesseract not installed \u2014 install it for OCR: pip3 install pytesseract)")
|
|
3259
|
+
except Exception as e:
|
|
3260
|
+
print(f"OCR failed: {e}")
|
|
3261
|
+
finally:
|
|
3262
|
+
try:
|
|
3263
|
+
os.remove(shot_path)
|
|
3264
|
+
except Exception:
|
|
3265
|
+
pass
|
|
3266
|
+
`;
|
|
3267
|
+
}
|
|
3268
|
+
case "click":
|
|
3269
|
+
if (x == null || y == null) return null;
|
|
3270
|
+
return header + `
|
|
3271
|
+
pyautogui.click(${x}, ${y}, duration=${duration})
|
|
3272
|
+
print(f"Clicked at ({${x}}, {${y}})")
|
|
3273
|
+
`;
|
|
3274
|
+
case "double_click":
|
|
3275
|
+
if (x == null || y == null) return null;
|
|
3276
|
+
return header + `
|
|
3277
|
+
pyautogui.doubleClick(${x}, ${y}, duration=${duration})
|
|
3278
|
+
print(f"Double-clicked at ({${x}}, {${y}})")
|
|
3279
|
+
`;
|
|
3280
|
+
case "right_click":
|
|
3281
|
+
if (x == null || y == null) return null;
|
|
3282
|
+
return header + `
|
|
3283
|
+
pyautogui.rightClick(${x}, ${y}, duration=${duration})
|
|
3284
|
+
print(f"Right-clicked at ({${x}}, {${y}})")
|
|
3285
|
+
`;
|
|
3286
|
+
case "move":
|
|
3287
|
+
if (x == null || y == null) return null;
|
|
3288
|
+
return header + `
|
|
3289
|
+
pyautogui.moveTo(${x}, ${y}, duration=${duration})
|
|
3290
|
+
print(f"Moved to ({${x}}, {${y}})")
|
|
3291
|
+
`;
|
|
3292
|
+
case "type": {
|
|
3293
|
+
if (!text) return null;
|
|
3294
|
+
return header + `
|
|
3295
|
+
pyautogui.typewrite(${JSON.stringify(text)}, interval=${interval})
|
|
3296
|
+
print("Typed successfully")
|
|
3297
|
+
`;
|
|
3298
|
+
}
|
|
3299
|
+
case "hotkey": {
|
|
3300
|
+
if (!keys) return null;
|
|
3301
|
+
const targetApp = input.app ? String(input.app) : "";
|
|
3302
|
+
const parts = keys.toLowerCase().replace(/cmd|command|meta/g, "command").replace(/ctrl|control/g, "ctrl").replace(/opt|option/g, "option").split(/[+\-]/).map((k) => k.trim()).filter(Boolean);
|
|
3303
|
+
const pyKeys = JSON.stringify(parts);
|
|
3304
|
+
if (targetApp && platform2() === "darwin") {
|
|
3305
|
+
const safeApp = targetApp.replace(/'/g, "\\'");
|
|
3306
|
+
const asKey = parts[parts.length - 1] ?? "";
|
|
3307
|
+
const modifiers = parts.slice(0, -1).map((k) => {
|
|
3308
|
+
if (k === "command") return "command down";
|
|
3309
|
+
if (k === "ctrl") return "control down";
|
|
3310
|
+
if (k === "shift") return "shift down";
|
|
3311
|
+
if (k === "option") return "option down";
|
|
3312
|
+
return "";
|
|
3313
|
+
}).filter(Boolean).join(", ");
|
|
3314
|
+
const asModStr = modifiers ? ` using {${modifiers}}` : "";
|
|
3315
|
+
return header + `
|
|
3316
|
+
import subprocess, time
|
|
3317
|
+
# Focus target app first
|
|
3318
|
+
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3319
|
+
time.sleep(0.3)
|
|
3320
|
+
# Send keystroke via AppleScript (reliable \u2014 goes to the focused app, not Terminal)
|
|
3321
|
+
as_script = '''tell application "System Events"
|
|
3322
|
+
keystroke "${asKey}"${asModStr}
|
|
3323
|
+
end tell'''
|
|
3324
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3325
|
+
if r.returncode == 0:
|
|
3326
|
+
print(f"Sent ${parts.join("+")} to ${safeApp}")
|
|
3327
|
+
else:
|
|
3328
|
+
print(f"Keystroke error: {r.stderr.strip()[:200]}")
|
|
3329
|
+
`;
|
|
3330
|
+
}
|
|
3331
|
+
return header + `
|
|
3332
|
+
keys = ${pyKeys}
|
|
3333
|
+
pyautogui.hotkey(*keys)
|
|
3334
|
+
print(f"Pressed: {'+'.join(keys)}")
|
|
3335
|
+
`;
|
|
3336
|
+
}
|
|
3337
|
+
case "scroll": {
|
|
3338
|
+
const clicksVal = dir === "up" ? amount : dir === "down" ? -amount : 0;
|
|
3339
|
+
const hVal = dir === "left" ? -amount : dir === "right" ? amount : 0;
|
|
3340
|
+
const sx = x ?? "pyautogui.size()[0]//2";
|
|
3341
|
+
const sy = y ?? "pyautogui.size()[1]//2";
|
|
3342
|
+
return header + `
|
|
3343
|
+
${hVal !== 0 ? `pyautogui.hscroll(${hVal}, x=${sx}, y=${sy})` : `pyautogui.scroll(${clicksVal}, x=${sx}, y=${sy})`}
|
|
3344
|
+
print(f"Scrolled ${dir} by ${amount}")
|
|
3345
|
+
`;
|
|
3346
|
+
}
|
|
3347
|
+
case "drag":
|
|
3348
|
+
if (x == null || y == null || toX == null || toY == null) return null;
|
|
3349
|
+
return header + `
|
|
3350
|
+
pyautogui.moveTo(${x}, ${y}, duration=${duration})
|
|
3351
|
+
pyautogui.dragTo(${toX}, ${toY}, duration=${duration * 2}, button='left')
|
|
3352
|
+
print(f"Dragged from ({${x}},{${y}}) to ({${toX}},{${toY}})")
|
|
3353
|
+
`;
|
|
3354
|
+
case "find_and_click": {
|
|
3355
|
+
if (!text) return null;
|
|
3356
|
+
const safeText = text.replace(/'/g, "\\'");
|
|
3357
|
+
return header + `
|
|
3358
|
+
from PIL import Image
|
|
3359
|
+
import pytesseract, os, tempfile
|
|
3360
|
+
|
|
3361
|
+
shot_path = os.path.join(tempfile.gettempdir(), "0agent_screen.png")
|
|
3362
|
+
img = pyautogui.screenshot(shot_path)
|
|
3363
|
+
w, h = img.size
|
|
3364
|
+
|
|
3365
|
+
data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
|
|
3366
|
+
target = '${safeText}'.lower()
|
|
3367
|
+
found = []
|
|
3368
|
+
for i, word in enumerate(data['text']):
|
|
3369
|
+
if target in word.lower() and int(data['conf'][i]) > 40:
|
|
3370
|
+
cx = data['left'][i] + data['width'][i] // 2
|
|
3371
|
+
cy = data['top'][i] + data['height'][i] // 2
|
|
3372
|
+
found.append((cx, cy, word))
|
|
3373
|
+
|
|
3374
|
+
if found:
|
|
3375
|
+
cx, cy, word = found[0]
|
|
3376
|
+
pyautogui.click(cx, cy, duration=${duration})
|
|
3377
|
+
print(f"Found '{word}' at ({cx},{cy}) \u2014 clicked")
|
|
3378
|
+
else:
|
|
3379
|
+
# Retry once after a brief wait (element may still be loading)
|
|
3380
|
+
time.sleep(1.5)
|
|
3381
|
+
img2 = pyautogui.screenshot()
|
|
3382
|
+
data2 = pytesseract.image_to_data(img2, output_type=pytesseract.Output.DICT)
|
|
3383
|
+
found2 = []
|
|
3384
|
+
for i, word in enumerate(data2['text']):
|
|
3385
|
+
if target in word.lower() and int(data2['conf'][i]) > 40:
|
|
3386
|
+
cx2 = data2['left'][i] + data2['width'][i] // 2
|
|
3387
|
+
cy2 = data2['top'][i] + data2['height'][i] // 2
|
|
3388
|
+
found2.append((cx2, cy2, word))
|
|
3389
|
+
if found2:
|
|
3390
|
+
cx2, cy2, word2 = found2[0]
|
|
3391
|
+
pyautogui.click(cx2, cy2, duration=${duration})
|
|
3392
|
+
print(f"Found '{word2}' at ({cx2},{cy2}) after retry \u2014 clicked")
|
|
3393
|
+
else:
|
|
3394
|
+
print(f"Text '${safeText}' not found on screen after retry. Take a screenshot to see what changed.")
|
|
3395
|
+
sys.exit(1)
|
|
3396
|
+
try:
|
|
3397
|
+
os.remove(shot_path)
|
|
3398
|
+
except Exception:
|
|
3399
|
+
pass
|
|
3400
|
+
`;
|
|
3401
|
+
}
|
|
3402
|
+
case "open_url": {
|
|
3403
|
+
if (!url) return null;
|
|
3404
|
+
let finalUrl = url;
|
|
3405
|
+
if (/youtube\.com\/watch/i.test(url) && !url.includes("autoplay")) {
|
|
3406
|
+
finalUrl = url + (url.includes("?") ? "&" : "?") + "autoplay=1";
|
|
3407
|
+
}
|
|
3408
|
+
const safeUrl = finalUrl.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
|
|
3409
|
+
const isYouTubeVideo = /youtube\.com\/watch/i.test(finalUrl);
|
|
3410
|
+
const osName = platform2();
|
|
3411
|
+
if (osName === "darwin") {
|
|
3412
|
+
return header + `
|
|
3413
|
+
import subprocess
|
|
3414
|
+
import time
|
|
3415
|
+
|
|
3416
|
+
url = '${safeUrl}'
|
|
3417
|
+
is_youtube_video = ${isYouTubeVideo ? "True" : "False"}
|
|
3418
|
+
|
|
3419
|
+
# Check if Chrome is running
|
|
3420
|
+
chrome_running = subprocess.run(['pgrep', '-x', 'Google Chrome'], capture_output=True).returncode == 0
|
|
3421
|
+
firefox_running = subprocess.run(['pgrep', '-x', 'firefox'], capture_output=True).returncode == 0
|
|
3422
|
+
safari_running = subprocess.run(['pgrep', '-x', 'Safari'], capture_output=True).returncode == 0
|
|
3423
|
+
|
|
3424
|
+
import urllib.parse
|
|
3425
|
+
domain = urllib.parse.urlparse(url).netloc
|
|
3426
|
+
|
|
3427
|
+
if chrome_running:
|
|
3428
|
+
if is_youtube_video:
|
|
3429
|
+
# Navigate the CURRENT active tab directly to avoid domain-matching a wrong/stale tab
|
|
3430
|
+
nav_script = f"""tell application "Google Chrome"
|
|
3431
|
+
tell front window
|
|
3432
|
+
tell active tab
|
|
3433
|
+
set URL to "{url}"
|
|
3434
|
+
end tell
|
|
3435
|
+
end tell
|
|
3436
|
+
activate
|
|
3437
|
+
end tell"""
|
|
3438
|
+
subprocess.run(['osascript', '-e', nav_script], capture_output=True)
|
|
3439
|
+
time.sleep(3)
|
|
3440
|
+
# Unmute + play via JS (handles autoplay policy blocks)
|
|
3441
|
+
play_script = """tell application "Google Chrome"
|
|
3442
|
+
tell front window
|
|
3443
|
+
tell active tab
|
|
3444
|
+
execute javascript "try{let v=document.querySelector('video');if(v){v.muted=false;v.volume=1.0;v.play();}}catch(e){}"
|
|
3445
|
+
end tell
|
|
3446
|
+
end tell
|
|
3447
|
+
end tell"""
|
|
3448
|
+
subprocess.run(['osascript', '-e', play_script], capture_output=True)
|
|
3449
|
+
time.sleep(1)
|
|
3450
|
+
# Verify: get URL, title, video state \u2014 all via AppleScript, no Screen Recording needed
|
|
3451
|
+
verify_script = """tell application "Google Chrome"
|
|
3452
|
+
tell front window
|
|
3453
|
+
tell active tab
|
|
3454
|
+
set tabURL to URL
|
|
3455
|
+
set tabTitle to title
|
|
3456
|
+
set videoSt to execute javascript "try{let v=document.querySelector('video');v?(v.paused?'PAUSED':'PLAYING:'+v.currentTime.toFixed(1)+'s'):'no-video'}catch(e){'err'}"
|
|
3457
|
+
return tabURL & "|||" & tabTitle & "|||" & videoSt
|
|
3458
|
+
end tell
|
|
3459
|
+
end tell
|
|
3460
|
+
end tell"""
|
|
3461
|
+
vr = subprocess.run(['osascript', '-e', verify_script], capture_output=True, text=True)
|
|
3462
|
+
parts = vr.stdout.strip().split('|||')
|
|
3463
|
+
if len(parts) >= 3:
|
|
3464
|
+
print(f"URL: {parts[0]}")
|
|
3465
|
+
print(f"Title: {parts[1]}")
|
|
3466
|
+
st = parts[2].strip()
|
|
3467
|
+
if 'PLAYING' in st:
|
|
3468
|
+
print(f"Video: {st} \u2713")
|
|
3469
|
+
elif st == 'PAUSED':
|
|
3470
|
+
# Send play() one more time
|
|
3471
|
+
subprocess.run(['osascript', '-e', play_script], capture_output=True)
|
|
3472
|
+
time.sleep(0.5)
|
|
3473
|
+
print("Video: was PAUSED \u2014 sent play() again, should be playing now")
|
|
3474
|
+
else:
|
|
3475
|
+
print(f"Video state: {st} (page may still be loading)")
|
|
3476
|
+
else:
|
|
3477
|
+
print(f"Navigated to: {url}")
|
|
3478
|
+
print(f"(Verification unavailable: {vr.stdout.strip() or vr.stderr.strip()[:100]})")
|
|
3479
|
+
else:
|
|
3480
|
+
# Non-video: switch to existing same-domain tab or open new tab
|
|
3481
|
+
check_script = f"""tell application "Google Chrome"
|
|
3482
|
+
set foundTab to false
|
|
3483
|
+
repeat with w in every window
|
|
3484
|
+
set tabIdx to 1
|
|
3485
|
+
repeat with t in every tab of w
|
|
3486
|
+
if URL of t contains "{domain}" then
|
|
3487
|
+
set active tab index of w to tabIdx
|
|
3488
|
+
set index of w to 1
|
|
3489
|
+
set foundTab to true
|
|
3490
|
+
exit repeat
|
|
3491
|
+
end if
|
|
3492
|
+
set tabIdx to tabIdx + 1
|
|
3493
|
+
end repeat
|
|
3494
|
+
if foundTab then exit repeat
|
|
3495
|
+
end repeat
|
|
3496
|
+
if foundTab then
|
|
3497
|
+
activate
|
|
3498
|
+
return "switched"
|
|
3499
|
+
else
|
|
3500
|
+
tell front window to make new tab with properties {{URL:"{url}"}}
|
|
3501
|
+
activate
|
|
3502
|
+
return "new-tab"
|
|
3503
|
+
end if
|
|
3504
|
+
end tell"""
|
|
3505
|
+
r = subprocess.run(['osascript', '-e', check_script], capture_output=True, text=True)
|
|
3506
|
+
switched = r.stdout.strip() == "switched"
|
|
3507
|
+
# Verify actual URL and title loaded (catches wrong-domain tab issues)
|
|
3508
|
+
state_script = """tell application "Google Chrome"
|
|
3509
|
+
tell front window
|
|
3510
|
+
tell active tab
|
|
3511
|
+
return URL & "|||" & title
|
|
3512
|
+
end tell
|
|
3513
|
+
end tell
|
|
3514
|
+
end tell"""
|
|
3515
|
+
sr = subprocess.run(['osascript', '-e', state_script], capture_output=True, text=True)
|
|
3516
|
+
sp = sr.stdout.strip().split('|||')
|
|
3517
|
+
if len(sp) >= 2:
|
|
3518
|
+
print(f"{'Switched to' if switched else 'Opened'}: {sp[0]}")
|
|
3519
|
+
print(f"Title: {sp[1]}")
|
|
3520
|
+
else:
|
|
3521
|
+
print(f"{'Switched to existing' if switched else 'Opened new'} Chrome tab: {url}")
|
|
3522
|
+
elif firefox_running:
|
|
3523
|
+
script = f'tell application "Firefox" to open location "{url}"'
|
|
3524
|
+
subprocess.run(['osascript', '-e', script])
|
|
3525
|
+
subprocess.run(['osascript', '-e', 'tell application "Firefox" to activate'])
|
|
3526
|
+
print(f"Navigated Firefox to: {url}")
|
|
3527
|
+
elif safari_running:
|
|
3528
|
+
script = f'tell application "Safari" to open location "{url}"'
|
|
3529
|
+
subprocess.run(['osascript', '-e', script])
|
|
3530
|
+
subprocess.run(['osascript', '-e', 'tell application "Safari" to activate'])
|
|
3531
|
+
print(f"Navigated Safari to: {url}")
|
|
3532
|
+
else:
|
|
3533
|
+
# No browser open \u2014 launch default browser with the URL
|
|
3534
|
+
subprocess.run(['open', url])
|
|
3535
|
+
print(f"Launched browser with: {url}")
|
|
3536
|
+
time.sleep(1.0)
|
|
3537
|
+
`;
|
|
3538
|
+
}
|
|
3539
|
+
return header + `
|
|
3540
|
+
import subprocess
|
|
3541
|
+
|
|
3542
|
+
url = '${safeUrl}'
|
|
3543
|
+
|
|
3544
|
+
# Try to reuse existing browser via wmctrl/xdotool, fall back to xdg-open
|
|
3545
|
+
chrome_pid = subprocess.run(['pgrep', '-x', 'chrome'], capture_output=True)
|
|
3546
|
+
firefox_pid = subprocess.run(['pgrep', '-x', 'firefox'], capture_output=True)
|
|
3547
|
+
|
|
3548
|
+
if chrome_pid.returncode == 0:
|
|
3549
|
+
subprocess.Popen(['google-chrome', '--new-tab', url])
|
|
3550
|
+
print(f"Opened in Chrome tab: {url}")
|
|
3551
|
+
elif firefox_pid.returncode == 0:
|
|
3552
|
+
subprocess.Popen(['firefox', '--new-tab', url])
|
|
3553
|
+
print(f"Opened in Firefox tab: {url}")
|
|
3554
|
+
else:
|
|
3555
|
+
subprocess.Popen(['xdg-open', url])
|
|
3556
|
+
print(f"Opened with default browser: {url}")
|
|
3557
|
+
time.sleep(1.0)
|
|
3558
|
+
`;
|
|
3559
|
+
}
|
|
3560
|
+
case "open_app": {
|
|
3561
|
+
if (!app) return null;
|
|
3562
|
+
const safeApp = app.replace(/'/g, "\\'");
|
|
3563
|
+
const os = platform2();
|
|
3564
|
+
if (os === "darwin") {
|
|
3565
|
+
return header + `
|
|
3566
|
+
import subprocess
|
|
3567
|
+
result = subprocess.run(['open', '-a', '${safeApp}'], capture_output=True, text=True)
|
|
3568
|
+
if result.returncode == 0:
|
|
3569
|
+
print(f"Opened: ${safeApp}")
|
|
3570
|
+
time.sleep(1.5) # wait for app to launch
|
|
3571
|
+
else:
|
|
3572
|
+
# Try spotlight
|
|
3573
|
+
pyautogui.hotkey('command', 'space')
|
|
3574
|
+
time.sleep(0.5)
|
|
3575
|
+
pyautogui.write('${safeApp}', interval=0.05)
|
|
3576
|
+
time.sleep(0.5)
|
|
3577
|
+
pyautogui.press('enter')
|
|
3578
|
+
print(f"Opened via Spotlight: ${safeApp}")
|
|
3579
|
+
time.sleep(1.5)
|
|
3580
|
+
`;
|
|
3581
|
+
}
|
|
3582
|
+
return header + `
|
|
3583
|
+
import subprocess
|
|
3584
|
+
subprocess.Popen(['${safeApp}'])
|
|
3585
|
+
print(f"Launched: ${safeApp}")
|
|
3586
|
+
time.sleep(1.5)
|
|
3587
|
+
`;
|
|
3588
|
+
}
|
|
3589
|
+
// ── New high-level browser actions — no Screen Recording needed ───────────
|
|
3590
|
+
case "app_type": {
|
|
3591
|
+
const appName = String(input.app ?? "").trim();
|
|
3592
|
+
const typeText = String(input.text ?? text ?? "").trim();
|
|
3593
|
+
if (!appName || !typeText) return null;
|
|
3594
|
+
const osName = platform2();
|
|
3595
|
+
if (osName !== "darwin") return header + `print("app_type requires macOS")`;
|
|
3596
|
+
const safeApp = appName.replace(/'/g, "\\'");
|
|
3597
|
+
const textJson = JSON.stringify(typeText);
|
|
3598
|
+
return header + `
|
|
3599
|
+
import subprocess, time, json
|
|
3600
|
+
|
|
3601
|
+
text_to_type = json.loads(${textJson})
|
|
3602
|
+
|
|
3603
|
+
# Step 1: copy to clipboard (handles unicode, special chars, long text)
|
|
3604
|
+
cp = subprocess.run(['pbcopy'], input=text_to_type.encode('utf-8'), capture_output=True)
|
|
3605
|
+
if cp.returncode != 0:
|
|
3606
|
+
print(f"Clipboard copy failed: {cp.stderr.decode()[:100]}")
|
|
3607
|
+
sys.exit(1)
|
|
3608
|
+
|
|
3609
|
+
# Step 2: bring app to front
|
|
3610
|
+
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3611
|
+
time.sleep(0.4)
|
|
3612
|
+
|
|
3613
|
+
# Step 3: paste via AppleScript System Events (targets the specific process, not OS focus)
|
|
3614
|
+
paste_script = """tell application "System Events"
|
|
3615
|
+
tell process "${safeApp}"
|
|
3616
|
+
keystroke "v" using command down
|
|
3617
|
+
end tell
|
|
3618
|
+
end tell"""
|
|
3619
|
+
r = subprocess.run(['osascript', '-e', paste_script], capture_output=True, text=True)
|
|
3620
|
+
if r.returncode == 0:
|
|
3621
|
+
print(f"Typed in ${safeApp}: {text_to_type[:60]}")
|
|
3622
|
+
else:
|
|
3623
|
+
# Accessibility permission might be needed
|
|
3624
|
+
err = r.stderr.strip()
|
|
3625
|
+
if 'not allowed' in err.lower() or 'accessibility' in err.lower():
|
|
3626
|
+
subprocess.run(['open', 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'], capture_output=True)
|
|
3627
|
+
print(f"Accessibility permission needed for ${safeApp}. System Settings opened \u2014 Privacy & Security \u2192 Accessibility \u2192 enable Terminal.")
|
|
3628
|
+
else:
|
|
3629
|
+
print(f"app_type error: {err[:200]}")
|
|
3630
|
+
`;
|
|
3631
|
+
}
|
|
3632
|
+
case "click_text": {
|
|
3633
|
+
if (!text) return null;
|
|
3634
|
+
if (platform2() !== "darwin") return header + `print("click_text requires macOS + Chrome")`;
|
|
3635
|
+
return this._chromeJs(JSON.stringify(text), `
|
|
3636
|
+
(function(t) {
|
|
3637
|
+
t = t.toLowerCase().trim();
|
|
3638
|
+
// Pass 1: interactive elements (buttons, links, roles)
|
|
3639
|
+
var candidates = Array.from(document.querySelectorAll(
|
|
3640
|
+
'button,a,[role="button"],[role="link"],[role="menuitem"],[role="option"],[role="tab"],[tabindex="0"],label'
|
|
3641
|
+
));
|
|
3642
|
+
var match = candidates.find(el => {
|
|
3643
|
+
var txt = (el.textContent || el.getAttribute('aria-label') || el.getAttribute('title') || '').trim().toLowerCase();
|
|
3644
|
+
return txt === t || txt.startsWith(t) || (t.length > 3 && txt.includes(t));
|
|
3645
|
+
});
|
|
3646
|
+
// Pass 2: any visible leaf element with matching text
|
|
3647
|
+
if (!match) {
|
|
3648
|
+
match = Array.from(document.querySelectorAll('*')).find(el => {
|
|
3649
|
+
if (!el.offsetParent && el !== document.body) return false;
|
|
3650
|
+
if (el.children.length > 0) return false;
|
|
3651
|
+
var txt = (el.textContent || '').trim().toLowerCase();
|
|
3652
|
+
return txt === t || (t.length > 4 && txt.includes(t) && txt.length < t.length * 3);
|
|
3653
|
+
});
|
|
3654
|
+
}
|
|
3655
|
+
if (!match) return 'NOT_FOUND: ' + t;
|
|
3656
|
+
match.scrollIntoView({behavior:'instant', block:'center'});
|
|
3657
|
+
match.focus();
|
|
3658
|
+
['mousedown','mouseup','click'].forEach(e =>
|
|
3659
|
+
match.dispatchEvent(new MouseEvent(e, {bubbles:true, cancelable:true}))
|
|
3660
|
+
);
|
|
3661
|
+
return 'CLICKED: ' + (match.textContent || match.getAttribute('aria-label') || match.tagName).trim().slice(0,80);
|
|
3662
|
+
})(JSARG)
|
|
3663
|
+
`);
|
|
3664
|
+
}
|
|
3665
|
+
case "type_in": {
|
|
3666
|
+
if (!text) return null;
|
|
3667
|
+
if (platform2() !== "darwin") return header + `print("type_in requires macOS + Chrome")`;
|
|
3668
|
+
const query = String(input.selector ?? input.query ?? "").trim() || "active";
|
|
3669
|
+
const args = JSON.stringify([query, text]);
|
|
3670
|
+
return this._chromeJs(args, `
|
|
3671
|
+
(function(query, value) {
|
|
3672
|
+
var el = query === 'active' ? document.activeElement :
|
|
3673
|
+
document.querySelector('input[placeholder*="'+query+'" i]') ||
|
|
3674
|
+
document.querySelector('input[aria-label*="'+query+'" i]') ||
|
|
3675
|
+
document.querySelector('textarea[placeholder*="'+query+'" i]') ||
|
|
3676
|
+
document.querySelector('[role="textbox"][aria-label*="'+query+'" i]') ||
|
|
3677
|
+
document.querySelector('[contenteditable="true"]') ||
|
|
3678
|
+
document.querySelector('input[type="text"],input[type="search"],input:not([type])');
|
|
3679
|
+
if (!el) return 'NOT_FOUND: ' + query;
|
|
3680
|
+
el.focus();
|
|
3681
|
+
if (el.getAttribute('contenteditable') !== null) {
|
|
3682
|
+
el.textContent = '';
|
|
3683
|
+
document.execCommand('insertText', false, value);
|
|
3684
|
+
} else {
|
|
3685
|
+
var proto = el instanceof HTMLTextAreaElement ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
|
|
3686
|
+
Object.getOwnPropertyDescriptor(proto, 'value').set.call(el, value);
|
|
3687
|
+
['input','change'].forEach(t => el.dispatchEvent(new Event(t, {bubbles:true})));
|
|
3688
|
+
}
|
|
3689
|
+
return 'TYPED "'+value.slice(0,40)+'" in '+(el.placeholder||el.getAttribute('aria-label')||el.tagName);
|
|
3690
|
+
})(JSARG[0], JSARG[1])
|
|
3691
|
+
`);
|
|
3692
|
+
}
|
|
3693
|
+
case "read_element": {
|
|
3694
|
+
if (platform2() !== "darwin") return header + `print("read_element requires macOS + Chrome")`;
|
|
3695
|
+
const sel = String(input.selector ?? "").trim();
|
|
3696
|
+
return this._chromeJs(JSON.stringify(sel || "body"), `
|
|
3697
|
+
(function(sel) {
|
|
3698
|
+
var el = sel ? document.querySelector(sel) : document.body;
|
|
3699
|
+
if (!el) return 'NOT_FOUND: ' + sel;
|
|
3700
|
+
return (el.textContent || el.innerText || el.value || '').trim().replace(/\\s+/g,' ').slice(0, 800);
|
|
3701
|
+
})(JSARG)
|
|
3702
|
+
`);
|
|
3703
|
+
}
|
|
3704
|
+
case "get_elements": {
|
|
3705
|
+
if (platform2() !== "darwin") return header + `print("get_elements requires macOS + Chrome")`;
|
|
3706
|
+
return this._chromeJs(JSON.stringify(""), `
|
|
3707
|
+
(function() {
|
|
3708
|
+
var seen = new Set(), els = [];
|
|
3709
|
+
document.querySelectorAll('button,a,input,select,textarea,[role="button"],[role="link"],[role="tab"],[role="option"],h1,h2,h3,video,audio').forEach(function(el,i) {
|
|
3710
|
+
if (i > 80 || !el.offsetParent) return;
|
|
3711
|
+
var label = (el.textContent || el.getAttribute('aria-label') || el.placeholder || el.getAttribute('title') || el.value || '').trim().slice(0,80);
|
|
3712
|
+
if (!label || seen.has(label)) return;
|
|
3713
|
+
seen.add(label);
|
|
3714
|
+
els.push(el.tagName.toLowerCase()+': '+label);
|
|
3715
|
+
});
|
|
3716
|
+
return els.length ? els.join('\\n') : 'No interactive elements found';
|
|
3717
|
+
})()
|
|
3718
|
+
`);
|
|
3719
|
+
}
|
|
3720
|
+
case "get_media_state": {
|
|
3721
|
+
if (platform2() !== "darwin") return header + `print("get_media_state requires macOS + Chrome")`;
|
|
3722
|
+
return this._chromeJs(JSON.stringify(""), `
|
|
3723
|
+
(function() {
|
|
3724
|
+
var v = document.querySelector('video,audio');
|
|
3725
|
+
if (!v) return 'No media on this page';
|
|
3726
|
+
return JSON.stringify({
|
|
3727
|
+
state: v.paused ? 'PAUSED' : 'PLAYING',
|
|
3728
|
+
time: v.currentTime.toFixed(1)+'s',
|
|
3729
|
+
duration: isFinite(v.duration) ? v.duration.toFixed(1)+'s' : 'live/unknown',
|
|
3730
|
+
muted: v.muted,
|
|
3731
|
+
volume: Math.round(v.volume*100)+'%',
|
|
3732
|
+
title: document.title.slice(0,80)
|
|
3733
|
+
});
|
|
3734
|
+
})()
|
|
3735
|
+
`);
|
|
3736
|
+
}
|
|
3737
|
+
case "scroll_to": {
|
|
3738
|
+
if (platform2() !== "darwin") return header + `print("scroll_to requires macOS + Chrome")`;
|
|
3739
|
+
const sel = String(input.selector ?? "").trim();
|
|
3740
|
+
const scrollDir = String(input.direction ?? "down").toLowerCase();
|
|
3741
|
+
const scrollAmt = input.amount != null ? Number(input.amount) : 400;
|
|
3742
|
+
if (sel) {
|
|
3743
|
+
return this._chromeJs(JSON.stringify(sel), `
|
|
3744
|
+
(function(s){var el=document.querySelector(s);if(!el)return 'NOT_FOUND: '+s;el.scrollIntoView({behavior:'instant',block:'center'});return 'Scrolled to: '+s;})(JSARG)
|
|
3745
|
+
`);
|
|
3746
|
+
}
|
|
3747
|
+
const scrollY = scrollDir === "up" ? -scrollAmt : scrollDir === "down" ? scrollAmt : 0;
|
|
3748
|
+
const scrollX = scrollDir === "left" ? -scrollAmt : scrollDir === "right" ? scrollAmt : 0;
|
|
3749
|
+
return this._chromeJs(JSON.stringify([scrollX, scrollY]), `
|
|
3750
|
+
(function(xy){window.scrollBy(xy[0],xy[1]);return 'Scrolled';} )(JSARG)
|
|
3751
|
+
`);
|
|
3752
|
+
}
|
|
3753
|
+
case "accessibility_click": {
|
|
3754
|
+
const appName = String(input.app ?? "").trim();
|
|
3755
|
+
const elemLabel = String(input.element ?? text ?? "").trim();
|
|
3756
|
+
if (!appName || !elemLabel) return null;
|
|
3757
|
+
const osName = platform2();
|
|
3758
|
+
if (osName !== "darwin") return header + `print("accessibility_click is macOS only")`;
|
|
3759
|
+
const safeApp = appName.replace(/'/g, "\\'");
|
|
3760
|
+
const safeElem = elemLabel.replace(/'/g, "\\'");
|
|
3761
|
+
return header + `
|
|
3762
|
+
import subprocess, time
|
|
3763
|
+
|
|
3764
|
+
# Bring app to foreground
|
|
3765
|
+
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3766
|
+
time.sleep(0.5)
|
|
3767
|
+
|
|
3768
|
+
# Try clicking by name, then by description, then by value
|
|
3769
|
+
attempts = [
|
|
3770
|
+
f'''tell application "System Events" to tell process "${safeApp}" to click (first UI element of front window whose name contains "${safeElem}")''',
|
|
3771
|
+
f'''tell application "System Events" to tell process "${safeApp}" to click (first button whose description contains "${safeElem}")''',
|
|
3772
|
+
f'''tell application "System Events" to tell process "${safeApp}" to click (first UI element whose value contains "${safeElem}")''',
|
|
3773
|
+
]
|
|
3774
|
+
|
|
3775
|
+
success = False
|
|
3776
|
+
for script in attempts:
|
|
3777
|
+
r = subprocess.run(['osascript', '-e', script], capture_output=True, text=True)
|
|
3778
|
+
if r.returncode == 0:
|
|
3779
|
+
print(f"Clicked '{${JSON.stringify(elemLabel)}}' in ${safeApp}")
|
|
3780
|
+
success = True
|
|
3781
|
+
break
|
|
3782
|
+
|
|
3783
|
+
if not success:
|
|
3784
|
+
# Last resort: try clicking the front window element matching description
|
|
3785
|
+
list_script = f"""tell application "System Events"
|
|
3786
|
+
tell process "${safeApp}"
|
|
3787
|
+
return name of every UI element of front window
|
|
3788
|
+
end tell
|
|
3789
|
+
end tell"""
|
|
3790
|
+
lr = subprocess.run(['osascript', '-e', list_script], capture_output=True, text=True)
|
|
3791
|
+
print(f"Could not find element '${safeElem}' in ${safeApp}")
|
|
3792
|
+
print(f"Available elements: {lr.stdout.strip()[:300] or 'could not list'}")
|
|
3793
|
+
`;
|
|
3794
|
+
}
|
|
3795
|
+
case "cdp_screenshot": {
|
|
3796
|
+
if (platform2() !== "darwin") return header + `print("cdp_screenshot is macOS only for now")`;
|
|
3797
|
+
return header + `
|
|
3798
|
+
import urllib.request, json, base64, os, tempfile, subprocess
|
|
3799
|
+
|
|
3800
|
+
def get_browser_state():
|
|
3801
|
+
simple_scr = """tell application "Google Chrome"
|
|
3802
|
+
tell front window
|
|
3803
|
+
tell active tab
|
|
3804
|
+
return URL & "|||" & title
|
|
3805
|
+
end tell
|
|
3806
|
+
end tell
|
|
3807
|
+
end tell"""
|
|
3808
|
+
r = subprocess.run(['osascript', '-e', simple_scr], capture_output=True, text=True)
|
|
3809
|
+
parts = r.stdout.strip().split('|||')
|
|
3810
|
+
if len(parts) >= 2:
|
|
3811
|
+
print(f"[No CDP screenshot] Tab: {parts[1]}")
|
|
3812
|
+
print(f"URL: {parts[0]}")
|
|
3813
|
+
else:
|
|
3814
|
+
print("[No CDP screenshot \u2014 Chrome not running or no active tab]")
|
|
3815
|
+
print("To enable screenshots without Screen Recording, start Chrome with:")
|
|
3816
|
+
print(" /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222")
|
|
3817
|
+
|
|
3818
|
+
try:
|
|
3819
|
+
tabs_raw = urllib.request.urlopen('http://localhost:9222/json', timeout=2).read()
|
|
3820
|
+
tabs = json.loads(tabs_raw)
|
|
3821
|
+
if not tabs:
|
|
3822
|
+
raise Exception("No tabs available")
|
|
3823
|
+
ws_url = tabs[0].get('webSocketDebuggerUrl', '')
|
|
3824
|
+
if not ws_url:
|
|
3825
|
+
raise Exception("No WebSocket URL")
|
|
3826
|
+
|
|
3827
|
+
# Auto-install websockets if needed
|
|
3828
|
+
try:
|
|
3829
|
+
import websockets
|
|
3830
|
+
except ImportError:
|
|
3831
|
+
subprocess.run(['pip3', 'install', 'websockets', '-q'], capture_output=True, timeout=60)
|
|
3832
|
+
import websockets
|
|
3833
|
+
|
|
3834
|
+
import asyncio
|
|
3835
|
+
|
|
3836
|
+
async def capture():
|
|
3837
|
+
async with websockets.connect(ws_url) as ws:
|
|
3838
|
+
await ws.send(json.dumps({'id':1,'method':'Page.captureScreenshot','params':{'format':'jpeg','quality':75}}))
|
|
3839
|
+
resp = json.loads(await ws.recv())
|
|
3840
|
+
return resp.get('result', {}).get('data')
|
|
3841
|
+
|
|
3842
|
+
img_b64 = asyncio.run(capture())
|
|
3843
|
+
if not img_b64:
|
|
3844
|
+
raise Exception("No screenshot data returned")
|
|
3845
|
+
|
|
3846
|
+
out_path = os.path.join(tempfile.gettempdir(), '0agent_cdp_shot.jpg')
|
|
3847
|
+
with open(out_path, 'wb') as f:
|
|
3848
|
+
f.write(base64.b64decode(img_b64))
|
|
3849
|
+
|
|
3850
|
+
print(f"Screenshot: {out_path}")
|
|
3851
|
+
print(f"Tab: {tabs[0].get('title','?')} \u2014 {tabs[0].get('url','?')[:80]}")
|
|
3852
|
+
|
|
3853
|
+
try:
|
|
3854
|
+
import pytesseract
|
|
3855
|
+
from PIL import Image
|
|
3856
|
+
img = Image.open(out_path)
|
|
3857
|
+
text = pytesseract.image_to_string(img, config='--psm 11')
|
|
3858
|
+
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
|
3859
|
+
if lines:
|
|
3860
|
+
print("On-screen text (OCR):\\n" + "\\n".join(lines[:50]))
|
|
3861
|
+
except Exception:
|
|
3862
|
+
print("(OCR not available \u2014 install pytesseract for text extraction)")
|
|
3863
|
+
|
|
3864
|
+
except Exception as e:
|
|
3865
|
+
get_browser_state()
|
|
3866
|
+
`;
|
|
3867
|
+
}
|
|
3868
|
+
case "exec_js": {
|
|
3869
|
+
const js = String(input.js ?? "").trim();
|
|
3870
|
+
if (!js) return null;
|
|
3871
|
+
const osName = platform2();
|
|
3872
|
+
if (osName !== "darwin") {
|
|
3873
|
+
return header + `print("exec_js requires macOS + Google Chrome")`;
|
|
3874
|
+
}
|
|
3875
|
+
const jsJson = JSON.stringify(js);
|
|
3876
|
+
return header + `
|
|
3877
|
+
import subprocess, json, os, tempfile
|
|
3878
|
+
|
|
3879
|
+
js = json.loads(${jsJson})
|
|
3880
|
+
tmpjs = os.path.join(tempfile.gettempdir(), f"0agent_execjs_{os.getpid()}.js")
|
|
3881
|
+
with open(tmpjs, 'w') as f:
|
|
3882
|
+
f.write(js)
|
|
3883
|
+
|
|
3884
|
+
as_script = f'''tell application "Google Chrome"
|
|
3885
|
+
tell front window
|
|
3886
|
+
tell active tab
|
|
3887
|
+
set jsCode to do shell script "cat '{tmpjs}'"
|
|
3888
|
+
return execute javascript jsCode
|
|
3889
|
+
end tell
|
|
3890
|
+
end tell
|
|
3891
|
+
end tell'''
|
|
3892
|
+
|
|
3893
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3894
|
+
try: os.remove(tmpjs)
|
|
3895
|
+
except: pass
|
|
3896
|
+
|
|
3897
|
+
if r.returncode == 0:
|
|
3898
|
+
print(r.stdout.strip() if r.stdout.strip() else "(no return value)")
|
|
3899
|
+
else:
|
|
3900
|
+
print(f"JS error: {r.stderr.strip()[:300]}")
|
|
3901
|
+
`;
|
|
3902
|
+
}
|
|
3903
|
+
case "browser_state": {
|
|
3904
|
+
const osName = platform2();
|
|
3905
|
+
if (osName !== "darwin") {
|
|
3906
|
+
return header + `print("browser_state requires macOS + Google Chrome")`;
|
|
3907
|
+
}
|
|
3908
|
+
return header + `
|
|
3909
|
+
import subprocess
|
|
3910
|
+
|
|
3911
|
+
as_script = '''tell application "Google Chrome"
|
|
3912
|
+
tell front window
|
|
3913
|
+
tell active tab
|
|
3914
|
+
return URL & "|||" & title
|
|
3915
|
+
end tell
|
|
3916
|
+
end tell
|
|
3917
|
+
end tell'''
|
|
3918
|
+
|
|
3919
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3920
|
+
out = r.stdout.strip()
|
|
3921
|
+
if '|||' in out:
|
|
3922
|
+
parts = out.split('|||', 1)
|
|
3923
|
+
print(f"URL: {parts[0]}")
|
|
3924
|
+
print(f"Title: {parts[1]}")
|
|
3925
|
+
else:
|
|
3926
|
+
print(out or r.stderr.strip() or "Chrome not running or no active tab")
|
|
3927
|
+
`;
|
|
3928
|
+
}
|
|
3929
|
+
default:
|
|
3930
|
+
return null;
|
|
3931
|
+
}
|
|
3932
|
+
}
|
|
3933
|
+
/**
|
|
3934
|
+
* Generate a Python script that runs JS in the current Chrome tab via AppleScript.
|
|
3935
|
+
* jsArgJson is passed as variable JSARG inside the JS template.
|
|
3936
|
+
* No Screen Recording needed — uses Chrome's built-in execute javascript.
|
|
3937
|
+
*/
|
|
3938
|
+
_chromeJs(jsArgJson, jsTemplate) {
|
|
3939
|
+
const finalJs = `var JSARG = ${jsArgJson};
|
|
3940
|
+
${jsTemplate.trim()}`;
|
|
3941
|
+
const jsJson = JSON.stringify(finalJs);
|
|
3942
|
+
return `
|
|
3943
|
+
import subprocess, json, os, tempfile
|
|
3944
|
+
|
|
3945
|
+
js = json.loads(${jsJson})
|
|
3946
|
+
tmpjs = os.path.join(tempfile.gettempdir(), f"0agent_cjs_{os.getpid()}.js")
|
|
3947
|
+
with open(tmpjs, 'w') as f:
|
|
3948
|
+
f.write(js)
|
|
3949
|
+
as_script = f"""tell application "Google Chrome"
|
|
3950
|
+
tell front window
|
|
3951
|
+
tell active tab
|
|
3952
|
+
set jsCode to do shell script "cat '{tmpjs}'"
|
|
3953
|
+
return execute javascript jsCode
|
|
3954
|
+
end tell
|
|
3955
|
+
end tell
|
|
3956
|
+
end tell"""
|
|
3957
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3958
|
+
try: os.remove(tmpjs)
|
|
3959
|
+
except: pass
|
|
3960
|
+
result = r.stdout.strip()
|
|
3961
|
+
if r.returncode != 0:
|
|
3962
|
+
print(f"JS error: {r.stderr.strip()[:300]}")
|
|
3963
|
+
elif result.startswith('NOT_FOUND:'):
|
|
3964
|
+
print(f"Not found: {result[10:]} \u2014 call get_elements to see available elements")
|
|
3965
|
+
elif result.startswith('CLICKED:') or result.startswith('TYPED'):
|
|
3966
|
+
print(f"OK {result}")
|
|
3967
|
+
else:
|
|
3968
|
+
print(result if result else "(no return value)")
|
|
3969
|
+
`;
|
|
3970
|
+
}
|
|
3971
|
+
};
|
|
3972
|
+
}
|
|
3973
|
+
});
|
|
3974
|
+
|
|
3975
|
+
// packages/daemon/src/capabilities/OpenInterpreterCapability.ts
|
|
3976
|
+
import { spawn as spawn4 } from "node:child_process";
|
|
3977
|
+
import { writeFileSync as writeFileSync3, unlinkSync as unlinkSync2 } from "node:fs";
|
|
3978
|
+
import { resolve as resolve4 } from "node:path";
|
|
3979
|
+
import { tmpdir as tmpdir2 } from "node:os";
|
|
3015
3980
|
var OI_SCRIPT, OpenInterpreterCapability;
|
|
3016
3981
|
var init_OpenInterpreterCapability = __esm({
|
|
3017
3982
|
"packages/daemon/src/capabilities/OpenInterpreterCapability.ts"() {
|
|
@@ -3104,11 +4069,11 @@ print(output if output else "Task completed successfully")
|
|
|
3104
4069
|
const fullTask = context ? `Context: ${context}
|
|
3105
4070
|
|
|
3106
4071
|
Task: ${task}` : task;
|
|
3107
|
-
const tmpFile =
|
|
3108
|
-
|
|
4072
|
+
const tmpFile = resolve4(tmpdir2(), `0agent_oi_${Date.now()}.py`);
|
|
4073
|
+
writeFileSync3(tmpFile, OI_SCRIPT, "utf8");
|
|
3109
4074
|
let result = await this._runScript(tmpFile, fullTask, signal);
|
|
3110
4075
|
try {
|
|
3111
|
-
|
|
4076
|
+
unlinkSync2(tmpFile);
|
|
3112
4077
|
} catch {
|
|
3113
4078
|
}
|
|
3114
4079
|
if (signal?.aborted) {
|
|
@@ -3124,10 +4089,10 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3124
4089
|
duration_ms: Date.now() - start
|
|
3125
4090
|
};
|
|
3126
4091
|
}
|
|
3127
|
-
|
|
4092
|
+
writeFileSync3(tmpFile, OI_SCRIPT, "utf8");
|
|
3128
4093
|
result = await this._runScript(tmpFile, fullTask, signal);
|
|
3129
4094
|
try {
|
|
3130
|
-
|
|
4095
|
+
unlinkSync2(tmpFile);
|
|
3131
4096
|
} catch {
|
|
3132
4097
|
}
|
|
3133
4098
|
if (signal?.aborted) {
|
|
@@ -3147,8 +4112,8 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3147
4112
|
}
|
|
3148
4113
|
/** Async pip install — never blocks the event loop (unlike spawnSync). */
|
|
3149
4114
|
_pipInstall(pkg, signal) {
|
|
3150
|
-
return new Promise((
|
|
3151
|
-
const proc =
|
|
4115
|
+
return new Promise((resolve17) => {
|
|
4116
|
+
const proc = spawn4("pip3", ["install", pkg, "-q"], {
|
|
3152
4117
|
env: process.env,
|
|
3153
4118
|
stdio: "ignore"
|
|
3154
4119
|
});
|
|
@@ -3158,7 +4123,7 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3158
4123
|
settled = true;
|
|
3159
4124
|
signal?.removeEventListener("abort", onAbort);
|
|
3160
4125
|
clearTimeout(timer);
|
|
3161
|
-
|
|
4126
|
+
resolve17(ok);
|
|
3162
4127
|
};
|
|
3163
4128
|
const onAbort = () => {
|
|
3164
4129
|
try {
|
|
@@ -3180,8 +4145,8 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3180
4145
|
});
|
|
3181
4146
|
}
|
|
3182
4147
|
_runScript(scriptPath, stdinData, signal) {
|
|
3183
|
-
return new Promise((
|
|
3184
|
-
const proc =
|
|
4148
|
+
return new Promise((resolve17) => {
|
|
4149
|
+
const proc = spawn4("python3", [scriptPath], {
|
|
3185
4150
|
env: process.env,
|
|
3186
4151
|
stdio: ["pipe", "pipe", "pipe"]
|
|
3187
4152
|
});
|
|
@@ -3193,7 +4158,7 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3193
4158
|
settled = true;
|
|
3194
4159
|
signal?.removeEventListener("abort", onAbort);
|
|
3195
4160
|
clearTimeout(timer);
|
|
3196
|
-
|
|
4161
|
+
resolve17({ stdout: out.join(""), stderr: err.join(""), code });
|
|
3197
4162
|
};
|
|
3198
4163
|
const onAbort = () => {
|
|
3199
4164
|
try {
|
|
@@ -3307,6 +4272,7 @@ var init_CapabilityRegistry = __esm({
|
|
|
3307
4272
|
init_ShellCapability();
|
|
3308
4273
|
init_FileCapability();
|
|
3309
4274
|
init_MemoryCapability();
|
|
4275
|
+
init_GUICapability();
|
|
3310
4276
|
init_OpenInterpreterCapability();
|
|
3311
4277
|
CapabilityRegistry = class {
|
|
3312
4278
|
capabilities = /* @__PURE__ */ new Map();
|
|
@@ -3335,6 +4301,7 @@ var init_CapabilityRegistry = __esm({
|
|
|
3335
4301
|
this.register(new ScraperCapability());
|
|
3336
4302
|
this.register(new ShellCapability());
|
|
3337
4303
|
this.register(new FileCapability());
|
|
4304
|
+
this.register(new GUICapability());
|
|
3338
4305
|
this.register(new OpenInterpreterCapability());
|
|
3339
4306
|
if (graph) {
|
|
3340
4307
|
this.register(new MemoryCapability(graph, onMemoryWrite));
|
|
@@ -3414,9 +4381,9 @@ var init_capabilities = __esm({
|
|
|
3414
4381
|
});
|
|
3415
4382
|
|
|
3416
4383
|
// packages/daemon/src/AgentExecutor.ts
|
|
3417
|
-
import { spawn as
|
|
3418
|
-
import { writeFileSync as
|
|
3419
|
-
import { resolve as
|
|
4384
|
+
import { spawn as spawn5 } from "node:child_process";
|
|
4385
|
+
import { writeFileSync as writeFileSync4, readFileSync as readFileSync3, readdirSync as readdirSync2, mkdirSync as mkdirSync2, existsSync as existsSync3 } from "node:fs";
|
|
4386
|
+
import { resolve as resolve5, dirname as dirname2, relative } from "node:path";
|
|
3420
4387
|
import { homedir as homedir2 } from "node:os";
|
|
3421
4388
|
var SELF_MOD_PATTERN, AgentExecutor;
|
|
3422
4389
|
var init_AgentExecutor = __esm({
|
|
@@ -3611,9 +4578,9 @@ var init_AgentExecutor = __esm({
|
|
|
3611
4578
|
}
|
|
3612
4579
|
}
|
|
3613
4580
|
shellExec(command, timeoutMs) {
|
|
3614
|
-
return new Promise((
|
|
4581
|
+
return new Promise((resolve17) => {
|
|
3615
4582
|
const chunks = [];
|
|
3616
|
-
const proc =
|
|
4583
|
+
const proc = spawn5("bash", ["-c", command], {
|
|
3617
4584
|
cwd: this.cwd,
|
|
3618
4585
|
env: { ...process.env, TERM: "dumb" },
|
|
3619
4586
|
timeout: timeoutMs
|
|
@@ -3622,10 +4589,10 @@ var init_AgentExecutor = __esm({
|
|
|
3622
4589
|
proc.stderr.on("data", (d) => chunks.push(d.toString()));
|
|
3623
4590
|
proc.on("close", (code) => {
|
|
3624
4591
|
const output = chunks.join("").trim();
|
|
3625
|
-
|
|
4592
|
+
resolve17(output || (code === 0 ? "(command completed, no output)" : `exit code ${code}`));
|
|
3626
4593
|
});
|
|
3627
4594
|
proc.on("error", (err) => {
|
|
3628
|
-
|
|
4595
|
+
resolve17(`Error: ${err.message}`);
|
|
3629
4596
|
});
|
|
3630
4597
|
});
|
|
3631
4598
|
}
|
|
@@ -3633,7 +4600,7 @@ var init_AgentExecutor = __esm({
|
|
|
3633
4600
|
const safe = this.safePath(filePath);
|
|
3634
4601
|
if (!safe) return "Error: path outside working directory";
|
|
3635
4602
|
mkdirSync2(dirname2(safe), { recursive: true });
|
|
3636
|
-
|
|
4603
|
+
writeFileSync4(safe, content, "utf8");
|
|
3637
4604
|
const rel = relative(this.cwd, safe);
|
|
3638
4605
|
return `Written: ${rel} (${content.length} bytes)`;
|
|
3639
4606
|
}
|
|
@@ -3742,7 +4709,7 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3742
4709
|
}
|
|
3743
4710
|
// ─── Helpers ───────────────────────────────────────────────────────────────
|
|
3744
4711
|
safePath(p) {
|
|
3745
|
-
const resolved =
|
|
4712
|
+
const resolved = resolve5(this.cwd, p);
|
|
3746
4713
|
return resolved.startsWith(this.cwd) ? resolved : null;
|
|
3747
4714
|
}
|
|
3748
4715
|
buildSystemPrompt(extra, task) {
|
|
@@ -3783,20 +4750,25 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3783
4750
|
if (hasGUI) {
|
|
3784
4751
|
lines.push(
|
|
3785
4752
|
``,
|
|
3786
|
-
`GUI
|
|
3787
|
-
`
|
|
3788
|
-
`
|
|
3789
|
-
`
|
|
3790
|
-
`
|
|
3791
|
-
`
|
|
3792
|
-
`
|
|
3793
|
-
`
|
|
3794
|
-
`
|
|
3795
|
-
`
|
|
3796
|
-
`
|
|
3797
|
-
`
|
|
3798
|
-
`
|
|
3799
|
-
`
|
|
4753
|
+
`Browser/GUI actions \u2014 ALL work without Screen Recording permission:`,
|
|
4754
|
+
`BROWSER (Chrome): click_text {text} | type_in {selector,text} | get_elements | read_element {selector}`,
|
|
4755
|
+
` get_media_state | scroll_to {selector|direction} | exec_js {js} | browser_state | cdp_screenshot`,
|
|
4756
|
+
`NATIVE APPS (no Screen Recording \u2014 use these for WhatsApp, iMessage, Finder):`,
|
|
4757
|
+
` app_type {app:"WhatsApp", text:"hi"} \u2014 types via clipboard paste \u2192 cmd+v into the app.`,
|
|
4758
|
+
` Uses macOS clipboard so unicode/emoji/special chars all work. Target app gets the text`,
|
|
4759
|
+
` regardless of OS keyboard focus. ALWAYS use this for native app text input.`,
|
|
4760
|
+
` accessibility_click {app:"WhatsApp", element:"Send"} \u2014 click button via Accessibility API.`,
|
|
4761
|
+
` hotkey {keys:"cmd+f", app:"WhatsApp"} \u2014 send hotkey to specific app (not Terminal).`,
|
|
4762
|
+
`WHATSAPP WORKFLOW (use this exact sequence):`,
|
|
4763
|
+
` 1. open_app {app:"WhatsApp"}`,
|
|
4764
|
+
` 2. hotkey {keys:"cmd+f", app:"WhatsApp"} \u2014 open search`,
|
|
4765
|
+
` 3. app_type {app:"WhatsApp", text:"ContactName"} \u2014 search for contact`,
|
|
4766
|
+
` 4. hotkey {keys:"enter", app:"WhatsApp"} \u2014 open the conversation`,
|
|
4767
|
+
` 5. app_type {app:"WhatsApp", text:"your message"} \u2014 type message`,
|
|
4768
|
+
` 6. hotkey {keys:"enter", app:"WhatsApp"} \u2014 send`,
|
|
4769
|
+
` 7. accessibility_click {app:"WhatsApp", element:"Send"} \u2014 if enter doesn't send`,
|
|
4770
|
+
`NEVER use bare 'type' action for native apps \u2014 it goes to Terminal not the app.`,
|
|
4771
|
+
`ALWAYS verify: browser_state after web nav, get_media_state after play/pause, read_element for page content.`
|
|
3800
4772
|
);
|
|
3801
4773
|
}
|
|
3802
4774
|
if (isSelfMod && this.agentRoot) {
|
|
@@ -3809,10 +4781,10 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3809
4781
|
);
|
|
3810
4782
|
}
|
|
3811
4783
|
const agentsFiles = [
|
|
3812
|
-
|
|
3813
|
-
|
|
3814
|
-
|
|
3815
|
-
|
|
4784
|
+
resolve5(this.cwd, "AGENTS.md"),
|
|
4785
|
+
resolve5(this.cwd, ".0agent", "AGENTS.md"),
|
|
4786
|
+
resolve5(this.cwd, "CLAUDE.md"),
|
|
4787
|
+
resolve5(homedir2(), ".0agent", "AGENTS.md")
|
|
3816
4788
|
];
|
|
3817
4789
|
for (const f of agentsFiles) {
|
|
3818
4790
|
try {
|
|
@@ -3923,7 +4895,7 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3923
4895
|
|
|
3924
4896
|
// packages/daemon/src/ExecutionVerifier.ts
|
|
3925
4897
|
import { existsSync as existsSync5 } from "node:fs";
|
|
3926
|
-
import { resolve as
|
|
4898
|
+
import { resolve as resolve6 } from "node:path";
|
|
3927
4899
|
var ExecutionVerifier;
|
|
3928
4900
|
var init_ExecutionVerifier = __esm({
|
|
3929
4901
|
"packages/daemon/src/ExecutionVerifier.ts"() {
|
|
@@ -3960,7 +4932,7 @@ var init_ExecutionVerifier = __esm({
|
|
|
3960
4932
|
};
|
|
3961
4933
|
}
|
|
3962
4934
|
if (files.length > 0) {
|
|
3963
|
-
const lastFile =
|
|
4935
|
+
const lastFile = resolve6(this.cwd, files[files.length - 1]);
|
|
3964
4936
|
const exists = existsSync5(lastFile);
|
|
3965
4937
|
return {
|
|
3966
4938
|
success: exists,
|
|
@@ -4000,10 +4972,10 @@ var init_ExecutionVerifier = __esm({
|
|
|
4000
4972
|
});
|
|
4001
4973
|
|
|
4002
4974
|
// packages/daemon/src/RuntimeSelfHeal.ts
|
|
4003
|
-
import { readFileSync as readFileSync5, writeFileSync as
|
|
4004
|
-
import { resolve as
|
|
4975
|
+
import { readFileSync as readFileSync5, writeFileSync as writeFileSync5, existsSync as existsSync6 } from "node:fs";
|
|
4976
|
+
import { resolve as resolve7, dirname as dirname3 } from "node:path";
|
|
4005
4977
|
import { fileURLToPath } from "node:url";
|
|
4006
|
-
import { execSync as execSync4, spawn as
|
|
4978
|
+
import { execSync as execSync4, spawn as spawn6 } from "node:child_process";
|
|
4007
4979
|
function isRuntimeBug(error) {
|
|
4008
4980
|
if (TASK_FAILURE_PATTERNS.some((p) => p.test(error))) return false;
|
|
4009
4981
|
return RUNTIME_BUG_PATTERNS.some((p) => p.test(error));
|
|
@@ -4073,8 +5045,8 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4073
5045
|
this.llm = llm;
|
|
4074
5046
|
this.eventBus = eventBus;
|
|
4075
5047
|
let dir = dirname3(fileURLToPath(import.meta.url));
|
|
4076
|
-
while (dir !== "/" && !existsSync6(
|
|
4077
|
-
dir =
|
|
5048
|
+
while (dir !== "/" && !existsSync6(resolve7(dir, "package.json"))) {
|
|
5049
|
+
dir = resolve7(dir, "..");
|
|
4078
5050
|
}
|
|
4079
5051
|
this.projectRoot = dir;
|
|
4080
5052
|
}
|
|
@@ -4120,7 +5092,7 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4120
5092
|
try {
|
|
4121
5093
|
const original = readFileSync5(tsPath, "utf8");
|
|
4122
5094
|
const backup = tsPath + ".bak";
|
|
4123
|
-
|
|
5095
|
+
writeFileSync5(backup, original, "utf8");
|
|
4124
5096
|
if (!original.includes(proposal.original_code.trim())) {
|
|
4125
5097
|
return {
|
|
4126
5098
|
applied: false,
|
|
@@ -4129,8 +5101,8 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4129
5101
|
};
|
|
4130
5102
|
}
|
|
4131
5103
|
const patched = original.replace(proposal.original_code, proposal.proposed_code);
|
|
4132
|
-
|
|
4133
|
-
const bundleScript =
|
|
5104
|
+
writeFileSync5(tsPath, patched, "utf8");
|
|
5105
|
+
const bundleScript = resolve7(this.projectRoot, "scripts", "bundle.mjs");
|
|
4134
5106
|
if (existsSync6(bundleScript)) {
|
|
4135
5107
|
try {
|
|
4136
5108
|
execSync4(`node "${bundleScript}"`, {
|
|
@@ -4139,7 +5111,7 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4139
5111
|
stdio: "ignore"
|
|
4140
5112
|
});
|
|
4141
5113
|
} catch {
|
|
4142
|
-
|
|
5114
|
+
writeFileSync5(tsPath, original, "utf8");
|
|
4143
5115
|
return {
|
|
4144
5116
|
applied: false,
|
|
4145
5117
|
restarted: false,
|
|
@@ -4164,11 +5136,11 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4164
5136
|
// ─── Private helpers ───────────────────────────────────────────────────────
|
|
4165
5137
|
findSourceFile(location) {
|
|
4166
5138
|
const candidates = [
|
|
4167
|
-
|
|
5139
|
+
resolve7(this.projectRoot, location.relPath),
|
|
4168
5140
|
// If relPath starts with dist/, look in src/
|
|
4169
|
-
|
|
4170
|
-
|
|
4171
|
-
|
|
5141
|
+
resolve7(this.projectRoot, location.relPath.replace(/^dist\//, "src/").replace(/\.js$/, ".ts")),
|
|
5142
|
+
resolve7(this.projectRoot, "packages", "daemon", "src", location.relPath.replace(/.*src\//, "")),
|
|
5143
|
+
resolve7(this.projectRoot, "packages", "core", "src", location.relPath.replace(/.*src\//, ""))
|
|
4172
5144
|
];
|
|
4173
5145
|
for (const p of candidates) {
|
|
4174
5146
|
if (existsSync6(p)) return p;
|
|
@@ -4236,9 +5208,9 @@ Rules:
|
|
|
4236
5208
|
}
|
|
4237
5209
|
}
|
|
4238
5210
|
restartDaemon() {
|
|
4239
|
-
const bundlePath =
|
|
5211
|
+
const bundlePath = resolve7(this.projectRoot, "dist", "daemon.mjs");
|
|
4240
5212
|
if (existsSync6(bundlePath)) {
|
|
4241
|
-
const child =
|
|
5213
|
+
const child = spawn6(process.execPath, [bundlePath], {
|
|
4242
5214
|
detached: true,
|
|
4243
5215
|
stdio: "ignore",
|
|
4244
5216
|
env: process.env
|
|
@@ -4343,7 +5315,7 @@ __export(ProactiveSurface_exports, {
|
|
|
4343
5315
|
});
|
|
4344
5316
|
import { execSync as execSync7 } from "node:child_process";
|
|
4345
5317
|
import { existsSync as existsSync16, readFileSync as readFileSync14, statSync, readdirSync as readdirSync5 } from "node:fs";
|
|
4346
|
-
import { resolve as
|
|
5318
|
+
import { resolve as resolve14, join as join6 } from "node:path";
|
|
4347
5319
|
function readdirSafe(dir) {
|
|
4348
5320
|
try {
|
|
4349
5321
|
return readdirSync5(dir);
|
|
@@ -4392,7 +5364,7 @@ var init_ProactiveSurface = __esm({
|
|
|
4392
5364
|
return [...this.insights];
|
|
4393
5365
|
}
|
|
4394
5366
|
async poll() {
|
|
4395
|
-
if (!existsSync16(
|
|
5367
|
+
if (!existsSync16(resolve14(this.cwd, ".git"))) return;
|
|
4396
5368
|
const newInsights = [];
|
|
4397
5369
|
const gitInsight = this.checkGitActivity();
|
|
4398
5370
|
if (gitInsight) newInsights.push(gitInsight);
|
|
@@ -4497,8 +5469,8 @@ var init_ProactiveSurface = __esm({
|
|
|
4497
5469
|
|
|
4498
5470
|
// packages/daemon/src/ZeroAgentDaemon.ts
|
|
4499
5471
|
init_src();
|
|
4500
|
-
import { writeFileSync as
|
|
4501
|
-
import { resolve as
|
|
5472
|
+
import { writeFileSync as writeFileSync12, unlinkSync as unlinkSync4, existsSync as existsSync17, mkdirSync as mkdirSync9, readFileSync as readFileSync15 } from "node:fs";
|
|
5473
|
+
import { resolve as resolve15 } from "node:path";
|
|
4502
5474
|
import { homedir as homedir9 } from "node:os";
|
|
4503
5475
|
|
|
4504
5476
|
// packages/daemon/src/config/DaemonConfig.ts
|
|
@@ -5000,19 +5972,19 @@ var ProjectScanner = class {
|
|
|
5000
5972
|
async getRunningPorts() {
|
|
5001
5973
|
const open = [];
|
|
5002
5974
|
await Promise.all(PORTS_TO_CHECK.map(
|
|
5003
|
-
(port) => new Promise((
|
|
5975
|
+
(port) => new Promise((resolve17) => {
|
|
5004
5976
|
const s = createServer();
|
|
5005
5977
|
s.listen(port, "127.0.0.1", () => {
|
|
5006
5978
|
s.close();
|
|
5007
|
-
|
|
5979
|
+
resolve17();
|
|
5008
5980
|
});
|
|
5009
5981
|
s.on("error", () => {
|
|
5010
5982
|
open.push(port);
|
|
5011
|
-
|
|
5983
|
+
resolve17();
|
|
5012
5984
|
});
|
|
5013
5985
|
setTimeout(() => {
|
|
5014
5986
|
s.close();
|
|
5015
|
-
|
|
5987
|
+
resolve17();
|
|
5016
5988
|
}, 200);
|
|
5017
5989
|
})
|
|
5018
5990
|
));
|
|
@@ -5089,7 +6061,7 @@ var ConversationStore = class {
|
|
|
5089
6061
|
|
|
5090
6062
|
// packages/daemon/src/SessionManager.ts
|
|
5091
6063
|
import { readFileSync as readFileSync6, existsSync as existsSync7 } from "node:fs";
|
|
5092
|
-
import { resolve as
|
|
6064
|
+
import { resolve as resolve8 } from "node:path";
|
|
5093
6065
|
import { homedir as homedir3 } from "node:os";
|
|
5094
6066
|
import YAML2 from "yaml";
|
|
5095
6067
|
var SessionManager = class {
|
|
@@ -5466,7 +6438,7 @@ Current task:`;
|
|
|
5466
6438
|
model: agentResult.model
|
|
5467
6439
|
});
|
|
5468
6440
|
} else {
|
|
5469
|
-
const cfgPath =
|
|
6441
|
+
const cfgPath = resolve8(homedir3(), ".0agent", "config.yaml");
|
|
5470
6442
|
const output = `No LLM API key found. Add one to ${cfgPath} or run: 0agent init`;
|
|
5471
6443
|
this.addStep(sessionId, "\u26A0 No LLM API key configured \u2014 run: 0agent init");
|
|
5472
6444
|
this.completeSession(sessionId, { output });
|
|
@@ -5509,7 +6481,7 @@ Current task:`;
|
|
|
5509
6481
|
*/
|
|
5510
6482
|
getFreshLLM() {
|
|
5511
6483
|
try {
|
|
5512
|
-
const configPath =
|
|
6484
|
+
const configPath = resolve8(homedir3(), ".0agent", "config.yaml");
|
|
5513
6485
|
if (!existsSync7(configPath)) return this.llm;
|
|
5514
6486
|
const raw = readFileSync6(configPath, "utf8");
|
|
5515
6487
|
const cfg = YAML2.parse(raw);
|
|
@@ -5537,7 +6509,7 @@ Current task:`;
|
|
|
5537
6509
|
if (!this.graph) return;
|
|
5538
6510
|
let extractLLM;
|
|
5539
6511
|
try {
|
|
5540
|
-
const cfgPath =
|
|
6512
|
+
const cfgPath = resolve8(homedir3(), ".0agent", "config.yaml");
|
|
5541
6513
|
if (existsSync7(cfgPath)) {
|
|
5542
6514
|
const raw = readFileSync6(cfgPath, "utf8");
|
|
5543
6515
|
const cfg = YAML2.parse(raw);
|
|
@@ -5902,7 +6874,7 @@ var BackgroundWorkers = class {
|
|
|
5902
6874
|
};
|
|
5903
6875
|
|
|
5904
6876
|
// packages/daemon/src/SkillRegistry.ts
|
|
5905
|
-
import { readFileSync as readFileSync7, readdirSync as readdirSync3, existsSync as existsSync8, writeFileSync as
|
|
6877
|
+
import { readFileSync as readFileSync7, readdirSync as readdirSync3, existsSync as existsSync8, writeFileSync as writeFileSync6, unlinkSync as unlinkSync3, mkdirSync as mkdirSync3 } from "node:fs";
|
|
5906
6878
|
import { join as join2 } from "node:path";
|
|
5907
6879
|
import { homedir as homedir4 } from "node:os";
|
|
5908
6880
|
import YAML3 from "yaml";
|
|
@@ -5966,7 +6938,7 @@ var SkillRegistry = class {
|
|
|
5966
6938
|
}
|
|
5967
6939
|
mkdirSync3(this.customDir, { recursive: true });
|
|
5968
6940
|
const filePath = join2(this.customDir, `${name}.yaml`);
|
|
5969
|
-
|
|
6941
|
+
writeFileSync6(filePath, yamlContent, "utf8");
|
|
5970
6942
|
const skill = YAML3.parse(yamlContent);
|
|
5971
6943
|
this.skills.set(name, skill);
|
|
5972
6944
|
return skill;
|
|
@@ -5980,7 +6952,7 @@ var SkillRegistry = class {
|
|
|
5980
6952
|
}
|
|
5981
6953
|
const filePath = join2(this.customDir, `${name}.yaml`);
|
|
5982
6954
|
if (existsSync8(filePath)) {
|
|
5983
|
-
|
|
6955
|
+
unlinkSync3(filePath);
|
|
5984
6956
|
}
|
|
5985
6957
|
this.skills.delete(name);
|
|
5986
6958
|
}
|
|
@@ -5993,7 +6965,7 @@ var SkillRegistry = class {
|
|
|
5993
6965
|
import { Hono as Hono14 } from "hono";
|
|
5994
6966
|
import { serve } from "@hono/node-server";
|
|
5995
6967
|
import { readFileSync as readFileSync9 } from "node:fs";
|
|
5996
|
-
import { resolve as
|
|
6968
|
+
import { resolve as resolve10, dirname as dirname4 } from "node:path";
|
|
5997
6969
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
5998
6970
|
|
|
5999
6971
|
// packages/daemon/src/routes/health.ts
|
|
@@ -6286,7 +7258,7 @@ function memoryRoutes(deps) {
|
|
|
6286
7258
|
init_LLMExecutor();
|
|
6287
7259
|
import { Hono as Hono10 } from "hono";
|
|
6288
7260
|
import { readFileSync as readFileSync8, existsSync as existsSync9 } from "node:fs";
|
|
6289
|
-
import { resolve as
|
|
7261
|
+
import { resolve as resolve9 } from "node:path";
|
|
6290
7262
|
import { homedir as homedir5 } from "node:os";
|
|
6291
7263
|
import YAML4 from "yaml";
|
|
6292
7264
|
function llmRoutes() {
|
|
@@ -6294,7 +7266,7 @@ function llmRoutes() {
|
|
|
6294
7266
|
app.post("/ping", async (c) => {
|
|
6295
7267
|
const start = Date.now();
|
|
6296
7268
|
try {
|
|
6297
|
-
const configPath =
|
|
7269
|
+
const configPath = resolve9(homedir5(), ".0agent", "config.yaml");
|
|
6298
7270
|
if (!existsSync9(configPath)) {
|
|
6299
7271
|
return c.json({ ok: false, error: "Config not found. Run: 0agent init" });
|
|
6300
7272
|
}
|
|
@@ -6813,11 +7785,11 @@ function runtimeRoutes(deps) {
|
|
|
6813
7785
|
// packages/daemon/src/HTTPServer.ts
|
|
6814
7786
|
function findGraphHtml() {
|
|
6815
7787
|
const candidates = [
|
|
6816
|
-
|
|
7788
|
+
resolve10(dirname4(fileURLToPath2(import.meta.url)), "graph.html"),
|
|
6817
7789
|
// dev (src/)
|
|
6818
|
-
|
|
7790
|
+
resolve10(dirname4(fileURLToPath2(import.meta.url)), "..", "graph.html"),
|
|
6819
7791
|
// bundled (dist/../)
|
|
6820
|
-
|
|
7792
|
+
resolve10(dirname4(fileURLToPath2(import.meta.url)), "..", "dist", "graph.html")
|
|
6821
7793
|
];
|
|
6822
7794
|
for (const p of candidates) {
|
|
6823
7795
|
try {
|
|
@@ -6867,7 +7839,7 @@ var HTTPServer = class {
|
|
|
6867
7839
|
this.app.get("/graph", serveGraph);
|
|
6868
7840
|
}
|
|
6869
7841
|
start() {
|
|
6870
|
-
return new Promise((
|
|
7842
|
+
return new Promise((resolve17) => {
|
|
6871
7843
|
this.server = serve(
|
|
6872
7844
|
{
|
|
6873
7845
|
fetch: this.app.fetch,
|
|
@@ -6875,20 +7847,20 @@ var HTTPServer = class {
|
|
|
6875
7847
|
hostname: this.deps.host
|
|
6876
7848
|
},
|
|
6877
7849
|
() => {
|
|
6878
|
-
|
|
7850
|
+
resolve17();
|
|
6879
7851
|
}
|
|
6880
7852
|
);
|
|
6881
7853
|
});
|
|
6882
7854
|
}
|
|
6883
7855
|
stop() {
|
|
6884
|
-
return new Promise((
|
|
7856
|
+
return new Promise((resolve17, reject) => {
|
|
6885
7857
|
if (!this.server) {
|
|
6886
|
-
|
|
7858
|
+
resolve17();
|
|
6887
7859
|
return;
|
|
6888
7860
|
}
|
|
6889
7861
|
this.server.close((err) => {
|
|
6890
7862
|
if (err) reject(err);
|
|
6891
|
-
else
|
|
7863
|
+
else resolve17();
|
|
6892
7864
|
});
|
|
6893
7865
|
});
|
|
6894
7866
|
}
|
|
@@ -6902,11 +7874,11 @@ init_LLMExecutor();
|
|
|
6902
7874
|
|
|
6903
7875
|
// packages/daemon/src/IdentityManager.ts
|
|
6904
7876
|
init_src();
|
|
6905
|
-
import { readFileSync as readFileSync10, writeFileSync as
|
|
6906
|
-
import { resolve as
|
|
7877
|
+
import { readFileSync as readFileSync10, writeFileSync as writeFileSync7, existsSync as existsSync10, mkdirSync as mkdirSync4 } from "node:fs";
|
|
7878
|
+
import { resolve as resolve11, dirname as dirname5 } from "node:path";
|
|
6907
7879
|
import { homedir as homedir6, hostname } from "node:os";
|
|
6908
7880
|
import YAML5 from "yaml";
|
|
6909
|
-
var IDENTITY_PATH =
|
|
7881
|
+
var IDENTITY_PATH = resolve11(homedir6(), ".0agent", "identity.yaml");
|
|
6910
7882
|
var DEFAULT_IDENTITY = {
|
|
6911
7883
|
name: "User",
|
|
6912
7884
|
device_id: `unknown-device`,
|
|
@@ -6978,16 +7950,16 @@ var IdentityManager = class {
|
|
|
6978
7950
|
if (!existsSync10(dir)) {
|
|
6979
7951
|
mkdirSync4(dir, { recursive: true });
|
|
6980
7952
|
}
|
|
6981
|
-
|
|
7953
|
+
writeFileSync7(IDENTITY_PATH, YAML5.stringify(this.identity), "utf8");
|
|
6982
7954
|
}
|
|
6983
7955
|
};
|
|
6984
7956
|
|
|
6985
7957
|
// packages/daemon/src/TeamManager.ts
|
|
6986
|
-
import { readFileSync as readFileSync11, writeFileSync as
|
|
6987
|
-
import { resolve as
|
|
7958
|
+
import { readFileSync as readFileSync11, writeFileSync as writeFileSync8, existsSync as existsSync11, mkdirSync as mkdirSync5 } from "node:fs";
|
|
7959
|
+
import { resolve as resolve12 } from "node:path";
|
|
6988
7960
|
import { homedir as homedir7 } from "node:os";
|
|
6989
7961
|
import YAML6 from "yaml";
|
|
6990
|
-
var TEAMS_PATH =
|
|
7962
|
+
var TEAMS_PATH = resolve12(homedir7(), ".0agent", "teams.yaml");
|
|
6991
7963
|
var TeamManager = class {
|
|
6992
7964
|
config;
|
|
6993
7965
|
constructor() {
|
|
@@ -7047,8 +8019,8 @@ var TeamManager = class {
|
|
|
7047
8019
|
}
|
|
7048
8020
|
}
|
|
7049
8021
|
save() {
|
|
7050
|
-
mkdirSync5(
|
|
7051
|
-
|
|
8022
|
+
mkdirSync5(resolve12(homedir7(), ".0agent"), { recursive: true });
|
|
8023
|
+
writeFileSync8(TEAMS_PATH, YAML6.stringify(this.config), "utf8");
|
|
7052
8024
|
}
|
|
7053
8025
|
};
|
|
7054
8026
|
|
|
@@ -7131,8 +8103,8 @@ var TeamSync = class {
|
|
|
7131
8103
|
};
|
|
7132
8104
|
|
|
7133
8105
|
// packages/daemon/src/GitHubMemorySync.ts
|
|
7134
|
-
import { readFileSync as readFileSync12, writeFileSync as
|
|
7135
|
-
import { resolve as
|
|
8106
|
+
import { readFileSync as readFileSync12, writeFileSync as writeFileSync9, existsSync as existsSync12, readdirSync as readdirSync4 } from "node:fs";
|
|
8107
|
+
import { resolve as resolve13 } from "node:path";
|
|
7136
8108
|
import { homedir as homedir8 } from "node:os";
|
|
7137
8109
|
var GITHUB_API = "https://api.github.com";
|
|
7138
8110
|
async function ghFetch(path, token, opts) {
|
|
@@ -7252,10 +8224,10 @@ var GitHubMemorySync = class {
|
|
|
7252
8224
|
)
|
|
7253
8225
|
);
|
|
7254
8226
|
}
|
|
7255
|
-
const customSkillsDir =
|
|
8227
|
+
const customSkillsDir = resolve13(homedir8(), ".0agent", "skills", "custom");
|
|
7256
8228
|
if (existsSync12(customSkillsDir)) {
|
|
7257
8229
|
for (const file of readdirSync4(customSkillsDir).filter((f) => f.endsWith(".yaml"))) {
|
|
7258
|
-
const content = readFileSync12(
|
|
8230
|
+
const content = readFileSync12(resolve13(customSkillsDir, file), "utf8");
|
|
7259
8231
|
pushes.push(putFile(token, owner, repo, `skills/custom/${file}`, content, commitMsg));
|
|
7260
8232
|
}
|
|
7261
8233
|
}
|
|
@@ -7441,7 +8413,7 @@ var GitHubMemorySync = class {
|
|
|
7441
8413
|
}
|
|
7442
8414
|
async pullCustomSkills() {
|
|
7443
8415
|
const { token, owner, repo } = this.config;
|
|
7444
|
-
const dir =
|
|
8416
|
+
const dir = resolve13(homedir8(), ".0agent", "skills", "custom");
|
|
7445
8417
|
try {
|
|
7446
8418
|
const res = await ghFetch(`/repos/${owner}/${repo}/contents/skills/custom`, token);
|
|
7447
8419
|
if (!res.ok) return;
|
|
@@ -7451,7 +8423,7 @@ var GitHubMemorySync = class {
|
|
|
7451
8423
|
if (content) {
|
|
7452
8424
|
const { mkdirSync: mkdirSync10 } = await import("node:fs");
|
|
7453
8425
|
mkdirSync10(dir, { recursive: true });
|
|
7454
|
-
|
|
8426
|
+
writeFileSync9(resolve13(dir, file.name), content, "utf8");
|
|
7455
8427
|
}
|
|
7456
8428
|
}
|
|
7457
8429
|
} catch {
|
|
@@ -7528,7 +8500,7 @@ git checkout <commit> graph/ # restore graph files
|
|
|
7528
8500
|
};
|
|
7529
8501
|
|
|
7530
8502
|
// packages/daemon/src/CodespaceManager.ts
|
|
7531
|
-
import { execSync as execSync5, spawn as
|
|
8503
|
+
import { execSync as execSync5, spawn as spawn7 } from "node:child_process";
|
|
7532
8504
|
var BROWSER_PORT_REMOTE = 3e3;
|
|
7533
8505
|
var BROWSER_PORT_LOCAL = 3001;
|
|
7534
8506
|
var DISPLAY_NAME = "0agent-browser";
|
|
@@ -7623,7 +8595,7 @@ var CodespaceManager = class {
|
|
|
7623
8595
|
async openTunnel(name) {
|
|
7624
8596
|
this.closeTunnel();
|
|
7625
8597
|
console.log(`[Codespace] Opening tunnel port ${BROWSER_PORT_REMOTE} \u2192 localhost:${BROWSER_PORT_LOCAL}...`);
|
|
7626
|
-
this.forwardProcess =
|
|
8598
|
+
this.forwardProcess = spawn7(
|
|
7627
8599
|
"gh",
|
|
7628
8600
|
["codespace", "ports", "forward", `${BROWSER_PORT_REMOTE}:${BROWSER_PORT_LOCAL}`, "--codespace", name],
|
|
7629
8601
|
{ stdio: ["ignore", "ignore", "ignore"] }
|
|
@@ -8073,7 +9045,7 @@ var SurfaceRouter = class {
|
|
|
8073
9045
|
|
|
8074
9046
|
// packages/daemon/src/surfaces/TelegramAdapter.ts
|
|
8075
9047
|
import { existsSync as existsSync13, mkdirSync as mkdirSync6 } from "node:fs";
|
|
8076
|
-
import { tmpdir as
|
|
9048
|
+
import { tmpdir as tmpdir3 } from "node:os";
|
|
8077
9049
|
import { join as join3 } from "node:path";
|
|
8078
9050
|
var TelegramAdapter = class {
|
|
8079
9051
|
constructor(config) {
|
|
@@ -8278,15 +9250,15 @@ Sessions: ${h.active_sessions} active`
|
|
|
8278
9250
|
try {
|
|
8279
9251
|
const fileUrl = await this._getFileUrl(fileId);
|
|
8280
9252
|
if (!fileUrl) return null;
|
|
8281
|
-
const tmpDir = join3(
|
|
9253
|
+
const tmpDir = join3(tmpdir3(), "0agent-voice");
|
|
8282
9254
|
if (!existsSync13(tmpDir)) mkdirSync6(tmpDir, { recursive: true });
|
|
8283
9255
|
const tmpPath = join3(tmpDir, `${fileId}.ogg`);
|
|
8284
9256
|
const wavPath = join3(tmpDir, `${fileId}.wav`);
|
|
8285
9257
|
const res = await fetch(fileUrl);
|
|
8286
9258
|
if (!res.ok) return null;
|
|
8287
9259
|
const buf = await res.arrayBuffer();
|
|
8288
|
-
const { writeFileSync:
|
|
8289
|
-
|
|
9260
|
+
const { writeFileSync: writeFileSync13 } = await import("node:fs");
|
|
9261
|
+
writeFileSync13(tmpPath, Buffer.from(buf));
|
|
8290
9262
|
const { execSync: execSync8 } = await import("node:child_process");
|
|
8291
9263
|
try {
|
|
8292
9264
|
execSync8(`ffmpeg -y -i "${tmpPath}" -ar 16000 -ac 1 "${wavPath}" 2>/dev/null`, { timeout: 3e4 });
|
|
@@ -8742,9 +9714,9 @@ var WhatsAppAdapter = class {
|
|
|
8742
9714
|
import * as readline from "node:readline";
|
|
8743
9715
|
|
|
8744
9716
|
// packages/daemon/src/surfaces/WhisperSTT.ts
|
|
8745
|
-
import { execSync as execSync6, spawnSync as
|
|
9717
|
+
import { execSync as execSync6, spawnSync as spawnSync5 } from "node:child_process";
|
|
8746
9718
|
import { existsSync as existsSync14, mkdirSync as mkdirSync7, readFileSync as readFileSync13 } from "node:fs";
|
|
8747
|
-
import { tmpdir as
|
|
9719
|
+
import { tmpdir as tmpdir4 } from "node:os";
|
|
8748
9720
|
import { join as join4, basename } from "node:path";
|
|
8749
9721
|
var WhisperSTT = class _WhisperSTT {
|
|
8750
9722
|
model;
|
|
@@ -8765,7 +9737,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
8765
9737
|
console.warn(`[WhisperSTT] Audio file not found: ${audioPath}`);
|
|
8766
9738
|
return null;
|
|
8767
9739
|
}
|
|
8768
|
-
const outDir = join4(
|
|
9740
|
+
const outDir = join4(tmpdir4(), "0agent-whisper");
|
|
8769
9741
|
if (!existsSync14(outDir)) mkdirSync7(outDir, { recursive: true });
|
|
8770
9742
|
try {
|
|
8771
9743
|
const langFlag = this.language ? `--language ${this.language}` : "";
|
|
@@ -8789,7 +9761,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
8789
9761
|
static detectBinary() {
|
|
8790
9762
|
for (const bin of ["whisper", "faster-whisper", "whisper.cpp"]) {
|
|
8791
9763
|
try {
|
|
8792
|
-
const result =
|
|
9764
|
+
const result = spawnSync5(bin, ["--help"], { timeout: 3e3, stdio: "pipe" });
|
|
8793
9765
|
if (result.status === 0 || result.status === 1) return bin;
|
|
8794
9766
|
} catch {
|
|
8795
9767
|
}
|
|
@@ -8798,25 +9770,25 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
8798
9770
|
}
|
|
8799
9771
|
};
|
|
8800
9772
|
async function recordAudio(durationSeconds) {
|
|
8801
|
-
const outDir = join4(
|
|
9773
|
+
const outDir = join4(tmpdir4(), "0agent-voice");
|
|
8802
9774
|
if (!existsSync14(outDir)) mkdirSync7(outDir, { recursive: true });
|
|
8803
9775
|
const outPath = join4(outDir, `recording-${Date.now()}.wav`);
|
|
8804
|
-
const soxResult =
|
|
9776
|
+
const soxResult = spawnSync5(
|
|
8805
9777
|
"sox",
|
|
8806
9778
|
["-d", "-r", "16000", "-c", "1", "-b", "16", outPath, "trim", "0", String(durationSeconds)],
|
|
8807
9779
|
{ timeout: (durationSeconds + 5) * 1e3, stdio: "pipe" }
|
|
8808
9780
|
);
|
|
8809
9781
|
if (soxResult.status === 0 && existsSync14(outPath)) return outPath;
|
|
8810
|
-
const
|
|
9782
|
+
const platform3 = process.platform;
|
|
8811
9783
|
let ffmpegDevice;
|
|
8812
|
-
if (
|
|
9784
|
+
if (platform3 === "darwin") {
|
|
8813
9785
|
ffmpegDevice = ["-f", "avfoundation", "-i", ":0"];
|
|
8814
|
-
} else if (
|
|
9786
|
+
} else if (platform3 === "linux") {
|
|
8815
9787
|
ffmpegDevice = ["-f", "alsa", "-i", "default"];
|
|
8816
9788
|
} else {
|
|
8817
9789
|
return null;
|
|
8818
9790
|
}
|
|
8819
|
-
const ffmpegResult =
|
|
9791
|
+
const ffmpegResult = spawnSync5(
|
|
8820
9792
|
"ffmpeg",
|
|
8821
9793
|
["-y", ...ffmpegDevice, "-ar", "16000", "-ac", "1", "-t", String(durationSeconds), outPath],
|
|
8822
9794
|
{ timeout: (durationSeconds + 5) * 1e3, stdio: "pipe" }
|
|
@@ -8825,7 +9797,7 @@ async function recordAudio(durationSeconds) {
|
|
|
8825
9797
|
}
|
|
8826
9798
|
|
|
8827
9799
|
// packages/daemon/src/surfaces/NativeTTS.ts
|
|
8828
|
-
import { spawnSync as
|
|
9800
|
+
import { spawnSync as spawnSync6, spawn as spawn8 } from "node:child_process";
|
|
8829
9801
|
var NativeTTS = class _NativeTTS {
|
|
8830
9802
|
engine;
|
|
8831
9803
|
voice;
|
|
@@ -8849,11 +9821,11 @@ var NativeTTS = class _NativeTTS {
|
|
|
8849
9821
|
if (!this.resolvedEngine) return;
|
|
8850
9822
|
const cleaned = this._clean(text);
|
|
8851
9823
|
if (!cleaned) return;
|
|
8852
|
-
return new Promise((
|
|
9824
|
+
return new Promise((resolve17) => {
|
|
8853
9825
|
const args = this._buildArgs(this.resolvedEngine, cleaned);
|
|
8854
|
-
const proc =
|
|
8855
|
-
proc.on("close", () =>
|
|
8856
|
-
proc.on("error", () =>
|
|
9826
|
+
const proc = spawn8(this.resolvedEngine, args, { stdio: "ignore" });
|
|
9827
|
+
proc.on("close", () => resolve17());
|
|
9828
|
+
proc.on("error", () => resolve17());
|
|
8857
9829
|
});
|
|
8858
9830
|
}
|
|
8859
9831
|
/** Check if any TTS engine is available */
|
|
@@ -8867,8 +9839,8 @@ var NativeTTS = class _NativeTTS {
|
|
|
8867
9839
|
return _NativeTTS._detectEngine();
|
|
8868
9840
|
}
|
|
8869
9841
|
static _detectEngine() {
|
|
8870
|
-
const
|
|
8871
|
-
if (
|
|
9842
|
+
const platform3 = process.platform;
|
|
9843
|
+
if (platform3 === "darwin") {
|
|
8872
9844
|
if (_NativeTTS._isAvailable("say")) return "say";
|
|
8873
9845
|
}
|
|
8874
9846
|
if (_NativeTTS._isAvailable("piper")) return "piper";
|
|
@@ -8878,7 +9850,7 @@ var NativeTTS = class _NativeTTS {
|
|
|
8878
9850
|
}
|
|
8879
9851
|
static _isAvailable(engine) {
|
|
8880
9852
|
try {
|
|
8881
|
-
const r =
|
|
9853
|
+
const r = spawnSync6(engine, ["--help"], { timeout: 2e3, stdio: "pipe" });
|
|
8882
9854
|
return r.status === 0 || r.status === 1;
|
|
8883
9855
|
} catch {
|
|
8884
9856
|
return false;
|
|
@@ -8911,7 +9883,7 @@ var NativeTTS = class _NativeTTS {
|
|
|
8911
9883
|
}
|
|
8912
9884
|
_speakWith(engine, text) {
|
|
8913
9885
|
const args = this._buildArgs(engine, text);
|
|
8914
|
-
const proc =
|
|
9886
|
+
const proc = spawn8(engine, args, { stdio: "ignore", detached: true });
|
|
8915
9887
|
proc.unref();
|
|
8916
9888
|
}
|
|
8917
9889
|
/** Remove markdown/ANSI and control chars before speaking */
|
|
@@ -9036,10 +10008,10 @@ var VoiceAdapter = class {
|
|
|
9036
10008
|
};
|
|
9037
10009
|
|
|
9038
10010
|
// packages/daemon/src/surfaces/MeetingAdapter.ts
|
|
9039
|
-
import { existsSync as existsSync15, mkdirSync as mkdirSync8, writeFileSync as
|
|
9040
|
-
import { tmpdir as
|
|
10011
|
+
import { existsSync as existsSync15, mkdirSync as mkdirSync8, writeFileSync as writeFileSync11 } from "node:fs";
|
|
10012
|
+
import { tmpdir as tmpdir5 } from "node:os";
|
|
9041
10013
|
import { join as join5 } from "node:path";
|
|
9042
|
-
import { spawn as
|
|
10014
|
+
import { spawn as spawn9 } from "node:child_process";
|
|
9043
10015
|
var MeetingAdapter = class {
|
|
9044
10016
|
name = "meeting";
|
|
9045
10017
|
messageHandler = null;
|
|
@@ -9063,7 +10035,7 @@ var MeetingAdapter = class {
|
|
|
9063
10035
|
this.silenceTimeoutSeconds = config.silence_timeout_seconds ?? 60;
|
|
9064
10036
|
this.triggerPhrases = config.trigger_phrases ?? ["agent,", "hey agent", "ok agent"];
|
|
9065
10037
|
this.contextWindowSeconds = config.context_window_seconds ?? 120;
|
|
9066
|
-
this.tmpDir = join5(
|
|
10038
|
+
this.tmpDir = join5(tmpdir5(), "0agent-meeting");
|
|
9067
10039
|
if (!existsSync15(this.tmpDir)) mkdirSync8(this.tmpDir, { recursive: true });
|
|
9068
10040
|
this.stt = new WhisperSTT({ model: config.whisper_model ?? "base" });
|
|
9069
10041
|
}
|
|
@@ -9170,26 +10142,26 @@ ${msg.text}
|
|
|
9170
10142
|
}
|
|
9171
10143
|
}
|
|
9172
10144
|
async _captureSystemAudio(outPath, seconds) {
|
|
9173
|
-
return new Promise((
|
|
9174
|
-
const
|
|
10145
|
+
return new Promise((resolve17) => {
|
|
10146
|
+
const platform3 = process.platform;
|
|
9175
10147
|
let args;
|
|
9176
|
-
if (
|
|
10148
|
+
if (platform3 === "darwin") {
|
|
9177
10149
|
args = ["-y", "-f", "avfoundation", "-i", ":1", "-ar", "16000", "-ac", "1", "-t", String(seconds), outPath];
|
|
9178
|
-
} else if (
|
|
10150
|
+
} else if (platform3 === "linux") {
|
|
9179
10151
|
args = ["-y", "-f", "pulse", "-i", "default.monitor", "-ar", "16000", "-ac", "1", "-t", String(seconds), outPath];
|
|
9180
10152
|
} else {
|
|
9181
|
-
|
|
10153
|
+
resolve17(false);
|
|
9182
10154
|
return;
|
|
9183
10155
|
}
|
|
9184
|
-
const proc =
|
|
10156
|
+
const proc = spawn9("ffmpeg", args, { stdio: "pipe" });
|
|
9185
10157
|
this.ffmpegProcess = proc;
|
|
9186
10158
|
proc.on("close", (code) => {
|
|
9187
10159
|
this.ffmpegProcess = null;
|
|
9188
|
-
|
|
10160
|
+
resolve17(code === 0);
|
|
9189
10161
|
});
|
|
9190
10162
|
proc.on("error", () => {
|
|
9191
10163
|
this.ffmpegProcess = null;
|
|
9192
|
-
|
|
10164
|
+
resolve17(false);
|
|
9193
10165
|
});
|
|
9194
10166
|
});
|
|
9195
10167
|
}
|
|
@@ -9248,13 +10220,13 @@ ${fullTranscript}`,
|
|
|
9248
10220
|
const content = `Meeting Transcript
|
|
9249
10221
|
${"=".repeat(40)}
|
|
9250
10222
|
${this.getTranscript()}`;
|
|
9251
|
-
|
|
10223
|
+
writeFileSync11(outPath, content, "utf8");
|
|
9252
10224
|
return outPath;
|
|
9253
10225
|
}
|
|
9254
10226
|
static isAvailable() {
|
|
9255
10227
|
try {
|
|
9256
|
-
const { spawnSync:
|
|
9257
|
-
const r =
|
|
10228
|
+
const { spawnSync: spawnSync7 } = __require("node:child_process");
|
|
10229
|
+
const r = spawnSync7("ffmpeg", ["-version"], { timeout: 2e3, stdio: "pipe" });
|
|
9258
10230
|
return r.status === 0;
|
|
9259
10231
|
} catch {
|
|
9260
10232
|
return false;
|
|
@@ -9287,11 +10259,11 @@ var ZeroAgentDaemon = class {
|
|
|
9287
10259
|
startedAt = 0;
|
|
9288
10260
|
pidFilePath;
|
|
9289
10261
|
constructor() {
|
|
9290
|
-
this.pidFilePath =
|
|
10262
|
+
this.pidFilePath = resolve15(homedir9(), ".0agent", "daemon.pid");
|
|
9291
10263
|
}
|
|
9292
10264
|
async start(opts) {
|
|
9293
10265
|
this.config = await loadConfig(opts?.config_path);
|
|
9294
|
-
const dotDir =
|
|
10266
|
+
const dotDir = resolve15(homedir9(), ".0agent");
|
|
9295
10267
|
if (!existsSync17(dotDir)) {
|
|
9296
10268
|
mkdirSync9(dotDir, { recursive: true });
|
|
9297
10269
|
}
|
|
@@ -9366,10 +10338,10 @@ var ZeroAgentDaemon = class {
|
|
|
9366
10338
|
console.log(`[0agent] Teams: ${teams.map((t) => t.team_name).join(", ")}`);
|
|
9367
10339
|
}
|
|
9368
10340
|
const _daemonFile = fileURLToPath3(import.meta.url);
|
|
9369
|
-
const _agentRoot =
|
|
10341
|
+
const _agentRoot = resolve15(dirname7(_daemonFile), "..");
|
|
9370
10342
|
let agentRoot;
|
|
9371
10343
|
try {
|
|
9372
|
-
const _pkg = JSON.parse(readFileSync15(
|
|
10344
|
+
const _pkg = JSON.parse(readFileSync15(resolve15(_agentRoot, "package.json"), "utf8"));
|
|
9373
10345
|
if (_pkg.name === "0agent") agentRoot = _agentRoot;
|
|
9374
10346
|
} catch {
|
|
9375
10347
|
}
|
|
@@ -9521,7 +10493,7 @@ var ZeroAgentDaemon = class {
|
|
|
9521
10493
|
}
|
|
9522
10494
|
});
|
|
9523
10495
|
await this.httpServer.start();
|
|
9524
|
-
|
|
10496
|
+
writeFileSync12(this.pidFilePath, String(process.pid), "utf8");
|
|
9525
10497
|
console.log(
|
|
9526
10498
|
`[0agent] Daemon started on ${this.config.server.host}:${this.config.server.port} (PID: ${process.pid})`
|
|
9527
10499
|
);
|
|
@@ -9575,7 +10547,7 @@ var ZeroAgentDaemon = class {
|
|
|
9575
10547
|
this.adapter = null;
|
|
9576
10548
|
if (existsSync17(this.pidFilePath)) {
|
|
9577
10549
|
try {
|
|
9578
|
-
|
|
10550
|
+
unlinkSync4(this.pidFilePath);
|
|
9579
10551
|
} catch {
|
|
9580
10552
|
}
|
|
9581
10553
|
}
|
|
@@ -9603,10 +10575,10 @@ var ZeroAgentDaemon = class {
|
|
|
9603
10575
|
};
|
|
9604
10576
|
|
|
9605
10577
|
// packages/daemon/src/start.ts
|
|
9606
|
-
import { resolve as
|
|
10578
|
+
import { resolve as resolve16 } from "node:path";
|
|
9607
10579
|
import { homedir as homedir10 } from "node:os";
|
|
9608
10580
|
import { existsSync as existsSync18 } from "node:fs";
|
|
9609
|
-
var CONFIG_PATH = process.env["ZEROAGENT_CONFIG"] ??
|
|
10581
|
+
var CONFIG_PATH = process.env["ZEROAGENT_CONFIG"] ?? resolve16(homedir10(), ".0agent", "config.yaml");
|
|
9610
10582
|
if (!existsSync18(CONFIG_PATH)) {
|
|
9611
10583
|
console.error(`
|
|
9612
10584
|
0agent is not initialised.
|