0agent 1.0.67 → 1.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/daemon.mjs +1074 -148
- package/package.json +1 -1
package/dist/daemon.mjs
CHANGED
|
@@ -1507,7 +1507,7 @@ var init_EdgeWeightUpdater = __esm({
|
|
|
1507
1507
|
this.weightLog.append(event);
|
|
1508
1508
|
}
|
|
1509
1509
|
sleep(ms) {
|
|
1510
|
-
return new Promise((
|
|
1510
|
+
return new Promise((resolve17) => setTimeout(resolve17, ms));
|
|
1511
1511
|
}
|
|
1512
1512
|
};
|
|
1513
1513
|
}
|
|
@@ -3007,11 +3007,934 @@ var init_MemoryCapability = __esm({
|
|
|
3007
3007
|
}
|
|
3008
3008
|
});
|
|
3009
3009
|
|
|
3010
|
-
// packages/daemon/src/capabilities/
|
|
3011
|
-
import { spawn as spawn3 } from "node:child_process";
|
|
3010
|
+
// packages/daemon/src/capabilities/GUICapability.ts
|
|
3011
|
+
import { spawn as spawn3, spawnSync as spawnSync4 } from "node:child_process";
|
|
3012
3012
|
import { writeFileSync as writeFileSync2, unlinkSync } from "node:fs";
|
|
3013
3013
|
import { resolve as resolve3 } from "node:path";
|
|
3014
|
-
import { tmpdir } from "node:os";
|
|
3014
|
+
import { tmpdir, platform as platform2 } from "node:os";
|
|
3015
|
+
var GUICapability;
|
|
3016
|
+
var init_GUICapability = __esm({
|
|
3017
|
+
"packages/daemon/src/capabilities/GUICapability.ts"() {
|
|
3018
|
+
"use strict";
|
|
3019
|
+
GUICapability = class {
|
|
3020
|
+
name = "gui_automation";
|
|
3021
|
+
description = "Automate desktop GUI \u2014 click, type, screenshot, hotkeys, find text on screen.";
|
|
3022
|
+
toolDefinition = {
|
|
3023
|
+
name: "gui_automation",
|
|
3024
|
+
description: "GUI automation + comprehensive browser control. BROWSER (no Screen Recording needed): click_text \u2014 click any element by its visible text; type_in \u2014 fill a form field by placeholder/label; get_elements \u2014 list all interactive elements on the page; read_element \u2014 read text of an element by CSS selector; get_media_state \u2014 check if video is playing/paused/current time; scroll_to \u2014 scroll page or scroll to specific element; exec_js \u2014 run arbitrary JavaScript in Chrome tab; browser_state \u2014 get current URL + title; cdp_screenshot \u2014 screenshot via CDP (needs --remote-debugging-port=9222) with OCR. NATIVE APPS: accessibility_click \u2014 click button in macOS app (WhatsApp, Finder) via Accessibility API. NAVIGATION: open_url \u2014 navigate Chrome tab, returns URL+title+video state. MOUSE/KEYBOARD: click, type, hotkey (use app param to target Chrome vs Terminal), screenshot (needs Screen Recording).",
|
|
3025
|
+
input_schema: {
|
|
3026
|
+
type: "object",
|
|
3027
|
+
properties: {
|
|
3028
|
+
action: {
|
|
3029
|
+
type: "string",
|
|
3030
|
+
description: 'Browser (no Screen Recording): "click_text"|"type_in"|"get_elements"|"read_element"|"get_media_state"|"scroll_to"|"exec_js"|"browser_state"|"cdp_screenshot" | Native apps: "accessibility_click" | Navigation: "open_url"|"open_app" | Mouse/KB (Screen Recording for screenshots): "screenshot"|"click"|"double_click"|"right_click"|"move"|"type"|"hotkey"|"scroll"|"drag"|"find_and_click"|"get_screen_size"|"get_cursor_pos"|"wait"'
|
|
3031
|
+
},
|
|
3032
|
+
js: { type: "string", description: `JavaScript to execute in Chrome tab (use with exec_js). Example: "document.querySelector('video').paused"` },
|
|
3033
|
+
selector: { type: "string", description: 'CSS selector for read_element, type_in, scroll_to (e.g. "input[type=search]", ".title", "video")' },
|
|
3034
|
+
x: { type: "number", description: "X coordinate (pixels from left)" },
|
|
3035
|
+
y: { type: "number", description: "Y coordinate (pixels from top)" },
|
|
3036
|
+
to_x: { type: "number", description: "End X for drag" },
|
|
3037
|
+
to_y: { type: "number", description: "End Y for drag" },
|
|
3038
|
+
text: { type: "string", description: "Text to type, or text to search for (find_and_click)" },
|
|
3039
|
+
keys: { type: "string", description: 'Hotkey combo e.g. "cmd+c", "ctrl+z", "alt+tab", "enter"' },
|
|
3040
|
+
direction: { type: "string", description: '"up" | "down" | "left" | "right" for scroll' },
|
|
3041
|
+
amount: { type: "number", description: "Scroll clicks (default 3)" },
|
|
3042
|
+
app: { type: "string", description: 'App name to open e.g. "Safari", "Terminal", "Chrome"' },
|
|
3043
|
+
url: { type: "string", description: 'URL to open e.g. "https://example.com" (use with open_url)' },
|
|
3044
|
+
seconds: { type: "number", description: "Seconds to wait (use with wait action, default 2)" },
|
|
3045
|
+
interval: { type: "number", description: "Seconds to wait between actions (default 0.05)" },
|
|
3046
|
+
duration: { type: "number", description: "Seconds for mouse movement animation (default 0.2)" }
|
|
3047
|
+
},
|
|
3048
|
+
required: ["action"]
|
|
3049
|
+
}
|
|
3050
|
+
};
|
|
3051
|
+
async execute(input, _cwd, signal) {
|
|
3052
|
+
const action = String(input.action ?? "").toLowerCase().trim();
|
|
3053
|
+
const start = Date.now();
|
|
3054
|
+
const script = this._buildScript(action, input);
|
|
3055
|
+
if (!script) {
|
|
3056
|
+
return { success: false, output: `Unknown GUI action: "${action}". Valid: screenshot, click, double_click, right_click, move, type, hotkey, scroll, drag, find_and_click, get_screen_size, get_cursor_pos, wait, open_url, open_app, exec_js, browser_state`, duration_ms: 0 };
|
|
3057
|
+
}
|
|
3058
|
+
if (signal?.aborted) {
|
|
3059
|
+
return { success: false, output: "Cancelled.", duration_ms: 0 };
|
|
3060
|
+
}
|
|
3061
|
+
const tmpFile = resolve3(tmpdir(), `0agent_gui_${Date.now()}.py`);
|
|
3062
|
+
writeFileSync2(tmpFile, script, "utf8");
|
|
3063
|
+
const runPy = (file) => new Promise((res) => {
|
|
3064
|
+
const proc = spawn3("python3", [file], { env: process.env });
|
|
3065
|
+
const out = [];
|
|
3066
|
+
const err = [];
|
|
3067
|
+
let settled = false;
|
|
3068
|
+
const finish = (code) => {
|
|
3069
|
+
if (settled) return;
|
|
3070
|
+
settled = true;
|
|
3071
|
+
signal?.removeEventListener("abort", onAbort);
|
|
3072
|
+
clearTimeout(timer);
|
|
3073
|
+
res({ stdout: out.join(""), stderr: err.join(""), code });
|
|
3074
|
+
};
|
|
3075
|
+
const onAbort = () => {
|
|
3076
|
+
try {
|
|
3077
|
+
proc.kill("SIGKILL");
|
|
3078
|
+
} catch {
|
|
3079
|
+
}
|
|
3080
|
+
finish(null);
|
|
3081
|
+
};
|
|
3082
|
+
signal?.addEventListener("abort", onAbort, { once: true });
|
|
3083
|
+
proc.stdout.on("data", (d) => out.push(d.toString()));
|
|
3084
|
+
proc.stderr.on("data", (d) => err.push(d.toString()));
|
|
3085
|
+
proc.on("exit", finish);
|
|
3086
|
+
proc.on("error", () => finish(-1));
|
|
3087
|
+
const timer = setTimeout(() => {
|
|
3088
|
+
try {
|
|
3089
|
+
proc.kill("SIGKILL");
|
|
3090
|
+
} catch {
|
|
3091
|
+
}
|
|
3092
|
+
finish(null);
|
|
3093
|
+
}, 3e4);
|
|
3094
|
+
});
|
|
3095
|
+
let result = await runPy(tmpFile);
|
|
3096
|
+
try {
|
|
3097
|
+
unlinkSync(tmpFile);
|
|
3098
|
+
} catch {
|
|
3099
|
+
}
|
|
3100
|
+
if (signal?.aborted) {
|
|
3101
|
+
return { success: false, output: "Cancelled.", duration_ms: Date.now() - start };
|
|
3102
|
+
}
|
|
3103
|
+
if (result.code !== 0 && result.code !== null) {
|
|
3104
|
+
const err = result.stderr.trim();
|
|
3105
|
+
if (err.includes("No module named") || err.includes("ModuleNotFoundError")) {
|
|
3106
|
+
const missing = err.includes("pyautogui") ? "pyautogui pillow pytesseract" : err.includes("PIL") ? "pillow" : err.includes("tesseract") ? "pytesseract" : "pyautogui pillow";
|
|
3107
|
+
const install = spawnSync4("pip3", ["install", ...missing.split(" "), "-q"], {
|
|
3108
|
+
timeout: 6e4,
|
|
3109
|
+
encoding: "utf8"
|
|
3110
|
+
});
|
|
3111
|
+
if (install.status !== 0) {
|
|
3112
|
+
return { success: false, output: `Auto-install failed: ${install.stderr?.slice(0, 200)}. Run: pip3 install ${missing}`, duration_ms: Date.now() - start };
|
|
3113
|
+
}
|
|
3114
|
+
writeFileSync2(tmpFile, script, "utf8");
|
|
3115
|
+
result = await runPy(tmpFile);
|
|
3116
|
+
try {
|
|
3117
|
+
unlinkSync(tmpFile);
|
|
3118
|
+
} catch {
|
|
3119
|
+
}
|
|
3120
|
+
if (signal?.aborted) return { success: false, output: "Cancelled.", duration_ms: Date.now() - start };
|
|
3121
|
+
if (result.code === 0) return { success: true, output: result.stdout.trim() || "Done", duration_ms: Date.now() - start };
|
|
3122
|
+
return { success: false, output: result.stderr.trim() || "Unknown error after install", duration_ms: Date.now() - start };
|
|
3123
|
+
}
|
|
3124
|
+
const isScreenRecordingDenied = err.includes("could not create image from display") || err.includes("screen capture failed") || err.includes("screencapture") || err.includes("CGDisplayStream") || err.includes("Operation not permitted") || err.includes("OSError") && err.includes("display") || result.stdout.includes("could not create image from display");
|
|
3125
|
+
if (isScreenRecordingDenied) {
|
|
3126
|
+
if (platform2() === "darwin") {
|
|
3127
|
+
spawnSync4("open", ["x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture"], { timeout: 3e3 });
|
|
3128
|
+
const fallbackScript = `
|
|
3129
|
+
import subprocess
|
|
3130
|
+
as_script = '''tell application "Google Chrome"
|
|
3131
|
+
tell front window
|
|
3132
|
+
tell active tab
|
|
3133
|
+
set tabURL to URL
|
|
3134
|
+
set tabTitle to title
|
|
3135
|
+
set videoSt to execute javascript "try{let v=document.querySelector('video');v?(v.paused?'PAUSED':'PLAYING:'+v.currentTime.toFixed(1)+'s'):'no-video'}catch(e){'?'}"
|
|
3136
|
+
return tabURL & "|||" & tabTitle & "|||" & videoSt
|
|
3137
|
+
end tell
|
|
3138
|
+
end tell
|
|
3139
|
+
end tell'''
|
|
3140
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3141
|
+
out = r.stdout.strip()
|
|
3142
|
+
parts = out.split('|||') if '|||' in out else []
|
|
3143
|
+
if len(parts) >= 3:
|
|
3144
|
+
print(f"[No screenshot \u2014 Screen Recording permission needed]")
|
|
3145
|
+
print(f"Browser URL: {parts[0]}")
|
|
3146
|
+
print(f"Page title: {parts[1]}")
|
|
3147
|
+
print(f"Video state: {parts[2]}")
|
|
3148
|
+
print(f"To enable screenshots: System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable Terminal")
|
|
3149
|
+
else:
|
|
3150
|
+
print("[No screenshot \u2014 Screen Recording permission needed]")
|
|
3151
|
+
print("To fix: System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable Terminal (or iTerm2)")
|
|
3152
|
+
`;
|
|
3153
|
+
const fallbackTmp = resolve3(tmpdir(), `0agent_scfb_${Date.now()}.py`);
|
|
3154
|
+
writeFileSync2(fallbackTmp, fallbackScript, "utf8");
|
|
3155
|
+
const fbResult = await runPy(fallbackTmp);
|
|
3156
|
+
try {
|
|
3157
|
+
unlinkSync(fallbackTmp);
|
|
3158
|
+
} catch {
|
|
3159
|
+
}
|
|
3160
|
+
if (fbResult.code === 0 && fbResult.stdout.trim()) {
|
|
3161
|
+
return { success: false, output: fbResult.stdout.trim(), duration_ms: Date.now() - start };
|
|
3162
|
+
}
|
|
3163
|
+
}
|
|
3164
|
+
return {
|
|
3165
|
+
success: false,
|
|
3166
|
+
output: 'macOS Screen Recording permission required for screenshots.\nSystem Settings opened \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable Terminal/iTerm2.\nFor browser content, use exec_js instead: {action:"exec_js",js:"document.title"} or {action:"browser_state"} \u2014 these work without Screen Recording.',
|
|
3167
|
+
duration_ms: Date.now() - start
|
|
3168
|
+
};
|
|
3169
|
+
}
|
|
3170
|
+
if (err.includes("accessibility") || err.includes("permission") || err.includes("AXIsProcessTrusted")) {
|
|
3171
|
+
if (platform2() === "darwin") {
|
|
3172
|
+
spawnSync4("open", ["x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility"], { timeout: 3e3 });
|
|
3173
|
+
}
|
|
3174
|
+
return {
|
|
3175
|
+
success: false,
|
|
3176
|
+
output: "macOS Accessibility permission required for GUI automation.\n\u2192 System Settings has been opened automatically.\n\u2192 Go to: Privacy & Security \u2192 Accessibility \u2192 enable Terminal (or iTerm2 / the app running 0agent)\n\u2192 Then re-run your task.",
|
|
3177
|
+
duration_ms: Date.now() - start
|
|
3178
|
+
};
|
|
3179
|
+
}
|
|
3180
|
+
return { success: false, output: `GUI error: ${err.slice(0, 300)}`, duration_ms: Date.now() - start };
|
|
3181
|
+
}
|
|
3182
|
+
return { success: true, output: result.stdout.trim() || "Done", duration_ms: Date.now() - start };
|
|
3183
|
+
}
|
|
3184
|
+
_buildScript(action, input) {
|
|
3185
|
+
const x = input.x != null ? Number(input.x) : null;
|
|
3186
|
+
const y = input.y != null ? Number(input.y) : null;
|
|
3187
|
+
const toX = input.to_x != null ? Number(input.to_x) : null;
|
|
3188
|
+
const toY = input.to_y != null ? Number(input.to_y) : null;
|
|
3189
|
+
const text = input.text != null ? String(input.text) : "";
|
|
3190
|
+
const keys = input.keys != null ? String(input.keys) : "";
|
|
3191
|
+
const dir = input.direction != null ? String(input.direction) : "down";
|
|
3192
|
+
const amount = input.amount != null ? Number(input.amount) : 3;
|
|
3193
|
+
const app = input.app != null ? String(input.app) : "";
|
|
3194
|
+
const url = input.url != null ? String(input.url) : "";
|
|
3195
|
+
const seconds = input.seconds != null ? Number(input.seconds) : 2;
|
|
3196
|
+
const interval = input.interval != null ? Number(input.interval) : 0.05;
|
|
3197
|
+
const duration = input.duration != null ? Number(input.duration) : 0.2;
|
|
3198
|
+
const header = `
|
|
3199
|
+
import pyautogui
|
|
3200
|
+
import time
|
|
3201
|
+
import sys
|
|
3202
|
+
pyautogui.FAILSAFE = False
|
|
3203
|
+
pyautogui.PAUSE = ${interval}
|
|
3204
|
+
`;
|
|
3205
|
+
switch (action) {
|
|
3206
|
+
case "get_screen_size":
|
|
3207
|
+
return header + `
|
|
3208
|
+
w, h = pyautogui.size()
|
|
3209
|
+
print(f"Screen size: {w} x {h}")
|
|
3210
|
+
`;
|
|
3211
|
+
case "get_cursor_pos":
|
|
3212
|
+
return header + `
|
|
3213
|
+
x, y = pyautogui.position()
|
|
3214
|
+
print(f"Cursor position: ({x}, {y})")
|
|
3215
|
+
`;
|
|
3216
|
+
case "wait":
|
|
3217
|
+
return header + `
|
|
3218
|
+
time.sleep(${seconds})
|
|
3219
|
+
print(f"Waited ${seconds}s")
|
|
3220
|
+
`;
|
|
3221
|
+
case "screenshot": {
|
|
3222
|
+
return header + `
|
|
3223
|
+
import os, tempfile
|
|
3224
|
+
from PIL import Image
|
|
3225
|
+
|
|
3226
|
+
# Take screenshot
|
|
3227
|
+
shot_path = os.path.join(tempfile.gettempdir(), "0agent_screen.png")
|
|
3228
|
+
img = pyautogui.screenshot(shot_path)
|
|
3229
|
+
|
|
3230
|
+
w, h = img.size
|
|
3231
|
+
print(f"Screen: {w}x{h}")
|
|
3232
|
+
|
|
3233
|
+
# Try OCR with pytesseract
|
|
3234
|
+
try:
|
|
3235
|
+
import pytesseract
|
|
3236
|
+
# Resize for faster OCR if screen is large
|
|
3237
|
+
scale = min(1.0, 1920 / w)
|
|
3238
|
+
small = img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
|
|
3239
|
+
text = pytesseract.image_to_string(small, config='--psm 11')
|
|
3240
|
+
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
|
3241
|
+
print("\\nOn-screen text (OCR):")
|
|
3242
|
+
print("\\n".join(lines[:80]))
|
|
3243
|
+
|
|
3244
|
+
# Also get bounding boxes for clickable text
|
|
3245
|
+
data = pytesseract.image_to_data(small, output_type=pytesseract.Output.DICT)
|
|
3246
|
+
hits = []
|
|
3247
|
+
for i, word in enumerate(data['text']):
|
|
3248
|
+
if word.strip() and int(data['conf'][i]) > 50:
|
|
3249
|
+
bx = int(data['left'][i] / scale)
|
|
3250
|
+
by = int(data['top'][i] / scale)
|
|
3251
|
+
bw = int(data['width'][i] / scale)
|
|
3252
|
+
bh = int(data['height'][i] / scale)
|
|
3253
|
+
hits.append(f" '{word}' at ({bx + bw//2}, {by + bh//2})")
|
|
3254
|
+
if hits:
|
|
3255
|
+
print("\\nClickable words with center coordinates:")
|
|
3256
|
+
print("\\n".join(hits[:40]))
|
|
3257
|
+
except ImportError:
|
|
3258
|
+
print("(pytesseract not installed \u2014 install it for OCR: pip3 install pytesseract)")
|
|
3259
|
+
except Exception as e:
|
|
3260
|
+
print(f"OCR failed: {e}")
|
|
3261
|
+
finally:
|
|
3262
|
+
try:
|
|
3263
|
+
os.remove(shot_path)
|
|
3264
|
+
except Exception:
|
|
3265
|
+
pass
|
|
3266
|
+
`;
|
|
3267
|
+
}
|
|
3268
|
+
case "click":
|
|
3269
|
+
if (x == null || y == null) return null;
|
|
3270
|
+
return header + `
|
|
3271
|
+
pyautogui.click(${x}, ${y}, duration=${duration})
|
|
3272
|
+
print(f"Clicked at ({${x}}, {${y}})")
|
|
3273
|
+
`;
|
|
3274
|
+
case "double_click":
|
|
3275
|
+
if (x == null || y == null) return null;
|
|
3276
|
+
return header + `
|
|
3277
|
+
pyautogui.doubleClick(${x}, ${y}, duration=${duration})
|
|
3278
|
+
print(f"Double-clicked at ({${x}}, {${y}})")
|
|
3279
|
+
`;
|
|
3280
|
+
case "right_click":
|
|
3281
|
+
if (x == null || y == null) return null;
|
|
3282
|
+
return header + `
|
|
3283
|
+
pyautogui.rightClick(${x}, ${y}, duration=${duration})
|
|
3284
|
+
print(f"Right-clicked at ({${x}}, {${y}})")
|
|
3285
|
+
`;
|
|
3286
|
+
case "move":
|
|
3287
|
+
if (x == null || y == null) return null;
|
|
3288
|
+
return header + `
|
|
3289
|
+
pyautogui.moveTo(${x}, ${y}, duration=${duration})
|
|
3290
|
+
print(f"Moved to ({${x}}, {${y}})")
|
|
3291
|
+
`;
|
|
3292
|
+
case "type": {
|
|
3293
|
+
if (!text) return null;
|
|
3294
|
+
return header + `
|
|
3295
|
+
pyautogui.write(${JSON.stringify(text)}, interval=${interval})
|
|
3296
|
+
print("Typed successfully")
|
|
3297
|
+
`;
|
|
3298
|
+
}
|
|
3299
|
+
case "hotkey": {
|
|
3300
|
+
if (!keys) return null;
|
|
3301
|
+
const targetApp = input.app ? String(input.app) : "";
|
|
3302
|
+
const parts = keys.toLowerCase().replace(/cmd|command|meta/g, "command").replace(/ctrl|control/g, "ctrl").replace(/opt|option/g, "option").split(/[+\-]/).map((k) => k.trim()).filter(Boolean);
|
|
3303
|
+
const pyKeys = JSON.stringify(parts);
|
|
3304
|
+
if (targetApp && platform2() === "darwin") {
|
|
3305
|
+
const safeApp = targetApp.replace(/'/g, "\\'");
|
|
3306
|
+
const asKey = parts[parts.length - 1] ?? "";
|
|
3307
|
+
const modifiers = parts.slice(0, -1).map((k) => {
|
|
3308
|
+
if (k === "command") return "command down";
|
|
3309
|
+
if (k === "ctrl") return "control down";
|
|
3310
|
+
if (k === "shift") return "shift down";
|
|
3311
|
+
if (k === "option") return "option down";
|
|
3312
|
+
return "";
|
|
3313
|
+
}).filter(Boolean).join(", ");
|
|
3314
|
+
const asModStr = modifiers ? ` using {${modifiers}}` : "";
|
|
3315
|
+
return header + `
|
|
3316
|
+
import subprocess, time
|
|
3317
|
+
# Focus target app first
|
|
3318
|
+
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3319
|
+
time.sleep(0.3)
|
|
3320
|
+
# Send keystroke via AppleScript (reliable \u2014 goes to the focused app, not Terminal)
|
|
3321
|
+
as_script = '''tell application "System Events"
|
|
3322
|
+
keystroke "${asKey}"${asModStr}
|
|
3323
|
+
end tell'''
|
|
3324
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3325
|
+
if r.returncode == 0:
|
|
3326
|
+
print(f"Sent ${parts.join("+")} to ${safeApp}")
|
|
3327
|
+
else:
|
|
3328
|
+
print(f"Keystroke error: {r.stderr.strip()[:200]}")
|
|
3329
|
+
`;
|
|
3330
|
+
}
|
|
3331
|
+
return header + `
|
|
3332
|
+
keys = ${pyKeys}
|
|
3333
|
+
pyautogui.hotkey(*keys)
|
|
3334
|
+
print(f"Pressed: {'+'.join(keys)}")
|
|
3335
|
+
`;
|
|
3336
|
+
}
|
|
3337
|
+
case "scroll": {
|
|
3338
|
+
const clicksVal = dir === "up" ? amount : dir === "down" ? -amount : 0;
|
|
3339
|
+
const hVal = dir === "left" ? -amount : dir === "right" ? amount : 0;
|
|
3340
|
+
const sx = x ?? "pyautogui.size()[0]//2";
|
|
3341
|
+
const sy = y ?? "pyautogui.size()[1]//2";
|
|
3342
|
+
return header + `
|
|
3343
|
+
${hVal !== 0 ? `pyautogui.hscroll(${hVal}, x=${sx}, y=${sy})` : `pyautogui.scroll(${clicksVal}, x=${sx}, y=${sy})`}
|
|
3344
|
+
print(f"Scrolled ${dir} by ${amount}")
|
|
3345
|
+
`;
|
|
3346
|
+
}
|
|
3347
|
+
case "drag":
|
|
3348
|
+
if (x == null || y == null || toX == null || toY == null) return null;
|
|
3349
|
+
return header + `
|
|
3350
|
+
pyautogui.moveTo(${x}, ${y}, duration=${duration})
|
|
3351
|
+
pyautogui.dragTo(${toX}, ${toY}, duration=${duration * 2}, button='left')
|
|
3352
|
+
print(f"Dragged from ({${x}},{${y}}) to ({${toX}},{${toY}})")
|
|
3353
|
+
`;
|
|
3354
|
+
case "find_and_click": {
|
|
3355
|
+
if (!text) return null;
|
|
3356
|
+
const safeText = text.replace(/'/g, "\\'");
|
|
3357
|
+
return header + `
|
|
3358
|
+
from PIL import Image
|
|
3359
|
+
import pytesseract, os, tempfile
|
|
3360
|
+
|
|
3361
|
+
shot_path = os.path.join(tempfile.gettempdir(), "0agent_screen.png")
|
|
3362
|
+
img = pyautogui.screenshot(shot_path)
|
|
3363
|
+
w, h = img.size
|
|
3364
|
+
|
|
3365
|
+
data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
|
|
3366
|
+
target = '${safeText}'.lower()
|
|
3367
|
+
found = []
|
|
3368
|
+
for i, word in enumerate(data['text']):
|
|
3369
|
+
if target in word.lower() and int(data['conf'][i]) > 40:
|
|
3370
|
+
cx = data['left'][i] + data['width'][i] // 2
|
|
3371
|
+
cy = data['top'][i] + data['height'][i] // 2
|
|
3372
|
+
found.append((cx, cy, word))
|
|
3373
|
+
|
|
3374
|
+
if found:
|
|
3375
|
+
cx, cy, word = found[0]
|
|
3376
|
+
pyautogui.click(cx, cy, duration=${duration})
|
|
3377
|
+
print(f"Found '{word}' at ({cx},{cy}) \u2014 clicked")
|
|
3378
|
+
else:
|
|
3379
|
+
# Retry once after a brief wait (element may still be loading)
|
|
3380
|
+
time.sleep(1.5)
|
|
3381
|
+
img2 = pyautogui.screenshot()
|
|
3382
|
+
data2 = pytesseract.image_to_data(img2, output_type=pytesseract.Output.DICT)
|
|
3383
|
+
found2 = []
|
|
3384
|
+
for i, word in enumerate(data2['text']):
|
|
3385
|
+
if target in word.lower() and int(data2['conf'][i]) > 40:
|
|
3386
|
+
cx2 = data2['left'][i] + data2['width'][i] // 2
|
|
3387
|
+
cy2 = data2['top'][i] + data2['height'][i] // 2
|
|
3388
|
+
found2.append((cx2, cy2, word))
|
|
3389
|
+
if found2:
|
|
3390
|
+
cx2, cy2, word2 = found2[0]
|
|
3391
|
+
pyautogui.click(cx2, cy2, duration=${duration})
|
|
3392
|
+
print(f"Found '{word2}' at ({cx2},{cy2}) after retry \u2014 clicked")
|
|
3393
|
+
else:
|
|
3394
|
+
print(f"Text '${safeText}' not found on screen after retry. Take a screenshot to see what changed.")
|
|
3395
|
+
sys.exit(1)
|
|
3396
|
+
try:
|
|
3397
|
+
os.remove(shot_path)
|
|
3398
|
+
except Exception:
|
|
3399
|
+
pass
|
|
3400
|
+
`;
|
|
3401
|
+
}
|
|
3402
|
+
case "open_url": {
|
|
3403
|
+
if (!url) return null;
|
|
3404
|
+
let finalUrl = url;
|
|
3405
|
+
if (/youtube\.com\/watch/i.test(url) && !url.includes("autoplay")) {
|
|
3406
|
+
finalUrl = url + (url.includes("?") ? "&" : "?") + "autoplay=1";
|
|
3407
|
+
}
|
|
3408
|
+
const safeUrl = finalUrl.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
|
|
3409
|
+
const isYouTubeVideo = /youtube\.com\/watch/i.test(finalUrl);
|
|
3410
|
+
const osName = platform2();
|
|
3411
|
+
if (osName === "darwin") {
|
|
3412
|
+
return header + `
|
|
3413
|
+
import subprocess
|
|
3414
|
+
import time
|
|
3415
|
+
|
|
3416
|
+
url = '${safeUrl}'
|
|
3417
|
+
is_youtube_video = ${isYouTubeVideo ? "True" : "False"}
|
|
3418
|
+
|
|
3419
|
+
# Check if Chrome is running
|
|
3420
|
+
chrome_running = subprocess.run(['pgrep', '-x', 'Google Chrome'], capture_output=True).returncode == 0
|
|
3421
|
+
firefox_running = subprocess.run(['pgrep', '-x', 'firefox'], capture_output=True).returncode == 0
|
|
3422
|
+
safari_running = subprocess.run(['pgrep', '-x', 'Safari'], capture_output=True).returncode == 0
|
|
3423
|
+
|
|
3424
|
+
import urllib.parse
|
|
3425
|
+
domain = urllib.parse.urlparse(url).netloc
|
|
3426
|
+
|
|
3427
|
+
if chrome_running:
|
|
3428
|
+
if is_youtube_video:
|
|
3429
|
+
# Navigate the CURRENT active tab directly to avoid domain-matching a wrong/stale tab
|
|
3430
|
+
nav_script = f"""tell application "Google Chrome"
|
|
3431
|
+
tell front window
|
|
3432
|
+
tell active tab
|
|
3433
|
+
set URL to "{url}"
|
|
3434
|
+
end tell
|
|
3435
|
+
end tell
|
|
3436
|
+
activate
|
|
3437
|
+
end tell"""
|
|
3438
|
+
subprocess.run(['osascript', '-e', nav_script], capture_output=True)
|
|
3439
|
+
time.sleep(3)
|
|
3440
|
+
# Unmute + play via JS (handles autoplay policy blocks)
|
|
3441
|
+
play_script = """tell application "Google Chrome"
|
|
3442
|
+
tell front window
|
|
3443
|
+
tell active tab
|
|
3444
|
+
execute javascript "try{let v=document.querySelector('video');if(v){v.muted=false;v.volume=1.0;v.play();}}catch(e){}"
|
|
3445
|
+
end tell
|
|
3446
|
+
end tell
|
|
3447
|
+
end tell"""
|
|
3448
|
+
subprocess.run(['osascript', '-e', play_script], capture_output=True)
|
|
3449
|
+
time.sleep(1)
|
|
3450
|
+
# Verify: get URL, title, video state \u2014 all via AppleScript, no Screen Recording needed
|
|
3451
|
+
verify_script = """tell application "Google Chrome"
|
|
3452
|
+
tell front window
|
|
3453
|
+
tell active tab
|
|
3454
|
+
set tabURL to URL
|
|
3455
|
+
set tabTitle to title
|
|
3456
|
+
set videoSt to execute javascript "try{let v=document.querySelector('video');v?(v.paused?'PAUSED':'PLAYING:'+v.currentTime.toFixed(1)+'s'):'no-video'}catch(e){'err'}"
|
|
3457
|
+
return tabURL & "|||" & tabTitle & "|||" & videoSt
|
|
3458
|
+
end tell
|
|
3459
|
+
end tell
|
|
3460
|
+
end tell"""
|
|
3461
|
+
vr = subprocess.run(['osascript', '-e', verify_script], capture_output=True, text=True)
|
|
3462
|
+
parts = vr.stdout.strip().split('|||')
|
|
3463
|
+
if len(parts) >= 3:
|
|
3464
|
+
print(f"URL: {parts[0]}")
|
|
3465
|
+
print(f"Title: {parts[1]}")
|
|
3466
|
+
st = parts[2].strip()
|
|
3467
|
+
if 'PLAYING' in st:
|
|
3468
|
+
print(f"Video: {st} \u2713")
|
|
3469
|
+
elif st == 'PAUSED':
|
|
3470
|
+
# Send play() one more time
|
|
3471
|
+
subprocess.run(['osascript', '-e', play_script], capture_output=True)
|
|
3472
|
+
time.sleep(0.5)
|
|
3473
|
+
print("Video: was PAUSED \u2014 sent play() again, should be playing now")
|
|
3474
|
+
else:
|
|
3475
|
+
print(f"Video state: {st} (page may still be loading)")
|
|
3476
|
+
else:
|
|
3477
|
+
print(f"Navigated to: {url}")
|
|
3478
|
+
print(f"(Verification unavailable: {vr.stdout.strip() or vr.stderr.strip()[:100]})")
|
|
3479
|
+
else:
|
|
3480
|
+
# Non-video: switch to existing same-domain tab or open new tab
|
|
3481
|
+
check_script = f"""tell application "Google Chrome"
|
|
3482
|
+
set foundTab to false
|
|
3483
|
+
repeat with w in every window
|
|
3484
|
+
set tabIdx to 1
|
|
3485
|
+
repeat with t in every tab of w
|
|
3486
|
+
if URL of t contains "{domain}" then
|
|
3487
|
+
set active tab index of w to tabIdx
|
|
3488
|
+
set index of w to 1
|
|
3489
|
+
set foundTab to true
|
|
3490
|
+
exit repeat
|
|
3491
|
+
end if
|
|
3492
|
+
set tabIdx to tabIdx + 1
|
|
3493
|
+
end repeat
|
|
3494
|
+
if foundTab then exit repeat
|
|
3495
|
+
end repeat
|
|
3496
|
+
if foundTab then
|
|
3497
|
+
activate
|
|
3498
|
+
return "switched"
|
|
3499
|
+
else
|
|
3500
|
+
tell front window to make new tab with properties {{URL:"{url}"}}
|
|
3501
|
+
activate
|
|
3502
|
+
return "new-tab"
|
|
3503
|
+
end if
|
|
3504
|
+
end tell"""
|
|
3505
|
+
r = subprocess.run(['osascript', '-e', check_script], capture_output=True, text=True)
|
|
3506
|
+
switched = r.stdout.strip() == "switched"
|
|
3507
|
+
# Verify actual URL and title loaded (catches wrong-domain tab issues)
|
|
3508
|
+
state_script = """tell application "Google Chrome"
|
|
3509
|
+
tell front window
|
|
3510
|
+
tell active tab
|
|
3511
|
+
return URL & "|||" & title
|
|
3512
|
+
end tell
|
|
3513
|
+
end tell
|
|
3514
|
+
end tell"""
|
|
3515
|
+
sr = subprocess.run(['osascript', '-e', state_script], capture_output=True, text=True)
|
|
3516
|
+
sp = sr.stdout.strip().split('|||')
|
|
3517
|
+
if len(sp) >= 2:
|
|
3518
|
+
print(f"{'Switched to' if switched else 'Opened'}: {sp[0]}")
|
|
3519
|
+
print(f"Title: {sp[1]}")
|
|
3520
|
+
else:
|
|
3521
|
+
print(f"{'Switched to existing' if switched else 'Opened new'} Chrome tab: {url}")
|
|
3522
|
+
elif firefox_running:
|
|
3523
|
+
script = f'tell application "Firefox" to open location "{url}"'
|
|
3524
|
+
subprocess.run(['osascript', '-e', script])
|
|
3525
|
+
subprocess.run(['osascript', '-e', 'tell application "Firefox" to activate'])
|
|
3526
|
+
print(f"Navigated Firefox to: {url}")
|
|
3527
|
+
elif safari_running:
|
|
3528
|
+
script = f'tell application "Safari" to open location "{url}"'
|
|
3529
|
+
subprocess.run(['osascript', '-e', script])
|
|
3530
|
+
subprocess.run(['osascript', '-e', 'tell application "Safari" to activate'])
|
|
3531
|
+
print(f"Navigated Safari to: {url}")
|
|
3532
|
+
else:
|
|
3533
|
+
# No browser open \u2014 launch default browser with the URL
|
|
3534
|
+
subprocess.run(['open', url])
|
|
3535
|
+
print(f"Launched browser with: {url}")
|
|
3536
|
+
time.sleep(1.0)
|
|
3537
|
+
`;
|
|
3538
|
+
}
|
|
3539
|
+
return header + `
|
|
3540
|
+
import subprocess
|
|
3541
|
+
|
|
3542
|
+
url = '${safeUrl}'
|
|
3543
|
+
|
|
3544
|
+
# Try to reuse existing browser via wmctrl/xdotool, fall back to xdg-open
|
|
3545
|
+
chrome_pid = subprocess.run(['pgrep', '-x', 'chrome'], capture_output=True)
|
|
3546
|
+
firefox_pid = subprocess.run(['pgrep', '-x', 'firefox'], capture_output=True)
|
|
3547
|
+
|
|
3548
|
+
if chrome_pid.returncode == 0:
|
|
3549
|
+
subprocess.Popen(['google-chrome', '--new-tab', url])
|
|
3550
|
+
print(f"Opened in Chrome tab: {url}")
|
|
3551
|
+
elif firefox_pid.returncode == 0:
|
|
3552
|
+
subprocess.Popen(['firefox', '--new-tab', url])
|
|
3553
|
+
print(f"Opened in Firefox tab: {url}")
|
|
3554
|
+
else:
|
|
3555
|
+
subprocess.Popen(['xdg-open', url])
|
|
3556
|
+
print(f"Opened with default browser: {url}")
|
|
3557
|
+
time.sleep(1.0)
|
|
3558
|
+
`;
|
|
3559
|
+
}
|
|
3560
|
+
case "open_app": {
|
|
3561
|
+
if (!app) return null;
|
|
3562
|
+
const safeApp = app.replace(/'/g, "\\'");
|
|
3563
|
+
const os = platform2();
|
|
3564
|
+
if (os === "darwin") {
|
|
3565
|
+
return header + `
|
|
3566
|
+
import subprocess
|
|
3567
|
+
result = subprocess.run(['open', '-a', '${safeApp}'], capture_output=True, text=True)
|
|
3568
|
+
if result.returncode == 0:
|
|
3569
|
+
print(f"Opened: ${safeApp}")
|
|
3570
|
+
time.sleep(1.5) # wait for app to launch
|
|
3571
|
+
else:
|
|
3572
|
+
# Try spotlight
|
|
3573
|
+
pyautogui.hotkey('command', 'space')
|
|
3574
|
+
time.sleep(0.5)
|
|
3575
|
+
pyautogui.write('${safeApp}', interval=0.05)
|
|
3576
|
+
time.sleep(0.5)
|
|
3577
|
+
pyautogui.press('enter')
|
|
3578
|
+
print(f"Opened via Spotlight: ${safeApp}")
|
|
3579
|
+
time.sleep(1.5)
|
|
3580
|
+
`;
|
|
3581
|
+
}
|
|
3582
|
+
return header + `
|
|
3583
|
+
import subprocess
|
|
3584
|
+
subprocess.Popen(['${safeApp}'])
|
|
3585
|
+
print(f"Launched: ${safeApp}")
|
|
3586
|
+
time.sleep(1.5)
|
|
3587
|
+
`;
|
|
3588
|
+
}
|
|
3589
|
+
// ── New high-level browser actions — no Screen Recording needed ───────────
|
|
3590
|
+
case "click_text": {
|
|
3591
|
+
if (!text) return null;
|
|
3592
|
+
if (platform2() !== "darwin") return header + `print("click_text requires macOS + Chrome")`;
|
|
3593
|
+
return this._chromeJs(JSON.stringify(text), `
|
|
3594
|
+
(function(t) {
|
|
3595
|
+
t = t.toLowerCase().trim();
|
|
3596
|
+
// Pass 1: interactive elements (buttons, links, roles)
|
|
3597
|
+
var candidates = Array.from(document.querySelectorAll(
|
|
3598
|
+
'button,a,[role="button"],[role="link"],[role="menuitem"],[role="option"],[role="tab"],[tabindex="0"],label'
|
|
3599
|
+
));
|
|
3600
|
+
var match = candidates.find(el => {
|
|
3601
|
+
var txt = (el.textContent || el.getAttribute('aria-label') || el.getAttribute('title') || '').trim().toLowerCase();
|
|
3602
|
+
return txt === t || txt.startsWith(t) || (t.length > 3 && txt.includes(t));
|
|
3603
|
+
});
|
|
3604
|
+
// Pass 2: any visible leaf element with matching text
|
|
3605
|
+
if (!match) {
|
|
3606
|
+
match = Array.from(document.querySelectorAll('*')).find(el => {
|
|
3607
|
+
if (!el.offsetParent && el !== document.body) return false;
|
|
3608
|
+
if (el.children.length > 0) return false;
|
|
3609
|
+
var txt = (el.textContent || '').trim().toLowerCase();
|
|
3610
|
+
return txt === t || (t.length > 4 && txt.includes(t) && txt.length < t.length * 3);
|
|
3611
|
+
});
|
|
3612
|
+
}
|
|
3613
|
+
if (!match) return 'NOT_FOUND: ' + t;
|
|
3614
|
+
match.scrollIntoView({behavior:'instant', block:'center'});
|
|
3615
|
+
match.focus();
|
|
3616
|
+
['mousedown','mouseup','click'].forEach(e =>
|
|
3617
|
+
match.dispatchEvent(new MouseEvent(e, {bubbles:true, cancelable:true}))
|
|
3618
|
+
);
|
|
3619
|
+
return 'CLICKED: ' + (match.textContent || match.getAttribute('aria-label') || match.tagName).trim().slice(0,80);
|
|
3620
|
+
})(JSARG)
|
|
3621
|
+
`);
|
|
3622
|
+
}
|
|
3623
|
+
case "type_in": {
|
|
3624
|
+
if (!text) return null;
|
|
3625
|
+
if (platform2() !== "darwin") return header + `print("type_in requires macOS + Chrome")`;
|
|
3626
|
+
const query = String(input.selector ?? input.query ?? "").trim() || "active";
|
|
3627
|
+
const args = JSON.stringify([query, text]);
|
|
3628
|
+
return this._chromeJs(args, `
|
|
3629
|
+
(function(query, value) {
|
|
3630
|
+
var el = query === 'active' ? document.activeElement :
|
|
3631
|
+
document.querySelector('input[placeholder*="'+query+'" i]') ||
|
|
3632
|
+
document.querySelector('input[aria-label*="'+query+'" i]') ||
|
|
3633
|
+
document.querySelector('textarea[placeholder*="'+query+'" i]') ||
|
|
3634
|
+
document.querySelector('[role="textbox"][aria-label*="'+query+'" i]') ||
|
|
3635
|
+
document.querySelector('[contenteditable="true"]') ||
|
|
3636
|
+
document.querySelector('input[type="text"],input[type="search"],input:not([type])');
|
|
3637
|
+
if (!el) return 'NOT_FOUND: ' + query;
|
|
3638
|
+
el.focus();
|
|
3639
|
+
if (el.getAttribute('contenteditable') !== null) {
|
|
3640
|
+
el.textContent = '';
|
|
3641
|
+
document.execCommand('insertText', false, value);
|
|
3642
|
+
} else {
|
|
3643
|
+
var proto = el instanceof HTMLTextAreaElement ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
|
|
3644
|
+
Object.getOwnPropertyDescriptor(proto, 'value').set.call(el, value);
|
|
3645
|
+
['input','change'].forEach(t => el.dispatchEvent(new Event(t, {bubbles:true})));
|
|
3646
|
+
}
|
|
3647
|
+
return 'TYPED "'+value.slice(0,40)+'" in '+(el.placeholder||el.getAttribute('aria-label')||el.tagName);
|
|
3648
|
+
})(JSARG[0], JSARG[1])
|
|
3649
|
+
`);
|
|
3650
|
+
}
|
|
3651
|
+
case "read_element": {
|
|
3652
|
+
if (platform2() !== "darwin") return header + `print("read_element requires macOS + Chrome")`;
|
|
3653
|
+
const sel = String(input.selector ?? "").trim();
|
|
3654
|
+
return this._chromeJs(JSON.stringify(sel || "body"), `
|
|
3655
|
+
(function(sel) {
|
|
3656
|
+
var el = sel ? document.querySelector(sel) : document.body;
|
|
3657
|
+
if (!el) return 'NOT_FOUND: ' + sel;
|
|
3658
|
+
return (el.textContent || el.innerText || el.value || '').trim().replace(/\\s+/g,' ').slice(0, 800);
|
|
3659
|
+
})(JSARG)
|
|
3660
|
+
`);
|
|
3661
|
+
}
|
|
3662
|
+
case "get_elements": {
|
|
3663
|
+
if (platform2() !== "darwin") return header + `print("get_elements requires macOS + Chrome")`;
|
|
3664
|
+
return this._chromeJs(JSON.stringify(""), `
|
|
3665
|
+
(function() {
|
|
3666
|
+
var seen = new Set(), els = [];
|
|
3667
|
+
document.querySelectorAll('button,a,input,select,textarea,[role="button"],[role="link"],[role="tab"],[role="option"],h1,h2,h3,video,audio').forEach(function(el,i) {
|
|
3668
|
+
if (i > 80 || !el.offsetParent) return;
|
|
3669
|
+
var label = (el.textContent || el.getAttribute('aria-label') || el.placeholder || el.getAttribute('title') || el.value || '').trim().slice(0,80);
|
|
3670
|
+
if (!label || seen.has(label)) return;
|
|
3671
|
+
seen.add(label);
|
|
3672
|
+
els.push(el.tagName.toLowerCase()+': '+label);
|
|
3673
|
+
});
|
|
3674
|
+
return els.length ? els.join('\\n') : 'No interactive elements found';
|
|
3675
|
+
})()
|
|
3676
|
+
`);
|
|
3677
|
+
}
|
|
3678
|
+
case "get_media_state": {
|
|
3679
|
+
if (platform2() !== "darwin") return header + `print("get_media_state requires macOS + Chrome")`;
|
|
3680
|
+
return this._chromeJs(JSON.stringify(""), `
|
|
3681
|
+
(function() {
|
|
3682
|
+
var v = document.querySelector('video,audio');
|
|
3683
|
+
if (!v) return 'No media on this page';
|
|
3684
|
+
return JSON.stringify({
|
|
3685
|
+
state: v.paused ? 'PAUSED' : 'PLAYING',
|
|
3686
|
+
time: v.currentTime.toFixed(1)+'s',
|
|
3687
|
+
duration: isFinite(v.duration) ? v.duration.toFixed(1)+'s' : 'live/unknown',
|
|
3688
|
+
muted: v.muted,
|
|
3689
|
+
volume: Math.round(v.volume*100)+'%',
|
|
3690
|
+
title: document.title.slice(0,80)
|
|
3691
|
+
});
|
|
3692
|
+
})()
|
|
3693
|
+
`);
|
|
3694
|
+
}
|
|
3695
|
+
case "scroll_to": {
|
|
3696
|
+
if (platform2() !== "darwin") return header + `print("scroll_to requires macOS + Chrome")`;
|
|
3697
|
+
const sel = String(input.selector ?? "").trim();
|
|
3698
|
+
const scrollDir = String(input.direction ?? "down").toLowerCase();
|
|
3699
|
+
const scrollAmt = input.amount != null ? Number(input.amount) : 400;
|
|
3700
|
+
if (sel) {
|
|
3701
|
+
return this._chromeJs(JSON.stringify(sel), `
|
|
3702
|
+
(function(s){var el=document.querySelector(s);if(!el)return 'NOT_FOUND: '+s;el.scrollIntoView({behavior:'instant',block:'center'});return 'Scrolled to: '+s;})(JSARG)
|
|
3703
|
+
`);
|
|
3704
|
+
}
|
|
3705
|
+
const scrollY = scrollDir === "up" ? -scrollAmt : scrollDir === "down" ? scrollAmt : 0;
|
|
3706
|
+
const scrollX = scrollDir === "left" ? -scrollAmt : scrollDir === "right" ? scrollAmt : 0;
|
|
3707
|
+
return this._chromeJs(JSON.stringify([scrollX, scrollY]), `
|
|
3708
|
+
(function(xy){window.scrollBy(xy[0],xy[1]);return 'Scrolled';} )(JSARG)
|
|
3709
|
+
`);
|
|
3710
|
+
}
|
|
3711
|
+
case "accessibility_click": {
|
|
3712
|
+
const appName = String(input.app ?? "").trim();
|
|
3713
|
+
const elemLabel = String(input.element ?? text ?? "").trim();
|
|
3714
|
+
if (!appName || !elemLabel) return null;
|
|
3715
|
+
const osName = platform2();
|
|
3716
|
+
if (osName !== "darwin") return header + `print("accessibility_click is macOS only")`;
|
|
3717
|
+
const safeApp = appName.replace(/'/g, "\\'");
|
|
3718
|
+
const safeElem = elemLabel.replace(/'/g, "\\'");
|
|
3719
|
+
return header + `
|
|
3720
|
+
import subprocess, time
|
|
3721
|
+
|
|
3722
|
+
# Bring app to foreground
|
|
3723
|
+
subprocess.run(['osascript', '-e', 'tell application "${safeApp}" to activate'], capture_output=True)
|
|
3724
|
+
time.sleep(0.5)
|
|
3725
|
+
|
|
3726
|
+
# Try clicking by name, then by description, then by value
|
|
3727
|
+
attempts = [
|
|
3728
|
+
f'''tell application "System Events" to tell process "${safeApp}" to click (first UI element of front window whose name contains "${safeElem}")''',
|
|
3729
|
+
f'''tell application "System Events" to tell process "${safeApp}" to click (first button whose description contains "${safeElem}")''',
|
|
3730
|
+
f'''tell application "System Events" to tell process "${safeApp}" to click (first UI element whose value contains "${safeElem}")''',
|
|
3731
|
+
]
|
|
3732
|
+
|
|
3733
|
+
success = False
|
|
3734
|
+
for script in attempts:
|
|
3735
|
+
r = subprocess.run(['osascript', '-e', script], capture_output=True, text=True)
|
|
3736
|
+
if r.returncode == 0:
|
|
3737
|
+
print(f"Clicked '{${JSON.stringify(elemLabel)}}' in ${safeApp}")
|
|
3738
|
+
success = True
|
|
3739
|
+
break
|
|
3740
|
+
|
|
3741
|
+
if not success:
|
|
3742
|
+
# Last resort: try clicking the front window element matching description
|
|
3743
|
+
list_script = f"""tell application "System Events"
|
|
3744
|
+
tell process "${safeApp}"
|
|
3745
|
+
return name of every UI element of front window
|
|
3746
|
+
end tell
|
|
3747
|
+
end tell"""
|
|
3748
|
+
lr = subprocess.run(['osascript', '-e', list_script], capture_output=True, text=True)
|
|
3749
|
+
print(f"Could not find element '${safeElem}' in ${safeApp}")
|
|
3750
|
+
print(f"Available elements: {lr.stdout.strip()[:300] or 'could not list'}")
|
|
3751
|
+
`;
|
|
3752
|
+
}
|
|
3753
|
+
case "cdp_screenshot": {
|
|
3754
|
+
if (platform2() !== "darwin") return header + `print("cdp_screenshot is macOS only for now")`;
|
|
3755
|
+
return header + `
|
|
3756
|
+
import urllib.request, json, base64, os, tempfile, subprocess
|
|
3757
|
+
|
|
3758
|
+
def get_browser_state():
|
|
3759
|
+
simple_scr = """tell application "Google Chrome"
|
|
3760
|
+
tell front window
|
|
3761
|
+
tell active tab
|
|
3762
|
+
return URL & "|||" & title
|
|
3763
|
+
end tell
|
|
3764
|
+
end tell
|
|
3765
|
+
end tell"""
|
|
3766
|
+
r = subprocess.run(['osascript', '-e', simple_scr], capture_output=True, text=True)
|
|
3767
|
+
parts = r.stdout.strip().split('|||')
|
|
3768
|
+
if len(parts) >= 2:
|
|
3769
|
+
print(f"[No CDP screenshot] Tab: {parts[1]}")
|
|
3770
|
+
print(f"URL: {parts[0]}")
|
|
3771
|
+
else:
|
|
3772
|
+
print("[No CDP screenshot \u2014 Chrome not running or no active tab]")
|
|
3773
|
+
print("To enable screenshots without Screen Recording, start Chrome with:")
|
|
3774
|
+
print(" /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222")
|
|
3775
|
+
|
|
3776
|
+
try:
|
|
3777
|
+
tabs_raw = urllib.request.urlopen('http://localhost:9222/json', timeout=2).read()
|
|
3778
|
+
tabs = json.loads(tabs_raw)
|
|
3779
|
+
if not tabs:
|
|
3780
|
+
raise Exception("No tabs available")
|
|
3781
|
+
ws_url = tabs[0].get('webSocketDebuggerUrl', '')
|
|
3782
|
+
if not ws_url:
|
|
3783
|
+
raise Exception("No WebSocket URL")
|
|
3784
|
+
|
|
3785
|
+
# Auto-install websockets if needed
|
|
3786
|
+
try:
|
|
3787
|
+
import websockets
|
|
3788
|
+
except ImportError:
|
|
3789
|
+
subprocess.run(['pip3', 'install', 'websockets', '-q'], capture_output=True, timeout=60)
|
|
3790
|
+
import websockets
|
|
3791
|
+
|
|
3792
|
+
import asyncio
|
|
3793
|
+
|
|
3794
|
+
async def capture():
|
|
3795
|
+
async with websockets.connect(ws_url) as ws:
|
|
3796
|
+
await ws.send(json.dumps({'id':1,'method':'Page.captureScreenshot','params':{'format':'jpeg','quality':75}}))
|
|
3797
|
+
resp = json.loads(await ws.recv())
|
|
3798
|
+
return resp.get('result', {}).get('data')
|
|
3799
|
+
|
|
3800
|
+
img_b64 = asyncio.run(capture())
|
|
3801
|
+
if not img_b64:
|
|
3802
|
+
raise Exception("No screenshot data returned")
|
|
3803
|
+
|
|
3804
|
+
out_path = os.path.join(tempfile.gettempdir(), '0agent_cdp_shot.jpg')
|
|
3805
|
+
with open(out_path, 'wb') as f:
|
|
3806
|
+
f.write(base64.b64decode(img_b64))
|
|
3807
|
+
|
|
3808
|
+
print(f"Screenshot: {out_path}")
|
|
3809
|
+
print(f"Tab: {tabs[0].get('title','?')} \u2014 {tabs[0].get('url','?')[:80]}")
|
|
3810
|
+
|
|
3811
|
+
try:
|
|
3812
|
+
import pytesseract
|
|
3813
|
+
from PIL import Image
|
|
3814
|
+
img = Image.open(out_path)
|
|
3815
|
+
text = pytesseract.image_to_string(img, config='--psm 11')
|
|
3816
|
+
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
|
3817
|
+
if lines:
|
|
3818
|
+
print("On-screen text (OCR):\\n" + "\\n".join(lines[:50]))
|
|
3819
|
+
except Exception:
|
|
3820
|
+
print("(OCR not available \u2014 install pytesseract for text extraction)")
|
|
3821
|
+
|
|
3822
|
+
except Exception as e:
|
|
3823
|
+
get_browser_state()
|
|
3824
|
+
`;
|
|
3825
|
+
}
|
|
3826
|
+
case "exec_js": {
|
|
3827
|
+
const js = String(input.js ?? "").trim();
|
|
3828
|
+
if (!js) return null;
|
|
3829
|
+
const osName = platform2();
|
|
3830
|
+
if (osName !== "darwin") {
|
|
3831
|
+
return header + `print("exec_js requires macOS + Google Chrome")`;
|
|
3832
|
+
}
|
|
3833
|
+
const jsJson = JSON.stringify(js);
|
|
3834
|
+
return header + `
|
|
3835
|
+
import subprocess, json, os, tempfile
|
|
3836
|
+
|
|
3837
|
+
js = json.loads(${jsJson})
|
|
3838
|
+
tmpjs = os.path.join(tempfile.gettempdir(), f"0agent_execjs_{os.getpid()}.js")
|
|
3839
|
+
with open(tmpjs, 'w') as f:
|
|
3840
|
+
f.write(js)
|
|
3841
|
+
|
|
3842
|
+
as_script = f'''tell application "Google Chrome"
|
|
3843
|
+
tell front window
|
|
3844
|
+
tell active tab
|
|
3845
|
+
set jsCode to do shell script "cat '{tmpjs}'"
|
|
3846
|
+
return execute javascript jsCode
|
|
3847
|
+
end tell
|
|
3848
|
+
end tell
|
|
3849
|
+
end tell'''
|
|
3850
|
+
|
|
3851
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3852
|
+
try: os.remove(tmpjs)
|
|
3853
|
+
except: pass
|
|
3854
|
+
|
|
3855
|
+
if r.returncode == 0:
|
|
3856
|
+
print(r.stdout.strip() if r.stdout.strip() else "(no return value)")
|
|
3857
|
+
else:
|
|
3858
|
+
print(f"JS error: {r.stderr.strip()[:300]}")
|
|
3859
|
+
`;
|
|
3860
|
+
}
|
|
3861
|
+
case "browser_state": {
|
|
3862
|
+
const osName = platform2();
|
|
3863
|
+
if (osName !== "darwin") {
|
|
3864
|
+
return header + `print("browser_state requires macOS + Google Chrome")`;
|
|
3865
|
+
}
|
|
3866
|
+
return header + `
|
|
3867
|
+
import subprocess
|
|
3868
|
+
|
|
3869
|
+
as_script = '''tell application "Google Chrome"
|
|
3870
|
+
tell front window
|
|
3871
|
+
tell active tab
|
|
3872
|
+
return URL & "|||" & title
|
|
3873
|
+
end tell
|
|
3874
|
+
end tell
|
|
3875
|
+
end tell'''
|
|
3876
|
+
|
|
3877
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3878
|
+
out = r.stdout.strip()
|
|
3879
|
+
if '|||' in out:
|
|
3880
|
+
parts = out.split('|||', 1)
|
|
3881
|
+
print(f"URL: {parts[0]}")
|
|
3882
|
+
print(f"Title: {parts[1]}")
|
|
3883
|
+
else:
|
|
3884
|
+
print(out or r.stderr.strip() or "Chrome not running or no active tab")
|
|
3885
|
+
`;
|
|
3886
|
+
}
|
|
3887
|
+
default:
|
|
3888
|
+
return null;
|
|
3889
|
+
}
|
|
3890
|
+
}
|
|
3891
|
+
/**
|
|
3892
|
+
* Generate a Python script that runs JS in the current Chrome tab via AppleScript.
|
|
3893
|
+
* jsArgJson is passed as variable JSARG inside the JS template.
|
|
3894
|
+
* No Screen Recording needed — uses Chrome's built-in execute javascript.
|
|
3895
|
+
*/
|
|
3896
|
+
_chromeJs(jsArgJson, jsTemplate) {
|
|
3897
|
+
const finalJs = `var JSARG = ${jsArgJson};
|
|
3898
|
+
${jsTemplate.trim()}`;
|
|
3899
|
+
const jsJson = JSON.stringify(finalJs);
|
|
3900
|
+
return `
|
|
3901
|
+
import subprocess, json, os, tempfile
|
|
3902
|
+
|
|
3903
|
+
js = json.loads(${jsJson})
|
|
3904
|
+
tmpjs = os.path.join(tempfile.gettempdir(), f"0agent_cjs_{os.getpid()}.js")
|
|
3905
|
+
with open(tmpjs, 'w') as f:
|
|
3906
|
+
f.write(js)
|
|
3907
|
+
as_script = f"""tell application "Google Chrome"
|
|
3908
|
+
tell front window
|
|
3909
|
+
tell active tab
|
|
3910
|
+
set jsCode to do shell script "cat '{tmpjs}'"
|
|
3911
|
+
return execute javascript jsCode
|
|
3912
|
+
end tell
|
|
3913
|
+
end tell
|
|
3914
|
+
end tell"""
|
|
3915
|
+
r = subprocess.run(['osascript', '-e', as_script], capture_output=True, text=True)
|
|
3916
|
+
try: os.remove(tmpjs)
|
|
3917
|
+
except: pass
|
|
3918
|
+
result = r.stdout.strip()
|
|
3919
|
+
if r.returncode != 0:
|
|
3920
|
+
print(f"JS error: {r.stderr.strip()[:300]}")
|
|
3921
|
+
elif result.startswith('NOT_FOUND:'):
|
|
3922
|
+
print(f"Not found: {result[10:]} \u2014 call get_elements to see available elements")
|
|
3923
|
+
elif result.startswith('CLICKED:') or result.startswith('TYPED'):
|
|
3924
|
+
print(f"OK {result}")
|
|
3925
|
+
else:
|
|
3926
|
+
print(result if result else "(no return value)")
|
|
3927
|
+
`;
|
|
3928
|
+
}
|
|
3929
|
+
};
|
|
3930
|
+
}
|
|
3931
|
+
});
|
|
3932
|
+
|
|
3933
|
+
// packages/daemon/src/capabilities/OpenInterpreterCapability.ts
|
|
3934
|
+
import { spawn as spawn4 } from "node:child_process";
|
|
3935
|
+
import { writeFileSync as writeFileSync3, unlinkSync as unlinkSync2 } from "node:fs";
|
|
3936
|
+
import { resolve as resolve4 } from "node:path";
|
|
3937
|
+
import { tmpdir as tmpdir2 } from "node:os";
|
|
3015
3938
|
var OI_SCRIPT, OpenInterpreterCapability;
|
|
3016
3939
|
var init_OpenInterpreterCapability = __esm({
|
|
3017
3940
|
"packages/daemon/src/capabilities/OpenInterpreterCapability.ts"() {
|
|
@@ -3104,11 +4027,11 @@ print(output if output else "Task completed successfully")
|
|
|
3104
4027
|
const fullTask = context ? `Context: ${context}
|
|
3105
4028
|
|
|
3106
4029
|
Task: ${task}` : task;
|
|
3107
|
-
const tmpFile =
|
|
3108
|
-
|
|
4030
|
+
const tmpFile = resolve4(tmpdir2(), `0agent_oi_${Date.now()}.py`);
|
|
4031
|
+
writeFileSync3(tmpFile, OI_SCRIPT, "utf8");
|
|
3109
4032
|
let result = await this._runScript(tmpFile, fullTask, signal);
|
|
3110
4033
|
try {
|
|
3111
|
-
|
|
4034
|
+
unlinkSync2(tmpFile);
|
|
3112
4035
|
} catch {
|
|
3113
4036
|
}
|
|
3114
4037
|
if (signal?.aborted) {
|
|
@@ -3124,10 +4047,10 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3124
4047
|
duration_ms: Date.now() - start
|
|
3125
4048
|
};
|
|
3126
4049
|
}
|
|
3127
|
-
|
|
4050
|
+
writeFileSync3(tmpFile, OI_SCRIPT, "utf8");
|
|
3128
4051
|
result = await this._runScript(tmpFile, fullTask, signal);
|
|
3129
4052
|
try {
|
|
3130
|
-
|
|
4053
|
+
unlinkSync2(tmpFile);
|
|
3131
4054
|
} catch {
|
|
3132
4055
|
}
|
|
3133
4056
|
if (signal?.aborted) {
|
|
@@ -3147,8 +4070,8 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3147
4070
|
}
|
|
3148
4071
|
/** Async pip install — never blocks the event loop (unlike spawnSync). */
|
|
3149
4072
|
_pipInstall(pkg, signal) {
|
|
3150
|
-
return new Promise((
|
|
3151
|
-
const proc =
|
|
4073
|
+
return new Promise((resolve17) => {
|
|
4074
|
+
const proc = spawn4("pip3", ["install", pkg, "-q"], {
|
|
3152
4075
|
env: process.env,
|
|
3153
4076
|
stdio: "ignore"
|
|
3154
4077
|
});
|
|
@@ -3158,7 +4081,7 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3158
4081
|
settled = true;
|
|
3159
4082
|
signal?.removeEventListener("abort", onAbort);
|
|
3160
4083
|
clearTimeout(timer);
|
|
3161
|
-
|
|
4084
|
+
resolve17(ok);
|
|
3162
4085
|
};
|
|
3163
4086
|
const onAbort = () => {
|
|
3164
4087
|
try {
|
|
@@ -3180,8 +4103,8 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3180
4103
|
});
|
|
3181
4104
|
}
|
|
3182
4105
|
_runScript(scriptPath, stdinData, signal) {
|
|
3183
|
-
return new Promise((
|
|
3184
|
-
const proc =
|
|
4106
|
+
return new Promise((resolve17) => {
|
|
4107
|
+
const proc = spawn4("python3", [scriptPath], {
|
|
3185
4108
|
env: process.env,
|
|
3186
4109
|
stdio: ["pipe", "pipe", "pipe"]
|
|
3187
4110
|
});
|
|
@@ -3193,7 +4116,7 @@ Run manually: pip3 install open-interpreter`,
|
|
|
3193
4116
|
settled = true;
|
|
3194
4117
|
signal?.removeEventListener("abort", onAbort);
|
|
3195
4118
|
clearTimeout(timer);
|
|
3196
|
-
|
|
4119
|
+
resolve17({ stdout: out.join(""), stderr: err.join(""), code });
|
|
3197
4120
|
};
|
|
3198
4121
|
const onAbort = () => {
|
|
3199
4122
|
try {
|
|
@@ -3307,6 +4230,7 @@ var init_CapabilityRegistry = __esm({
|
|
|
3307
4230
|
init_ShellCapability();
|
|
3308
4231
|
init_FileCapability();
|
|
3309
4232
|
init_MemoryCapability();
|
|
4233
|
+
init_GUICapability();
|
|
3310
4234
|
init_OpenInterpreterCapability();
|
|
3311
4235
|
CapabilityRegistry = class {
|
|
3312
4236
|
capabilities = /* @__PURE__ */ new Map();
|
|
@@ -3335,6 +4259,7 @@ var init_CapabilityRegistry = __esm({
|
|
|
3335
4259
|
this.register(new ScraperCapability());
|
|
3336
4260
|
this.register(new ShellCapability());
|
|
3337
4261
|
this.register(new FileCapability());
|
|
4262
|
+
this.register(new GUICapability());
|
|
3338
4263
|
this.register(new OpenInterpreterCapability());
|
|
3339
4264
|
if (graph) {
|
|
3340
4265
|
this.register(new MemoryCapability(graph, onMemoryWrite));
|
|
@@ -3414,9 +4339,9 @@ var init_capabilities = __esm({
|
|
|
3414
4339
|
});
|
|
3415
4340
|
|
|
3416
4341
|
// packages/daemon/src/AgentExecutor.ts
|
|
3417
|
-
import { spawn as
|
|
3418
|
-
import { writeFileSync as
|
|
3419
|
-
import { resolve as
|
|
4342
|
+
import { spawn as spawn5 } from "node:child_process";
|
|
4343
|
+
import { writeFileSync as writeFileSync4, readFileSync as readFileSync3, readdirSync as readdirSync2, mkdirSync as mkdirSync2, existsSync as existsSync3 } from "node:fs";
|
|
4344
|
+
import { resolve as resolve5, dirname as dirname2, relative } from "node:path";
|
|
3420
4345
|
import { homedir as homedir2 } from "node:os";
|
|
3421
4346
|
var SELF_MOD_PATTERN, AgentExecutor;
|
|
3422
4347
|
var init_AgentExecutor = __esm({
|
|
@@ -3611,9 +4536,9 @@ var init_AgentExecutor = __esm({
|
|
|
3611
4536
|
}
|
|
3612
4537
|
}
|
|
3613
4538
|
shellExec(command, timeoutMs) {
|
|
3614
|
-
return new Promise((
|
|
4539
|
+
return new Promise((resolve17) => {
|
|
3615
4540
|
const chunks = [];
|
|
3616
|
-
const proc =
|
|
4541
|
+
const proc = spawn5("bash", ["-c", command], {
|
|
3617
4542
|
cwd: this.cwd,
|
|
3618
4543
|
env: { ...process.env, TERM: "dumb" },
|
|
3619
4544
|
timeout: timeoutMs
|
|
@@ -3622,10 +4547,10 @@ var init_AgentExecutor = __esm({
|
|
|
3622
4547
|
proc.stderr.on("data", (d) => chunks.push(d.toString()));
|
|
3623
4548
|
proc.on("close", (code) => {
|
|
3624
4549
|
const output = chunks.join("").trim();
|
|
3625
|
-
|
|
4550
|
+
resolve17(output || (code === 0 ? "(command completed, no output)" : `exit code ${code}`));
|
|
3626
4551
|
});
|
|
3627
4552
|
proc.on("error", (err) => {
|
|
3628
|
-
|
|
4553
|
+
resolve17(`Error: ${err.message}`);
|
|
3629
4554
|
});
|
|
3630
4555
|
});
|
|
3631
4556
|
}
|
|
@@ -3633,7 +4558,7 @@ var init_AgentExecutor = __esm({
|
|
|
3633
4558
|
const safe = this.safePath(filePath);
|
|
3634
4559
|
if (!safe) return "Error: path outside working directory";
|
|
3635
4560
|
mkdirSync2(dirname2(safe), { recursive: true });
|
|
3636
|
-
|
|
4561
|
+
writeFileSync4(safe, content, "utf8");
|
|
3637
4562
|
const rel = relative(this.cwd, safe);
|
|
3638
4563
|
return `Written: ${rel} (${content.length} bytes)`;
|
|
3639
4564
|
}
|
|
@@ -3742,7 +4667,7 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3742
4667
|
}
|
|
3743
4668
|
// ─── Helpers ───────────────────────────────────────────────────────────────
|
|
3744
4669
|
safePath(p) {
|
|
3745
|
-
const resolved =
|
|
4670
|
+
const resolved = resolve5(this.cwd, p);
|
|
3746
4671
|
return resolved.startsWith(this.cwd) ? resolved : null;
|
|
3747
4672
|
}
|
|
3748
4673
|
buildSystemPrompt(extra, task) {
|
|
@@ -3783,20 +4708,21 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3783
4708
|
if (hasGUI) {
|
|
3784
4709
|
lines.push(
|
|
3785
4710
|
``,
|
|
3786
|
-
`GUI
|
|
3787
|
-
|
|
3788
|
-
|
|
3789
|
-
|
|
3790
|
-
|
|
3791
|
-
|
|
3792
|
-
|
|
3793
|
-
|
|
3794
|
-
|
|
3795
|
-
|
|
3796
|
-
|
|
3797
|
-
|
|
3798
|
-
|
|
3799
|
-
`
|
|
4711
|
+
`Browser/GUI actions \u2014 ALL work without Screen Recording permission:`,
|
|
4712
|
+
`\u2022 click_text {text:"Submit"} \u2014 click any button/link/tab by its visible text. Use get_elements first if unsure.`,
|
|
4713
|
+
`\u2022 type_in {selector:"search", text:"Drake"} \u2014 fill form field by placeholder or aria-label. Handles React events.`,
|
|
4714
|
+
`\u2022 get_elements \u2014 list every button/link/input/heading on the page. ALWAYS call this first when navigating.`,
|
|
4715
|
+
`\u2022 read_element {selector:"h1"} \u2014 read text of any element. selector="" reads full page.`,
|
|
4716
|
+
`\u2022 get_media_state \u2014 returns {state:PLAYING/PAUSED, time, duration, volume}. Call after play/pause to verify.`,
|
|
4717
|
+
`\u2022 scroll_to {selector:".result"} or {direction:"down", amount:400} \u2014 scroll page.`,
|
|
4718
|
+
`\u2022 exec_js {js:"..."} \u2014 run arbitrary JS. Use for anything not covered above.`,
|
|
4719
|
+
`\u2022 browser_state \u2014 get current URL + title. Call after any navigation.`,
|
|
4720
|
+
`\u2022 cdp_screenshot \u2014 screenshot via Chrome DevTools Protocol (if Chrome has --remote-debugging-port=9222).`,
|
|
4721
|
+
`\u2022 accessibility_click {app:"WhatsApp", element:"Send"} \u2014 click native macOS app button (no Screen Recording).`,
|
|
4722
|
+
`\u2022 open_url {url:"..."} \u2014 navigate. Returns actual URL + title + video state. Read it.`,
|
|
4723
|
+
`\u2022 hotkey {keys:"k", app:"Google Chrome"} \u2014 send key to specific app. Without app param: goes to Terminal.`,
|
|
4724
|
+
`WORKFLOW: navigate \u2192 get_elements \u2192 click_text/type_in \u2192 get_media_state/browser_state to verify.`,
|
|
4725
|
+
`NEVER assume success. Always verify with get_media_state, browser_state, or read_element.`
|
|
3800
4726
|
);
|
|
3801
4727
|
}
|
|
3802
4728
|
if (isSelfMod && this.agentRoot) {
|
|
@@ -3809,10 +4735,10 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3809
4735
|
);
|
|
3810
4736
|
}
|
|
3811
4737
|
const agentsFiles = [
|
|
3812
|
-
|
|
3813
|
-
|
|
3814
|
-
|
|
3815
|
-
|
|
4738
|
+
resolve5(this.cwd, "AGENTS.md"),
|
|
4739
|
+
resolve5(this.cwd, ".0agent", "AGENTS.md"),
|
|
4740
|
+
resolve5(this.cwd, "CLAUDE.md"),
|
|
4741
|
+
resolve5(homedir2(), ".0agent", "AGENTS.md")
|
|
3816
4742
|
];
|
|
3817
4743
|
for (const f of agentsFiles) {
|
|
3818
4744
|
try {
|
|
@@ -3923,7 +4849,7 @@ content = element.text if element else page.get_all_text()` : `content = page.ge
|
|
|
3923
4849
|
|
|
3924
4850
|
// packages/daemon/src/ExecutionVerifier.ts
|
|
3925
4851
|
import { existsSync as existsSync5 } from "node:fs";
|
|
3926
|
-
import { resolve as
|
|
4852
|
+
import { resolve as resolve6 } from "node:path";
|
|
3927
4853
|
var ExecutionVerifier;
|
|
3928
4854
|
var init_ExecutionVerifier = __esm({
|
|
3929
4855
|
"packages/daemon/src/ExecutionVerifier.ts"() {
|
|
@@ -3960,7 +4886,7 @@ var init_ExecutionVerifier = __esm({
|
|
|
3960
4886
|
};
|
|
3961
4887
|
}
|
|
3962
4888
|
if (files.length > 0) {
|
|
3963
|
-
const lastFile =
|
|
4889
|
+
const lastFile = resolve6(this.cwd, files[files.length - 1]);
|
|
3964
4890
|
const exists = existsSync5(lastFile);
|
|
3965
4891
|
return {
|
|
3966
4892
|
success: exists,
|
|
@@ -4000,10 +4926,10 @@ var init_ExecutionVerifier = __esm({
|
|
|
4000
4926
|
});
|
|
4001
4927
|
|
|
4002
4928
|
// packages/daemon/src/RuntimeSelfHeal.ts
|
|
4003
|
-
import { readFileSync as readFileSync5, writeFileSync as
|
|
4004
|
-
import { resolve as
|
|
4929
|
+
import { readFileSync as readFileSync5, writeFileSync as writeFileSync5, existsSync as existsSync6 } from "node:fs";
|
|
4930
|
+
import { resolve as resolve7, dirname as dirname3 } from "node:path";
|
|
4005
4931
|
import { fileURLToPath } from "node:url";
|
|
4006
|
-
import { execSync as execSync4, spawn as
|
|
4932
|
+
import { execSync as execSync4, spawn as spawn6 } from "node:child_process";
|
|
4007
4933
|
function isRuntimeBug(error) {
|
|
4008
4934
|
if (TASK_FAILURE_PATTERNS.some((p) => p.test(error))) return false;
|
|
4009
4935
|
return RUNTIME_BUG_PATTERNS.some((p) => p.test(error));
|
|
@@ -4073,8 +4999,8 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4073
4999
|
this.llm = llm;
|
|
4074
5000
|
this.eventBus = eventBus;
|
|
4075
5001
|
let dir = dirname3(fileURLToPath(import.meta.url));
|
|
4076
|
-
while (dir !== "/" && !existsSync6(
|
|
4077
|
-
dir =
|
|
5002
|
+
while (dir !== "/" && !existsSync6(resolve7(dir, "package.json"))) {
|
|
5003
|
+
dir = resolve7(dir, "..");
|
|
4078
5004
|
}
|
|
4079
5005
|
this.projectRoot = dir;
|
|
4080
5006
|
}
|
|
@@ -4120,7 +5046,7 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4120
5046
|
try {
|
|
4121
5047
|
const original = readFileSync5(tsPath, "utf8");
|
|
4122
5048
|
const backup = tsPath + ".bak";
|
|
4123
|
-
|
|
5049
|
+
writeFileSync5(backup, original, "utf8");
|
|
4124
5050
|
if (!original.includes(proposal.original_code.trim())) {
|
|
4125
5051
|
return {
|
|
4126
5052
|
applied: false,
|
|
@@ -4129,8 +5055,8 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4129
5055
|
};
|
|
4130
5056
|
}
|
|
4131
5057
|
const patched = original.replace(proposal.original_code, proposal.proposed_code);
|
|
4132
|
-
|
|
4133
|
-
const bundleScript =
|
|
5058
|
+
writeFileSync5(tsPath, patched, "utf8");
|
|
5059
|
+
const bundleScript = resolve7(this.projectRoot, "scripts", "bundle.mjs");
|
|
4134
5060
|
if (existsSync6(bundleScript)) {
|
|
4135
5061
|
try {
|
|
4136
5062
|
execSync4(`node "${bundleScript}"`, {
|
|
@@ -4139,7 +5065,7 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4139
5065
|
stdio: "ignore"
|
|
4140
5066
|
});
|
|
4141
5067
|
} catch {
|
|
4142
|
-
|
|
5068
|
+
writeFileSync5(tsPath, original, "utf8");
|
|
4143
5069
|
return {
|
|
4144
5070
|
applied: false,
|
|
4145
5071
|
restarted: false,
|
|
@@ -4164,11 +5090,11 @@ var init_RuntimeSelfHeal = __esm({
|
|
|
4164
5090
|
// ─── Private helpers ───────────────────────────────────────────────────────
|
|
4165
5091
|
findSourceFile(location) {
|
|
4166
5092
|
const candidates = [
|
|
4167
|
-
|
|
5093
|
+
resolve7(this.projectRoot, location.relPath),
|
|
4168
5094
|
// If relPath starts with dist/, look in src/
|
|
4169
|
-
|
|
4170
|
-
|
|
4171
|
-
|
|
5095
|
+
resolve7(this.projectRoot, location.relPath.replace(/^dist\//, "src/").replace(/\.js$/, ".ts")),
|
|
5096
|
+
resolve7(this.projectRoot, "packages", "daemon", "src", location.relPath.replace(/.*src\//, "")),
|
|
5097
|
+
resolve7(this.projectRoot, "packages", "core", "src", location.relPath.replace(/.*src\//, ""))
|
|
4172
5098
|
];
|
|
4173
5099
|
for (const p of candidates) {
|
|
4174
5100
|
if (existsSync6(p)) return p;
|
|
@@ -4236,9 +5162,9 @@ Rules:
|
|
|
4236
5162
|
}
|
|
4237
5163
|
}
|
|
4238
5164
|
restartDaemon() {
|
|
4239
|
-
const bundlePath =
|
|
5165
|
+
const bundlePath = resolve7(this.projectRoot, "dist", "daemon.mjs");
|
|
4240
5166
|
if (existsSync6(bundlePath)) {
|
|
4241
|
-
const child =
|
|
5167
|
+
const child = spawn6(process.execPath, [bundlePath], {
|
|
4242
5168
|
detached: true,
|
|
4243
5169
|
stdio: "ignore",
|
|
4244
5170
|
env: process.env
|
|
@@ -4343,7 +5269,7 @@ __export(ProactiveSurface_exports, {
|
|
|
4343
5269
|
});
|
|
4344
5270
|
import { execSync as execSync7 } from "node:child_process";
|
|
4345
5271
|
import { existsSync as existsSync16, readFileSync as readFileSync14, statSync, readdirSync as readdirSync5 } from "node:fs";
|
|
4346
|
-
import { resolve as
|
|
5272
|
+
import { resolve as resolve14, join as join6 } from "node:path";
|
|
4347
5273
|
function readdirSafe(dir) {
|
|
4348
5274
|
try {
|
|
4349
5275
|
return readdirSync5(dir);
|
|
@@ -4392,7 +5318,7 @@ var init_ProactiveSurface = __esm({
|
|
|
4392
5318
|
return [...this.insights];
|
|
4393
5319
|
}
|
|
4394
5320
|
async poll() {
|
|
4395
|
-
if (!existsSync16(
|
|
5321
|
+
if (!existsSync16(resolve14(this.cwd, ".git"))) return;
|
|
4396
5322
|
const newInsights = [];
|
|
4397
5323
|
const gitInsight = this.checkGitActivity();
|
|
4398
5324
|
if (gitInsight) newInsights.push(gitInsight);
|
|
@@ -4497,8 +5423,8 @@ var init_ProactiveSurface = __esm({
|
|
|
4497
5423
|
|
|
4498
5424
|
// packages/daemon/src/ZeroAgentDaemon.ts
|
|
4499
5425
|
init_src();
|
|
4500
|
-
import { writeFileSync as
|
|
4501
|
-
import { resolve as
|
|
5426
|
+
import { writeFileSync as writeFileSync12, unlinkSync as unlinkSync4, existsSync as existsSync17, mkdirSync as mkdirSync9, readFileSync as readFileSync15 } from "node:fs";
|
|
5427
|
+
import { resolve as resolve15 } from "node:path";
|
|
4502
5428
|
import { homedir as homedir9 } from "node:os";
|
|
4503
5429
|
|
|
4504
5430
|
// packages/daemon/src/config/DaemonConfig.ts
|
|
@@ -5000,19 +5926,19 @@ var ProjectScanner = class {
|
|
|
5000
5926
|
async getRunningPorts() {
|
|
5001
5927
|
const open = [];
|
|
5002
5928
|
await Promise.all(PORTS_TO_CHECK.map(
|
|
5003
|
-
(port) => new Promise((
|
|
5929
|
+
(port) => new Promise((resolve17) => {
|
|
5004
5930
|
const s = createServer();
|
|
5005
5931
|
s.listen(port, "127.0.0.1", () => {
|
|
5006
5932
|
s.close();
|
|
5007
|
-
|
|
5933
|
+
resolve17();
|
|
5008
5934
|
});
|
|
5009
5935
|
s.on("error", () => {
|
|
5010
5936
|
open.push(port);
|
|
5011
|
-
|
|
5937
|
+
resolve17();
|
|
5012
5938
|
});
|
|
5013
5939
|
setTimeout(() => {
|
|
5014
5940
|
s.close();
|
|
5015
|
-
|
|
5941
|
+
resolve17();
|
|
5016
5942
|
}, 200);
|
|
5017
5943
|
})
|
|
5018
5944
|
));
|
|
@@ -5089,7 +6015,7 @@ var ConversationStore = class {
|
|
|
5089
6015
|
|
|
5090
6016
|
// packages/daemon/src/SessionManager.ts
|
|
5091
6017
|
import { readFileSync as readFileSync6, existsSync as existsSync7 } from "node:fs";
|
|
5092
|
-
import { resolve as
|
|
6018
|
+
import { resolve as resolve8 } from "node:path";
|
|
5093
6019
|
import { homedir as homedir3 } from "node:os";
|
|
5094
6020
|
import YAML2 from "yaml";
|
|
5095
6021
|
var SessionManager = class {
|
|
@@ -5466,7 +6392,7 @@ Current task:`;
|
|
|
5466
6392
|
model: agentResult.model
|
|
5467
6393
|
});
|
|
5468
6394
|
} else {
|
|
5469
|
-
const cfgPath =
|
|
6395
|
+
const cfgPath = resolve8(homedir3(), ".0agent", "config.yaml");
|
|
5470
6396
|
const output = `No LLM API key found. Add one to ${cfgPath} or run: 0agent init`;
|
|
5471
6397
|
this.addStep(sessionId, "\u26A0 No LLM API key configured \u2014 run: 0agent init");
|
|
5472
6398
|
this.completeSession(sessionId, { output });
|
|
@@ -5509,7 +6435,7 @@ Current task:`;
|
|
|
5509
6435
|
*/
|
|
5510
6436
|
getFreshLLM() {
|
|
5511
6437
|
try {
|
|
5512
|
-
const configPath =
|
|
6438
|
+
const configPath = resolve8(homedir3(), ".0agent", "config.yaml");
|
|
5513
6439
|
if (!existsSync7(configPath)) return this.llm;
|
|
5514
6440
|
const raw = readFileSync6(configPath, "utf8");
|
|
5515
6441
|
const cfg = YAML2.parse(raw);
|
|
@@ -5537,7 +6463,7 @@ Current task:`;
|
|
|
5537
6463
|
if (!this.graph) return;
|
|
5538
6464
|
let extractLLM;
|
|
5539
6465
|
try {
|
|
5540
|
-
const cfgPath =
|
|
6466
|
+
const cfgPath = resolve8(homedir3(), ".0agent", "config.yaml");
|
|
5541
6467
|
if (existsSync7(cfgPath)) {
|
|
5542
6468
|
const raw = readFileSync6(cfgPath, "utf8");
|
|
5543
6469
|
const cfg = YAML2.parse(raw);
|
|
@@ -5902,7 +6828,7 @@ var BackgroundWorkers = class {
|
|
|
5902
6828
|
};
|
|
5903
6829
|
|
|
5904
6830
|
// packages/daemon/src/SkillRegistry.ts
|
|
5905
|
-
import { readFileSync as readFileSync7, readdirSync as readdirSync3, existsSync as existsSync8, writeFileSync as
|
|
6831
|
+
import { readFileSync as readFileSync7, readdirSync as readdirSync3, existsSync as existsSync8, writeFileSync as writeFileSync6, unlinkSync as unlinkSync3, mkdirSync as mkdirSync3 } from "node:fs";
|
|
5906
6832
|
import { join as join2 } from "node:path";
|
|
5907
6833
|
import { homedir as homedir4 } from "node:os";
|
|
5908
6834
|
import YAML3 from "yaml";
|
|
@@ -5966,7 +6892,7 @@ var SkillRegistry = class {
|
|
|
5966
6892
|
}
|
|
5967
6893
|
mkdirSync3(this.customDir, { recursive: true });
|
|
5968
6894
|
const filePath = join2(this.customDir, `${name}.yaml`);
|
|
5969
|
-
|
|
6895
|
+
writeFileSync6(filePath, yamlContent, "utf8");
|
|
5970
6896
|
const skill = YAML3.parse(yamlContent);
|
|
5971
6897
|
this.skills.set(name, skill);
|
|
5972
6898
|
return skill;
|
|
@@ -5980,7 +6906,7 @@ var SkillRegistry = class {
|
|
|
5980
6906
|
}
|
|
5981
6907
|
const filePath = join2(this.customDir, `${name}.yaml`);
|
|
5982
6908
|
if (existsSync8(filePath)) {
|
|
5983
|
-
|
|
6909
|
+
unlinkSync3(filePath);
|
|
5984
6910
|
}
|
|
5985
6911
|
this.skills.delete(name);
|
|
5986
6912
|
}
|
|
@@ -5993,7 +6919,7 @@ var SkillRegistry = class {
|
|
|
5993
6919
|
import { Hono as Hono14 } from "hono";
|
|
5994
6920
|
import { serve } from "@hono/node-server";
|
|
5995
6921
|
import { readFileSync as readFileSync9 } from "node:fs";
|
|
5996
|
-
import { resolve as
|
|
6922
|
+
import { resolve as resolve10, dirname as dirname4 } from "node:path";
|
|
5997
6923
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
5998
6924
|
|
|
5999
6925
|
// packages/daemon/src/routes/health.ts
|
|
@@ -6286,7 +7212,7 @@ function memoryRoutes(deps) {
|
|
|
6286
7212
|
init_LLMExecutor();
|
|
6287
7213
|
import { Hono as Hono10 } from "hono";
|
|
6288
7214
|
import { readFileSync as readFileSync8, existsSync as existsSync9 } from "node:fs";
|
|
6289
|
-
import { resolve as
|
|
7215
|
+
import { resolve as resolve9 } from "node:path";
|
|
6290
7216
|
import { homedir as homedir5 } from "node:os";
|
|
6291
7217
|
import YAML4 from "yaml";
|
|
6292
7218
|
function llmRoutes() {
|
|
@@ -6294,7 +7220,7 @@ function llmRoutes() {
|
|
|
6294
7220
|
app.post("/ping", async (c) => {
|
|
6295
7221
|
const start = Date.now();
|
|
6296
7222
|
try {
|
|
6297
|
-
const configPath =
|
|
7223
|
+
const configPath = resolve9(homedir5(), ".0agent", "config.yaml");
|
|
6298
7224
|
if (!existsSync9(configPath)) {
|
|
6299
7225
|
return c.json({ ok: false, error: "Config not found. Run: 0agent init" });
|
|
6300
7226
|
}
|
|
@@ -6813,11 +7739,11 @@ function runtimeRoutes(deps) {
|
|
|
6813
7739
|
// packages/daemon/src/HTTPServer.ts
|
|
6814
7740
|
function findGraphHtml() {
|
|
6815
7741
|
const candidates = [
|
|
6816
|
-
|
|
7742
|
+
resolve10(dirname4(fileURLToPath2(import.meta.url)), "graph.html"),
|
|
6817
7743
|
// dev (src/)
|
|
6818
|
-
|
|
7744
|
+
resolve10(dirname4(fileURLToPath2(import.meta.url)), "..", "graph.html"),
|
|
6819
7745
|
// bundled (dist/../)
|
|
6820
|
-
|
|
7746
|
+
resolve10(dirname4(fileURLToPath2(import.meta.url)), "..", "dist", "graph.html")
|
|
6821
7747
|
];
|
|
6822
7748
|
for (const p of candidates) {
|
|
6823
7749
|
try {
|
|
@@ -6867,7 +7793,7 @@ var HTTPServer = class {
|
|
|
6867
7793
|
this.app.get("/graph", serveGraph);
|
|
6868
7794
|
}
|
|
6869
7795
|
start() {
|
|
6870
|
-
return new Promise((
|
|
7796
|
+
return new Promise((resolve17) => {
|
|
6871
7797
|
this.server = serve(
|
|
6872
7798
|
{
|
|
6873
7799
|
fetch: this.app.fetch,
|
|
@@ -6875,20 +7801,20 @@ var HTTPServer = class {
|
|
|
6875
7801
|
hostname: this.deps.host
|
|
6876
7802
|
},
|
|
6877
7803
|
() => {
|
|
6878
|
-
|
|
7804
|
+
resolve17();
|
|
6879
7805
|
}
|
|
6880
7806
|
);
|
|
6881
7807
|
});
|
|
6882
7808
|
}
|
|
6883
7809
|
stop() {
|
|
6884
|
-
return new Promise((
|
|
7810
|
+
return new Promise((resolve17, reject) => {
|
|
6885
7811
|
if (!this.server) {
|
|
6886
|
-
|
|
7812
|
+
resolve17();
|
|
6887
7813
|
return;
|
|
6888
7814
|
}
|
|
6889
7815
|
this.server.close((err) => {
|
|
6890
7816
|
if (err) reject(err);
|
|
6891
|
-
else
|
|
7817
|
+
else resolve17();
|
|
6892
7818
|
});
|
|
6893
7819
|
});
|
|
6894
7820
|
}
|
|
@@ -6902,11 +7828,11 @@ init_LLMExecutor();
|
|
|
6902
7828
|
|
|
6903
7829
|
// packages/daemon/src/IdentityManager.ts
|
|
6904
7830
|
init_src();
|
|
6905
|
-
import { readFileSync as readFileSync10, writeFileSync as
|
|
6906
|
-
import { resolve as
|
|
7831
|
+
import { readFileSync as readFileSync10, writeFileSync as writeFileSync7, existsSync as existsSync10, mkdirSync as mkdirSync4 } from "node:fs";
|
|
7832
|
+
import { resolve as resolve11, dirname as dirname5 } from "node:path";
|
|
6907
7833
|
import { homedir as homedir6, hostname } from "node:os";
|
|
6908
7834
|
import YAML5 from "yaml";
|
|
6909
|
-
var IDENTITY_PATH =
|
|
7835
|
+
var IDENTITY_PATH = resolve11(homedir6(), ".0agent", "identity.yaml");
|
|
6910
7836
|
var DEFAULT_IDENTITY = {
|
|
6911
7837
|
name: "User",
|
|
6912
7838
|
device_id: `unknown-device`,
|
|
@@ -6978,16 +7904,16 @@ var IdentityManager = class {
|
|
|
6978
7904
|
if (!existsSync10(dir)) {
|
|
6979
7905
|
mkdirSync4(dir, { recursive: true });
|
|
6980
7906
|
}
|
|
6981
|
-
|
|
7907
|
+
writeFileSync7(IDENTITY_PATH, YAML5.stringify(this.identity), "utf8");
|
|
6982
7908
|
}
|
|
6983
7909
|
};
|
|
6984
7910
|
|
|
6985
7911
|
// packages/daemon/src/TeamManager.ts
|
|
6986
|
-
import { readFileSync as readFileSync11, writeFileSync as
|
|
6987
|
-
import { resolve as
|
|
7912
|
+
import { readFileSync as readFileSync11, writeFileSync as writeFileSync8, existsSync as existsSync11, mkdirSync as mkdirSync5 } from "node:fs";
|
|
7913
|
+
import { resolve as resolve12 } from "node:path";
|
|
6988
7914
|
import { homedir as homedir7 } from "node:os";
|
|
6989
7915
|
import YAML6 from "yaml";
|
|
6990
|
-
var TEAMS_PATH =
|
|
7916
|
+
var TEAMS_PATH = resolve12(homedir7(), ".0agent", "teams.yaml");
|
|
6991
7917
|
var TeamManager = class {
|
|
6992
7918
|
config;
|
|
6993
7919
|
constructor() {
|
|
@@ -7047,8 +7973,8 @@ var TeamManager = class {
|
|
|
7047
7973
|
}
|
|
7048
7974
|
}
|
|
7049
7975
|
save() {
|
|
7050
|
-
mkdirSync5(
|
|
7051
|
-
|
|
7976
|
+
mkdirSync5(resolve12(homedir7(), ".0agent"), { recursive: true });
|
|
7977
|
+
writeFileSync8(TEAMS_PATH, YAML6.stringify(this.config), "utf8");
|
|
7052
7978
|
}
|
|
7053
7979
|
};
|
|
7054
7980
|
|
|
@@ -7131,8 +8057,8 @@ var TeamSync = class {
|
|
|
7131
8057
|
};
|
|
7132
8058
|
|
|
7133
8059
|
// packages/daemon/src/GitHubMemorySync.ts
|
|
7134
|
-
import { readFileSync as readFileSync12, writeFileSync as
|
|
7135
|
-
import { resolve as
|
|
8060
|
+
import { readFileSync as readFileSync12, writeFileSync as writeFileSync9, existsSync as existsSync12, readdirSync as readdirSync4 } from "node:fs";
|
|
8061
|
+
import { resolve as resolve13 } from "node:path";
|
|
7136
8062
|
import { homedir as homedir8 } from "node:os";
|
|
7137
8063
|
var GITHUB_API = "https://api.github.com";
|
|
7138
8064
|
async function ghFetch(path, token, opts) {
|
|
@@ -7252,10 +8178,10 @@ var GitHubMemorySync = class {
|
|
|
7252
8178
|
)
|
|
7253
8179
|
);
|
|
7254
8180
|
}
|
|
7255
|
-
const customSkillsDir =
|
|
8181
|
+
const customSkillsDir = resolve13(homedir8(), ".0agent", "skills", "custom");
|
|
7256
8182
|
if (existsSync12(customSkillsDir)) {
|
|
7257
8183
|
for (const file of readdirSync4(customSkillsDir).filter((f) => f.endsWith(".yaml"))) {
|
|
7258
|
-
const content = readFileSync12(
|
|
8184
|
+
const content = readFileSync12(resolve13(customSkillsDir, file), "utf8");
|
|
7259
8185
|
pushes.push(putFile(token, owner, repo, `skills/custom/${file}`, content, commitMsg));
|
|
7260
8186
|
}
|
|
7261
8187
|
}
|
|
@@ -7441,7 +8367,7 @@ var GitHubMemorySync = class {
|
|
|
7441
8367
|
}
|
|
7442
8368
|
async pullCustomSkills() {
|
|
7443
8369
|
const { token, owner, repo } = this.config;
|
|
7444
|
-
const dir =
|
|
8370
|
+
const dir = resolve13(homedir8(), ".0agent", "skills", "custom");
|
|
7445
8371
|
try {
|
|
7446
8372
|
const res = await ghFetch(`/repos/${owner}/${repo}/contents/skills/custom`, token);
|
|
7447
8373
|
if (!res.ok) return;
|
|
@@ -7451,7 +8377,7 @@ var GitHubMemorySync = class {
|
|
|
7451
8377
|
if (content) {
|
|
7452
8378
|
const { mkdirSync: mkdirSync10 } = await import("node:fs");
|
|
7453
8379
|
mkdirSync10(dir, { recursive: true });
|
|
7454
|
-
|
|
8380
|
+
writeFileSync9(resolve13(dir, file.name), content, "utf8");
|
|
7455
8381
|
}
|
|
7456
8382
|
}
|
|
7457
8383
|
} catch {
|
|
@@ -7528,7 +8454,7 @@ git checkout <commit> graph/ # restore graph files
|
|
|
7528
8454
|
};
|
|
7529
8455
|
|
|
7530
8456
|
// packages/daemon/src/CodespaceManager.ts
|
|
7531
|
-
import { execSync as execSync5, spawn as
|
|
8457
|
+
import { execSync as execSync5, spawn as spawn7 } from "node:child_process";
|
|
7532
8458
|
var BROWSER_PORT_REMOTE = 3e3;
|
|
7533
8459
|
var BROWSER_PORT_LOCAL = 3001;
|
|
7534
8460
|
var DISPLAY_NAME = "0agent-browser";
|
|
@@ -7623,7 +8549,7 @@ var CodespaceManager = class {
|
|
|
7623
8549
|
async openTunnel(name) {
|
|
7624
8550
|
this.closeTunnel();
|
|
7625
8551
|
console.log(`[Codespace] Opening tunnel port ${BROWSER_PORT_REMOTE} \u2192 localhost:${BROWSER_PORT_LOCAL}...`);
|
|
7626
|
-
this.forwardProcess =
|
|
8552
|
+
this.forwardProcess = spawn7(
|
|
7627
8553
|
"gh",
|
|
7628
8554
|
["codespace", "ports", "forward", `${BROWSER_PORT_REMOTE}:${BROWSER_PORT_LOCAL}`, "--codespace", name],
|
|
7629
8555
|
{ stdio: ["ignore", "ignore", "ignore"] }
|
|
@@ -8073,7 +8999,7 @@ var SurfaceRouter = class {
|
|
|
8073
8999
|
|
|
8074
9000
|
// packages/daemon/src/surfaces/TelegramAdapter.ts
|
|
8075
9001
|
import { existsSync as existsSync13, mkdirSync as mkdirSync6 } from "node:fs";
|
|
8076
|
-
import { tmpdir as
|
|
9002
|
+
import { tmpdir as tmpdir3 } from "node:os";
|
|
8077
9003
|
import { join as join3 } from "node:path";
|
|
8078
9004
|
var TelegramAdapter = class {
|
|
8079
9005
|
constructor(config) {
|
|
@@ -8278,15 +9204,15 @@ Sessions: ${h.active_sessions} active`
|
|
|
8278
9204
|
try {
|
|
8279
9205
|
const fileUrl = await this._getFileUrl(fileId);
|
|
8280
9206
|
if (!fileUrl) return null;
|
|
8281
|
-
const tmpDir = join3(
|
|
9207
|
+
const tmpDir = join3(tmpdir3(), "0agent-voice");
|
|
8282
9208
|
if (!existsSync13(tmpDir)) mkdirSync6(tmpDir, { recursive: true });
|
|
8283
9209
|
const tmpPath = join3(tmpDir, `${fileId}.ogg`);
|
|
8284
9210
|
const wavPath = join3(tmpDir, `${fileId}.wav`);
|
|
8285
9211
|
const res = await fetch(fileUrl);
|
|
8286
9212
|
if (!res.ok) return null;
|
|
8287
9213
|
const buf = await res.arrayBuffer();
|
|
8288
|
-
const { writeFileSync:
|
|
8289
|
-
|
|
9214
|
+
const { writeFileSync: writeFileSync13 } = await import("node:fs");
|
|
9215
|
+
writeFileSync13(tmpPath, Buffer.from(buf));
|
|
8290
9216
|
const { execSync: execSync8 } = await import("node:child_process");
|
|
8291
9217
|
try {
|
|
8292
9218
|
execSync8(`ffmpeg -y -i "${tmpPath}" -ar 16000 -ac 1 "${wavPath}" 2>/dev/null`, { timeout: 3e4 });
|
|
@@ -8742,9 +9668,9 @@ var WhatsAppAdapter = class {
|
|
|
8742
9668
|
import * as readline from "node:readline";
|
|
8743
9669
|
|
|
8744
9670
|
// packages/daemon/src/surfaces/WhisperSTT.ts
|
|
8745
|
-
import { execSync as execSync6, spawnSync as
|
|
9671
|
+
import { execSync as execSync6, spawnSync as spawnSync5 } from "node:child_process";
|
|
8746
9672
|
import { existsSync as existsSync14, mkdirSync as mkdirSync7, readFileSync as readFileSync13 } from "node:fs";
|
|
8747
|
-
import { tmpdir as
|
|
9673
|
+
import { tmpdir as tmpdir4 } from "node:os";
|
|
8748
9674
|
import { join as join4, basename } from "node:path";
|
|
8749
9675
|
var WhisperSTT = class _WhisperSTT {
|
|
8750
9676
|
model;
|
|
@@ -8765,7 +9691,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
8765
9691
|
console.warn(`[WhisperSTT] Audio file not found: ${audioPath}`);
|
|
8766
9692
|
return null;
|
|
8767
9693
|
}
|
|
8768
|
-
const outDir = join4(
|
|
9694
|
+
const outDir = join4(tmpdir4(), "0agent-whisper");
|
|
8769
9695
|
if (!existsSync14(outDir)) mkdirSync7(outDir, { recursive: true });
|
|
8770
9696
|
try {
|
|
8771
9697
|
const langFlag = this.language ? `--language ${this.language}` : "";
|
|
@@ -8789,7 +9715,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
8789
9715
|
static detectBinary() {
|
|
8790
9716
|
for (const bin of ["whisper", "faster-whisper", "whisper.cpp"]) {
|
|
8791
9717
|
try {
|
|
8792
|
-
const result =
|
|
9718
|
+
const result = spawnSync5(bin, ["--help"], { timeout: 3e3, stdio: "pipe" });
|
|
8793
9719
|
if (result.status === 0 || result.status === 1) return bin;
|
|
8794
9720
|
} catch {
|
|
8795
9721
|
}
|
|
@@ -8798,25 +9724,25 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
8798
9724
|
}
|
|
8799
9725
|
};
|
|
8800
9726
|
async function recordAudio(durationSeconds) {
|
|
8801
|
-
const outDir = join4(
|
|
9727
|
+
const outDir = join4(tmpdir4(), "0agent-voice");
|
|
8802
9728
|
if (!existsSync14(outDir)) mkdirSync7(outDir, { recursive: true });
|
|
8803
9729
|
const outPath = join4(outDir, `recording-${Date.now()}.wav`);
|
|
8804
|
-
const soxResult =
|
|
9730
|
+
const soxResult = spawnSync5(
|
|
8805
9731
|
"sox",
|
|
8806
9732
|
["-d", "-r", "16000", "-c", "1", "-b", "16", outPath, "trim", "0", String(durationSeconds)],
|
|
8807
9733
|
{ timeout: (durationSeconds + 5) * 1e3, stdio: "pipe" }
|
|
8808
9734
|
);
|
|
8809
9735
|
if (soxResult.status === 0 && existsSync14(outPath)) return outPath;
|
|
8810
|
-
const
|
|
9736
|
+
const platform3 = process.platform;
|
|
8811
9737
|
let ffmpegDevice;
|
|
8812
|
-
if (
|
|
9738
|
+
if (platform3 === "darwin") {
|
|
8813
9739
|
ffmpegDevice = ["-f", "avfoundation", "-i", ":0"];
|
|
8814
|
-
} else if (
|
|
9740
|
+
} else if (platform3 === "linux") {
|
|
8815
9741
|
ffmpegDevice = ["-f", "alsa", "-i", "default"];
|
|
8816
9742
|
} else {
|
|
8817
9743
|
return null;
|
|
8818
9744
|
}
|
|
8819
|
-
const ffmpegResult =
|
|
9745
|
+
const ffmpegResult = spawnSync5(
|
|
8820
9746
|
"ffmpeg",
|
|
8821
9747
|
["-y", ...ffmpegDevice, "-ar", "16000", "-ac", "1", "-t", String(durationSeconds), outPath],
|
|
8822
9748
|
{ timeout: (durationSeconds + 5) * 1e3, stdio: "pipe" }
|
|
@@ -8825,7 +9751,7 @@ async function recordAudio(durationSeconds) {
|
|
|
8825
9751
|
}
|
|
8826
9752
|
|
|
8827
9753
|
// packages/daemon/src/surfaces/NativeTTS.ts
|
|
8828
|
-
import { spawnSync as
|
|
9754
|
+
import { spawnSync as spawnSync6, spawn as spawn8 } from "node:child_process";
|
|
8829
9755
|
var NativeTTS = class _NativeTTS {
|
|
8830
9756
|
engine;
|
|
8831
9757
|
voice;
|
|
@@ -8849,11 +9775,11 @@ var NativeTTS = class _NativeTTS {
|
|
|
8849
9775
|
if (!this.resolvedEngine) return;
|
|
8850
9776
|
const cleaned = this._clean(text);
|
|
8851
9777
|
if (!cleaned) return;
|
|
8852
|
-
return new Promise((
|
|
9778
|
+
return new Promise((resolve17) => {
|
|
8853
9779
|
const args = this._buildArgs(this.resolvedEngine, cleaned);
|
|
8854
|
-
const proc =
|
|
8855
|
-
proc.on("close", () =>
|
|
8856
|
-
proc.on("error", () =>
|
|
9780
|
+
const proc = spawn8(this.resolvedEngine, args, { stdio: "ignore" });
|
|
9781
|
+
proc.on("close", () => resolve17());
|
|
9782
|
+
proc.on("error", () => resolve17());
|
|
8857
9783
|
});
|
|
8858
9784
|
}
|
|
8859
9785
|
/** Check if any TTS engine is available */
|
|
@@ -8867,8 +9793,8 @@ var NativeTTS = class _NativeTTS {
|
|
|
8867
9793
|
return _NativeTTS._detectEngine();
|
|
8868
9794
|
}
|
|
8869
9795
|
static _detectEngine() {
|
|
8870
|
-
const
|
|
8871
|
-
if (
|
|
9796
|
+
const platform3 = process.platform;
|
|
9797
|
+
if (platform3 === "darwin") {
|
|
8872
9798
|
if (_NativeTTS._isAvailable("say")) return "say";
|
|
8873
9799
|
}
|
|
8874
9800
|
if (_NativeTTS._isAvailable("piper")) return "piper";
|
|
@@ -8878,7 +9804,7 @@ var NativeTTS = class _NativeTTS {
|
|
|
8878
9804
|
}
|
|
8879
9805
|
static _isAvailable(engine) {
|
|
8880
9806
|
try {
|
|
8881
|
-
const r =
|
|
9807
|
+
const r = spawnSync6(engine, ["--help"], { timeout: 2e3, stdio: "pipe" });
|
|
8882
9808
|
return r.status === 0 || r.status === 1;
|
|
8883
9809
|
} catch {
|
|
8884
9810
|
return false;
|
|
@@ -8911,7 +9837,7 @@ var NativeTTS = class _NativeTTS {
|
|
|
8911
9837
|
}
|
|
8912
9838
|
_speakWith(engine, text) {
|
|
8913
9839
|
const args = this._buildArgs(engine, text);
|
|
8914
|
-
const proc =
|
|
9840
|
+
const proc = spawn8(engine, args, { stdio: "ignore", detached: true });
|
|
8915
9841
|
proc.unref();
|
|
8916
9842
|
}
|
|
8917
9843
|
/** Remove markdown/ANSI and control chars before speaking */
|
|
@@ -9036,10 +9962,10 @@ var VoiceAdapter = class {
|
|
|
9036
9962
|
};
|
|
9037
9963
|
|
|
9038
9964
|
// packages/daemon/src/surfaces/MeetingAdapter.ts
|
|
9039
|
-
import { existsSync as existsSync15, mkdirSync as mkdirSync8, writeFileSync as
|
|
9040
|
-
import { tmpdir as
|
|
9965
|
+
import { existsSync as existsSync15, mkdirSync as mkdirSync8, writeFileSync as writeFileSync11 } from "node:fs";
|
|
9966
|
+
import { tmpdir as tmpdir5 } from "node:os";
|
|
9041
9967
|
import { join as join5 } from "node:path";
|
|
9042
|
-
import { spawn as
|
|
9968
|
+
import { spawn as spawn9 } from "node:child_process";
|
|
9043
9969
|
var MeetingAdapter = class {
|
|
9044
9970
|
name = "meeting";
|
|
9045
9971
|
messageHandler = null;
|
|
@@ -9063,7 +9989,7 @@ var MeetingAdapter = class {
|
|
|
9063
9989
|
this.silenceTimeoutSeconds = config.silence_timeout_seconds ?? 60;
|
|
9064
9990
|
this.triggerPhrases = config.trigger_phrases ?? ["agent,", "hey agent", "ok agent"];
|
|
9065
9991
|
this.contextWindowSeconds = config.context_window_seconds ?? 120;
|
|
9066
|
-
this.tmpDir = join5(
|
|
9992
|
+
this.tmpDir = join5(tmpdir5(), "0agent-meeting");
|
|
9067
9993
|
if (!existsSync15(this.tmpDir)) mkdirSync8(this.tmpDir, { recursive: true });
|
|
9068
9994
|
this.stt = new WhisperSTT({ model: config.whisper_model ?? "base" });
|
|
9069
9995
|
}
|
|
@@ -9170,26 +10096,26 @@ ${msg.text}
|
|
|
9170
10096
|
}
|
|
9171
10097
|
}
|
|
9172
10098
|
async _captureSystemAudio(outPath, seconds) {
|
|
9173
|
-
return new Promise((
|
|
9174
|
-
const
|
|
10099
|
+
return new Promise((resolve17) => {
|
|
10100
|
+
const platform3 = process.platform;
|
|
9175
10101
|
let args;
|
|
9176
|
-
if (
|
|
10102
|
+
if (platform3 === "darwin") {
|
|
9177
10103
|
args = ["-y", "-f", "avfoundation", "-i", ":1", "-ar", "16000", "-ac", "1", "-t", String(seconds), outPath];
|
|
9178
|
-
} else if (
|
|
10104
|
+
} else if (platform3 === "linux") {
|
|
9179
10105
|
args = ["-y", "-f", "pulse", "-i", "default.monitor", "-ar", "16000", "-ac", "1", "-t", String(seconds), outPath];
|
|
9180
10106
|
} else {
|
|
9181
|
-
|
|
10107
|
+
resolve17(false);
|
|
9182
10108
|
return;
|
|
9183
10109
|
}
|
|
9184
|
-
const proc =
|
|
10110
|
+
const proc = spawn9("ffmpeg", args, { stdio: "pipe" });
|
|
9185
10111
|
this.ffmpegProcess = proc;
|
|
9186
10112
|
proc.on("close", (code) => {
|
|
9187
10113
|
this.ffmpegProcess = null;
|
|
9188
|
-
|
|
10114
|
+
resolve17(code === 0);
|
|
9189
10115
|
});
|
|
9190
10116
|
proc.on("error", () => {
|
|
9191
10117
|
this.ffmpegProcess = null;
|
|
9192
|
-
|
|
10118
|
+
resolve17(false);
|
|
9193
10119
|
});
|
|
9194
10120
|
});
|
|
9195
10121
|
}
|
|
@@ -9248,13 +10174,13 @@ ${fullTranscript}`,
|
|
|
9248
10174
|
const content = `Meeting Transcript
|
|
9249
10175
|
${"=".repeat(40)}
|
|
9250
10176
|
${this.getTranscript()}`;
|
|
9251
|
-
|
|
10177
|
+
writeFileSync11(outPath, content, "utf8");
|
|
9252
10178
|
return outPath;
|
|
9253
10179
|
}
|
|
9254
10180
|
static isAvailable() {
|
|
9255
10181
|
try {
|
|
9256
|
-
const { spawnSync:
|
|
9257
|
-
const r =
|
|
10182
|
+
const { spawnSync: spawnSync7 } = __require("node:child_process");
|
|
10183
|
+
const r = spawnSync7("ffmpeg", ["-version"], { timeout: 2e3, stdio: "pipe" });
|
|
9258
10184
|
return r.status === 0;
|
|
9259
10185
|
} catch {
|
|
9260
10186
|
return false;
|
|
@@ -9287,11 +10213,11 @@ var ZeroAgentDaemon = class {
|
|
|
9287
10213
|
startedAt = 0;
|
|
9288
10214
|
pidFilePath;
|
|
9289
10215
|
constructor() {
|
|
9290
|
-
this.pidFilePath =
|
|
10216
|
+
this.pidFilePath = resolve15(homedir9(), ".0agent", "daemon.pid");
|
|
9291
10217
|
}
|
|
9292
10218
|
async start(opts) {
|
|
9293
10219
|
this.config = await loadConfig(opts?.config_path);
|
|
9294
|
-
const dotDir =
|
|
10220
|
+
const dotDir = resolve15(homedir9(), ".0agent");
|
|
9295
10221
|
if (!existsSync17(dotDir)) {
|
|
9296
10222
|
mkdirSync9(dotDir, { recursive: true });
|
|
9297
10223
|
}
|
|
@@ -9366,10 +10292,10 @@ var ZeroAgentDaemon = class {
|
|
|
9366
10292
|
console.log(`[0agent] Teams: ${teams.map((t) => t.team_name).join(", ")}`);
|
|
9367
10293
|
}
|
|
9368
10294
|
const _daemonFile = fileURLToPath3(import.meta.url);
|
|
9369
|
-
const _agentRoot =
|
|
10295
|
+
const _agentRoot = resolve15(dirname7(_daemonFile), "..");
|
|
9370
10296
|
let agentRoot;
|
|
9371
10297
|
try {
|
|
9372
|
-
const _pkg = JSON.parse(readFileSync15(
|
|
10298
|
+
const _pkg = JSON.parse(readFileSync15(resolve15(_agentRoot, "package.json"), "utf8"));
|
|
9373
10299
|
if (_pkg.name === "0agent") agentRoot = _agentRoot;
|
|
9374
10300
|
} catch {
|
|
9375
10301
|
}
|
|
@@ -9521,7 +10447,7 @@ var ZeroAgentDaemon = class {
|
|
|
9521
10447
|
}
|
|
9522
10448
|
});
|
|
9523
10449
|
await this.httpServer.start();
|
|
9524
|
-
|
|
10450
|
+
writeFileSync12(this.pidFilePath, String(process.pid), "utf8");
|
|
9525
10451
|
console.log(
|
|
9526
10452
|
`[0agent] Daemon started on ${this.config.server.host}:${this.config.server.port} (PID: ${process.pid})`
|
|
9527
10453
|
);
|
|
@@ -9575,7 +10501,7 @@ var ZeroAgentDaemon = class {
|
|
|
9575
10501
|
this.adapter = null;
|
|
9576
10502
|
if (existsSync17(this.pidFilePath)) {
|
|
9577
10503
|
try {
|
|
9578
|
-
|
|
10504
|
+
unlinkSync4(this.pidFilePath);
|
|
9579
10505
|
} catch {
|
|
9580
10506
|
}
|
|
9581
10507
|
}
|
|
@@ -9603,10 +10529,10 @@ var ZeroAgentDaemon = class {
|
|
|
9603
10529
|
};
|
|
9604
10530
|
|
|
9605
10531
|
// packages/daemon/src/start.ts
|
|
9606
|
-
import { resolve as
|
|
10532
|
+
import { resolve as resolve16 } from "node:path";
|
|
9607
10533
|
import { homedir as homedir10 } from "node:os";
|
|
9608
10534
|
import { existsSync as existsSync18 } from "node:fs";
|
|
9609
|
-
var CONFIG_PATH = process.env["ZEROAGENT_CONFIG"] ??
|
|
10535
|
+
var CONFIG_PATH = process.env["ZEROAGENT_CONFIG"] ?? resolve16(homedir10(), ".0agent", "config.yaml");
|
|
9610
10536
|
if (!existsSync18(CONFIG_PATH)) {
|
|
9611
10537
|
console.error(`
|
|
9612
10538
|
0agent is not initialised.
|