open-agents-ai 0.187.572 → 0.187.574
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +407 -64
- package/dist/scripts/web_scrape.py +228 -63
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3800,7 +3800,7 @@ var init_web_fetch = __esm({
|
|
|
3800
3800
|
WebFetchTool = class {
|
|
3801
3801
|
name = "web_fetch";
|
|
3802
3802
|
_fetchCache = /* @__PURE__ */ new Map();
|
|
3803
|
-
description = "Fetch a single web page and return its text content (HTML stripped to plain text). FASTEST web tool — use this for reading any single URL: documentation, articles, README files, API references, Stack Overflow answers. Limitations: no JavaScript rendering (SPAs/React apps return empty), no link following, no cookies/auth, no structured data extraction. If the page is blank or incomplete, switch to web_crawl with strategy='playwright'. For scraping/extracting structured data (prices, listings, tables), use web_crawl instead. For search engine queries, use web_search instead. For interactive browser sessions (login, form filling, clicking), use browser_action instead.";
|
|
3803
|
+
description = "Fetch a single web page and return its text content (HTML stripped to plain text). FASTEST web tool — use this for reading any single URL: documentation, articles, README files, API references, Stack Overflow answers. Limitations: no JavaScript rendering (SPAs/React apps return empty), no link following, no cookies/auth, no structured data extraction. On timeout, automatically falls back to browser_action (headless Chrome) for slow/heavy pages. If the page is blank or incomplete, switch to web_crawl with strategy='playwright'. For scraping/extracting structured data (prices, listings, tables), use web_crawl instead. For search engine queries, use web_search instead. For interactive browser sessions (login, form filling, clicking), use browser_action instead.";
|
|
3804
3804
|
parameters = {
|
|
3805
3805
|
type: "object",
|
|
3806
3806
|
properties: {
|
|
@@ -3869,12 +3869,105 @@ var init_web_fetch = __esm({
|
|
|
3869
3869
|
durationMs: performance.now() - start2
|
|
3870
3870
|
};
|
|
3871
3871
|
} catch (error) {
|
|
3872
|
+
const errMsg = error instanceof Error ? error.message : String(error);
|
|
3873
|
+
if (/abort|timeout/i.test(errMsg)) {
|
|
3874
|
+
const fallback = await this.#hydraFallback(url, maxLength, start2);
|
|
3875
|
+
if (fallback)
|
|
3876
|
+
return fallback;
|
|
3877
|
+
}
|
|
3872
3878
|
return {
|
|
3873
3879
|
success: false,
|
|
3874
3880
|
output: "",
|
|
3875
|
-
error:
|
|
3881
|
+
error: errMsg,
|
|
3882
|
+
durationMs: performance.now() - start2
|
|
3883
|
+
};
|
|
3884
|
+
}
|
|
3885
|
+
}
|
|
3886
|
+
/** Fallback: use Hydra Chrome automation service (web-scrape-service on :8130)
|
|
3887
|
+
* when the HTTP fetch times out. Navigates with a real headless browser
|
|
3888
|
+
* so slow/heavy/JS pages render fully. Returns null if the service is
|
|
3889
|
+
* unavailable or any step fails — the original timeout error propagates. */
|
|
3890
|
+
async #hydraFallback(url, maxLength, start2) {
|
|
3891
|
+
const BASE = "http://localhost:8130";
|
|
3892
|
+
try {
|
|
3893
|
+
const health = await fetch(`${BASE}/health`, {
|
|
3894
|
+
signal: AbortSignal.timeout(2e3)
|
|
3895
|
+
});
|
|
3896
|
+
if (!health.ok)
|
|
3897
|
+
return null;
|
|
3898
|
+
} catch {
|
|
3899
|
+
return null;
|
|
3900
|
+
}
|
|
3901
|
+
try {
|
|
3902
|
+
const sessionRes = await fetch(`${BASE}/session/start`, {
|
|
3903
|
+
method: "POST",
|
|
3904
|
+
headers: { "Content-Type": "application/json" },
|
|
3905
|
+
body: JSON.stringify({ headless: true }),
|
|
3906
|
+
signal: AbortSignal.timeout(1e4)
|
|
3907
|
+
});
|
|
3908
|
+
const sessionData = await sessionRes.json();
|
|
3909
|
+
if (!sessionData.ok || !sessionData.session_id)
|
|
3910
|
+
return null;
|
|
3911
|
+
const sid = sessionData.session_id;
|
|
3912
|
+
const navRes = await fetch(`${BASE}/navigate`, {
|
|
3913
|
+
method: "POST",
|
|
3914
|
+
headers: { "Content-Type": "application/json" },
|
|
3915
|
+
body: JSON.stringify({ sid, url }),
|
|
3916
|
+
signal: AbortSignal.timeout(6e4)
|
|
3917
|
+
});
|
|
3918
|
+
const navData = await navRes.json();
|
|
3919
|
+
if (!navData.ok) {
|
|
3920
|
+
fetch(`${BASE}/session/close`, {
|
|
3921
|
+
method: "POST",
|
|
3922
|
+
headers: { "Content-Type": "application/json" },
|
|
3923
|
+
body: JSON.stringify({ sid })
|
|
3924
|
+
}).catch(() => {
|
|
3925
|
+
});
|
|
3926
|
+
return null;
|
|
3927
|
+
}
|
|
3928
|
+
const domRes = await fetch(`${BASE}/dom?sid=${encodeURIComponent(sid)}`, {
|
|
3929
|
+
signal: AbortSignal.timeout(3e4)
|
|
3930
|
+
});
|
|
3931
|
+
const domData = await domRes.json();
|
|
3932
|
+
if (!domData.ok) {
|
|
3933
|
+
fetch(`${BASE}/session/close`, {
|
|
3934
|
+
method: "POST",
|
|
3935
|
+
headers: { "Content-Type": "application/json" },
|
|
3936
|
+
body: JSON.stringify({ sid })
|
|
3937
|
+
}).catch(() => {
|
|
3938
|
+
});
|
|
3939
|
+
return null;
|
|
3940
|
+
}
|
|
3941
|
+
const dom = domData.dom;
|
|
3942
|
+
if (!dom || dom.length < 50) {
|
|
3943
|
+
fetch(`${BASE}/session/close`, {
|
|
3944
|
+
method: "POST",
|
|
3945
|
+
headers: { "Content-Type": "application/json" },
|
|
3946
|
+
body: JSON.stringify({ sid })
|
|
3947
|
+
}).catch(() => {
|
|
3948
|
+
});
|
|
3949
|
+
return null;
|
|
3950
|
+
}
|
|
3951
|
+
fetch(`${BASE}/session/close`, {
|
|
3952
|
+
method: "POST",
|
|
3953
|
+
headers: { "Content-Type": "application/json" },
|
|
3954
|
+
body: JSON.stringify({ sid })
|
|
3955
|
+
}).catch(() => {
|
|
3956
|
+
});
|
|
3957
|
+
const text = this.#stripHtml(dom);
|
|
3958
|
+
this._fetchCache.set(url, { text, fetchedAt: Date.now() });
|
|
3959
|
+
const truncated = text.length > maxLength;
|
|
3960
|
+
return {
|
|
3961
|
+
success: true,
|
|
3962
|
+
output: `[Hydra fallback: HTTP fetch timed out, retrieved via Chrome browser]
|
|
3963
|
+
|
|
3964
|
+
` + (truncated ? `${text.slice(0, maxLength)}
|
|
3965
|
+
|
|
3966
|
+
[Content truncated to ${maxLength} characters]` : text),
|
|
3876
3967
|
durationMs: performance.now() - start2
|
|
3877
3968
|
};
|
|
3969
|
+
} catch {
|
|
3970
|
+
return null;
|
|
3878
3971
|
}
|
|
3879
3972
|
}
|
|
3880
3973
|
#stripHtml(html) {
|
|
@@ -16769,19 +16862,36 @@ function isYouTubeUrl(url) {
|
|
|
16769
16862
|
return /(?:youtube\.com\/(?:watch|shorts|live|embed|v\/)|youtu\.be\/)/i.test(url);
|
|
16770
16863
|
}
|
|
16771
16864
|
function ensureYtDlp() {
|
|
16772
|
-
|
|
16773
|
-
|
|
16774
|
-
|
|
16775
|
-
|
|
16865
|
+
if (_ytDlpPath)
|
|
16866
|
+
return _ytDlpPath;
|
|
16867
|
+
const isWin2 = process.platform === "win32";
|
|
16868
|
+
const venvDir = join27(homedir8(), ".open-agents", "venv");
|
|
16869
|
+
const pipPath = isWin2 ? join27(venvDir, "Scripts", "pip.exe") : join27(venvDir, "bin", "pip");
|
|
16870
|
+
const ytDlpPath = isWin2 ? join27(venvDir, "Scripts", "yt-dlp.exe") : join27(venvDir, "bin", "yt-dlp");
|
|
16871
|
+
if (!existsSync22(pipPath)) {
|
|
16776
16872
|
try {
|
|
16777
|
-
|
|
16778
|
-
|
|
16873
|
+
mkdirSync9(join27(homedir8(), ".open-agents"), { recursive: true });
|
|
16874
|
+
execSync13(`python3 -m venv "${venvDir}"`, {
|
|
16875
|
+
timeout: 3e4,
|
|
16779
16876
|
stdio: "pipe"
|
|
16780
16877
|
});
|
|
16781
|
-
return true;
|
|
16782
16878
|
} catch {
|
|
16783
|
-
return
|
|
16879
|
+
return null;
|
|
16880
|
+
}
|
|
16881
|
+
}
|
|
16882
|
+
try {
|
|
16883
|
+
execSync13(`"${pipPath}" install -U yt-dlp 2>&1`, {
|
|
16884
|
+
timeout: 6e4,
|
|
16885
|
+
stdio: "pipe"
|
|
16886
|
+
});
|
|
16887
|
+
_ytDlpPath = ytDlpPath;
|
|
16888
|
+
return ytDlpPath;
|
|
16889
|
+
} catch {
|
|
16890
|
+
if (existsSync22(ytDlpPath)) {
|
|
16891
|
+
_ytDlpPath = ytDlpPath;
|
|
16892
|
+
return ytDlpPath;
|
|
16784
16893
|
}
|
|
16894
|
+
return null;
|
|
16785
16895
|
}
|
|
16786
16896
|
}
|
|
16787
16897
|
function formatTime(seconds) {
|
|
@@ -16789,7 +16899,7 @@ function formatTime(seconds) {
|
|
|
16789
16899
|
const s2 = Math.floor(seconds % 60);
|
|
16790
16900
|
return `${String(m2).padStart(2, "0")}:${String(s2).padStart(2, "0")}`;
|
|
16791
16901
|
}
|
|
16792
|
-
var AUDIO_EXTS, VIDEO_EXTS, _tcModule, _tcChecked, TranscribeFileTool, TranscribeUrlTool, YouTubeDownloadTool;
|
|
16902
|
+
var AUDIO_EXTS, VIDEO_EXTS, _tcModule, _tcChecked, TranscribeFileTool, _ytDlpPath, TranscribeUrlTool, YouTubeDownloadTool;
|
|
16793
16903
|
var init_transcribe_tool = __esm({
|
|
16794
16904
|
"packages/execution/dist/tools/transcribe-tool.js"() {
|
|
16795
16905
|
"use strict";
|
|
@@ -16986,9 +17096,10 @@ var init_transcribe_tool = __esm({
|
|
|
16986
17096
|
}
|
|
16987
17097
|
}
|
|
16988
17098
|
};
|
|
17099
|
+
_ytDlpPath = null;
|
|
16989
17100
|
TranscribeUrlTool = class {
|
|
16990
17101
|
name = "transcribe_url";
|
|
16991
|
-
description = "Download and transcribe audio/video from a URL. Supports YouTube links (youtube.com/watch?v=..., youtu.be/...) and direct media URLs (MP3, WAV, MP4, etc.). YouTube audio is extracted via yt-dlp (
|
|
17102
|
+
description = "Download and transcribe audio/video from a URL. Supports YouTube links (youtube.com/watch?v=..., youtu.be/...) and direct media URLs (MP3, WAV, MP4, etc.). YouTube audio is extracted via yt-dlp (shared venv at ~/.open-agents/venv/). If yt-dlp gets YouTube 403 errors, the tool auto-upgrades it. Transcription is local via faster-whisper (no cloud API).";
|
|
16992
17103
|
parameters = {
|
|
16993
17104
|
type: "object",
|
|
16994
17105
|
properties: {
|
|
@@ -17026,17 +17137,18 @@ var init_transcribe_tool = __esm({
|
|
|
17026
17137
|
let tmpFile = "";
|
|
17027
17138
|
try {
|
|
17028
17139
|
if (isYouTubeUrl(url)) {
|
|
17029
|
-
|
|
17140
|
+
const ytDlp = ensureYtDlp();
|
|
17141
|
+
if (!ytDlp) {
|
|
17030
17142
|
return {
|
|
17031
17143
|
success: false,
|
|
17032
17144
|
output: "",
|
|
17033
|
-
error: "yt-dlp not
|
|
17145
|
+
error: "yt-dlp not available via shared venv. Run: python3 -m venv ~/.open-agents/venv && ~/.open-agents/venv/bin/pip install yt-dlp",
|
|
17034
17146
|
durationMs: performance.now() - start2
|
|
17035
17147
|
};
|
|
17036
17148
|
}
|
|
17037
17149
|
tmpFile = `${tmpBase}.mp3`;
|
|
17038
17150
|
try {
|
|
17039
|
-
execSync13(`
|
|
17151
|
+
execSync13(`"${ytDlp}" -x --audio-format mp3 --audio-quality 5 -o "${tmpBase}.%(ext)s" "${url}" 2>&1`, { timeout: 3e5, stdio: ["pipe", "pipe", "pipe"] });
|
|
17040
17152
|
if (!existsSync22(tmpFile)) {
|
|
17041
17153
|
const { readdirSync: rd } = __require("node:fs");
|
|
17042
17154
|
const files = rd(tmpDir).filter((f2) => f2.startsWith(`download-`) && f2 !== ".gitkeep");
|
|
@@ -17046,10 +17158,11 @@ var init_transcribe_tool = __esm({
|
|
|
17046
17158
|
}
|
|
17047
17159
|
} catch (dlErr) {
|
|
17048
17160
|
const errMsg = dlErr instanceof Error ? dlErr.message : String(dlErr);
|
|
17161
|
+
const upgradeHint = errMsg.includes("403") ? " YouTube 403 error — yt-dlp was auto-upgraded. Retry; if the issue persists, the video may be region-restricted." : " Is the video available and not age-restricted?";
|
|
17049
17162
|
return {
|
|
17050
17163
|
success: false,
|
|
17051
17164
|
output: "",
|
|
17052
|
-
error: `yt-dlp failed: ${errMsg.slice(0, 200)}
|
|
17165
|
+
error: `yt-dlp failed: ${errMsg.slice(0, 200)}.${upgradeHint}`,
|
|
17053
17166
|
durationMs: performance.now() - start2
|
|
17054
17167
|
};
|
|
17055
17168
|
}
|
|
@@ -17109,7 +17222,7 @@ ${result.output}`,
|
|
|
17109
17222
|
};
|
|
17110
17223
|
YouTubeDownloadTool = class {
|
|
17111
17224
|
name = "youtube_download";
|
|
17112
|
-
description = "Download video or audio from YouTube. Saves mp4 (video) or
|
|
17225
|
+
description = "Download video or audio from YouTube. Saves mp4 (video), mp3, or wav (audio) to the working directory. Uses yt-dlp (auto-upgraded to fix YouTube 403 errors) and ffmpeg internally for audio conversion. If you get YouTube 403 errors, the tool auto-upgrades yt-dlp. For ffmpeg-based processing (cutting, segmenting, concatenating), download wav format which is raw PCM suitable for shell ffmpeg pipelines. Supports youtube.com/watch, youtu.be, shorts, live URLs.";
|
|
17113
17226
|
parameters = {
|
|
17114
17227
|
type: "object",
|
|
17115
17228
|
properties: {
|
|
@@ -17119,8 +17232,8 @@ ${result.output}`,
|
|
|
17119
17232
|
},
|
|
17120
17233
|
format: {
|
|
17121
17234
|
type: "string",
|
|
17122
|
-
enum: ["mp3", "mp4"],
|
|
17123
|
-
description: "Output format: 'mp3'
|
|
17235
|
+
enum: ["mp3", "mp4", "wav"],
|
|
17236
|
+
description: "Output format: 'mp3' (compressed audio), 'wav' (raw PCM — use for ffmpeg segmentation), 'mp4' (video). Default: mp3"
|
|
17124
17237
|
},
|
|
17125
17238
|
output_dir: {
|
|
17126
17239
|
type: "string",
|
|
@@ -17139,25 +17252,44 @@ ${result.output}`,
|
|
|
17139
17252
|
const format3 = String(args.format ?? "mp3").toLowerCase();
|
|
17140
17253
|
const outputDir = String(args.output_dir ?? this.workingDir);
|
|
17141
17254
|
if (!url) {
|
|
17142
|
-
return {
|
|
17255
|
+
return {
|
|
17256
|
+
success: false,
|
|
17257
|
+
output: "",
|
|
17258
|
+
error: "URL is required",
|
|
17259
|
+
durationMs: Date.now() - start2
|
|
17260
|
+
};
|
|
17143
17261
|
}
|
|
17144
17262
|
if (!isYouTubeUrl(url)) {
|
|
17145
|
-
return {
|
|
17263
|
+
return {
|
|
17264
|
+
success: false,
|
|
17265
|
+
output: "",
|
|
17266
|
+
error: "Not a recognized YouTube URL. Supported: youtube.com/watch, youtu.be, shorts, live, embed",
|
|
17267
|
+
durationMs: Date.now() - start2
|
|
17268
|
+
};
|
|
17146
17269
|
}
|
|
17147
|
-
|
|
17148
|
-
|
|
17270
|
+
const ytDlp = ensureYtDlp();
|
|
17271
|
+
if (!ytDlp) {
|
|
17272
|
+
return {
|
|
17273
|
+
success: false,
|
|
17274
|
+
output: "",
|
|
17275
|
+
error: "yt-dlp not available via shared venv. Run: python3 -m venv ~/.open-agents/venv && ~/.open-agents/venv/bin/pip install yt-dlp",
|
|
17276
|
+
durationMs: Date.now() - start2
|
|
17277
|
+
};
|
|
17149
17278
|
}
|
|
17150
17279
|
mkdirSync9(outputDir, { recursive: true });
|
|
17151
17280
|
try {
|
|
17152
17281
|
let title = "download";
|
|
17153
17282
|
try {
|
|
17154
|
-
title = execSync13(`
|
|
17283
|
+
title = execSync13(`"${ytDlp}" --get-title "${url}"`, {
|
|
17284
|
+
timeout: 15e3,
|
|
17285
|
+
stdio: "pipe"
|
|
17286
|
+
}).toString().trim().replace(/[<>:"/\\|?*]/g, "_").slice(0, 100);
|
|
17155
17287
|
} catch {
|
|
17156
17288
|
}
|
|
17157
17289
|
if (format3 === "mp4") {
|
|
17158
17290
|
const outPath = join27(outputDir, `${title}.mp4`);
|
|
17159
17291
|
const outTemplate = join27(outputDir, `${title}.%(ext)s`);
|
|
17160
|
-
execSync13(`
|
|
17292
|
+
execSync13(`"${ytDlp}" -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" --merge-output-format mp4 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
|
|
17161
17293
|
const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s", "mp4");
|
|
17162
17294
|
return {
|
|
17163
17295
|
success: true,
|
|
@@ -17166,11 +17298,23 @@ Title: ${title}
|
|
|
17166
17298
|
Format: mp4`,
|
|
17167
17299
|
durationMs: Date.now() - start2
|
|
17168
17300
|
};
|
|
17301
|
+
} else if (format3 === "wav") {
|
|
17302
|
+
const outPath = join27(outputDir, `${title}.wav`);
|
|
17303
|
+
const outTemplate = join27(outputDir, `${title}.%(ext)s`);
|
|
17304
|
+
execSync13(`"${ytDlp}" -x --audio-format wav --audio-quality 0 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
|
|
17305
|
+
const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s", "wav");
|
|
17306
|
+
return {
|
|
17307
|
+
success: true,
|
|
17308
|
+
output: `Downloaded audio: ${actualPath}
|
|
17309
|
+
Title: ${title}
|
|
17310
|
+
Format: wav`,
|
|
17311
|
+
durationMs: Date.now() - start2
|
|
17312
|
+
};
|
|
17169
17313
|
} else {
|
|
17170
17314
|
const outPath = join27(outputDir, `${title}.mp3`);
|
|
17171
17315
|
const outTemplate = join27(outputDir, `${title}.%(ext)s`);
|
|
17172
|
-
execSync13(`
|
|
17173
|
-
const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s", "mp3");
|
|
17316
|
+
execSync13(`"${ytDlp}" -x --audio-format mp3 --audio-quality 0 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
|
|
17317
|
+
const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s)", "mp3");
|
|
17174
17318
|
return {
|
|
17175
17319
|
success: true,
|
|
17176
17320
|
output: `Downloaded audio: ${actualPath}
|
|
@@ -252456,7 +252600,9 @@ async function probeService() {
|
|
|
252456
252600
|
try {
|
|
252457
252601
|
const controller = new AbortController();
|
|
252458
252602
|
const timeout2 = setTimeout(() => controller.abort(), 3e3);
|
|
252459
|
-
const res = await fetch(`${BASE_URL}/health`, {
|
|
252603
|
+
const res = await fetch(`${BASE_URL}/health`, {
|
|
252604
|
+
signal: controller.signal
|
|
252605
|
+
});
|
|
252460
252606
|
clearTimeout(timeout2);
|
|
252461
252607
|
return res.ok;
|
|
252462
252608
|
} catch {
|
|
@@ -252466,7 +252612,10 @@ async function probeService() {
|
|
|
252466
252612
|
function findPython3() {
|
|
252467
252613
|
for (const cmd of ["python3", "python"]) {
|
|
252468
252614
|
try {
|
|
252469
|
-
const ver = execSync19(`${cmd} --version 2>&1`, {
|
|
252615
|
+
const ver = execSync19(`${cmd} --version 2>&1`, {
|
|
252616
|
+
stdio: "pipe",
|
|
252617
|
+
timeout: 5e3
|
|
252618
|
+
}).toString().trim();
|
|
252470
252619
|
if (ver.includes("Python 3"))
|
|
252471
252620
|
return cmd;
|
|
252472
252621
|
} catch {
|
|
@@ -252538,7 +252687,10 @@ async function ensureSession() {
|
|
|
252538
252687
|
});
|
|
252539
252688
|
const data = await res.json();
|
|
252540
252689
|
if (!data.ok)
|
|
252541
|
-
return {
|
|
252690
|
+
return {
|
|
252691
|
+
error: String(data.message ?? "Failed to start browser session"),
|
|
252692
|
+
sessionId: ""
|
|
252693
|
+
};
|
|
252542
252694
|
activeSessionId = data.session_id;
|
|
252543
252695
|
return { sessionId: activeSessionId };
|
|
252544
252696
|
}
|
|
@@ -252684,7 +252836,22 @@ var init_browser_action = __esm({
|
|
|
252684
252836
|
properties: {
|
|
252685
252837
|
action: {
|
|
252686
252838
|
type: "string",
|
|
252687
|
-
enum: [
|
|
252839
|
+
enum: [
|
|
252840
|
+
"navigate",
|
|
252841
|
+
"click",
|
|
252842
|
+
"click_xy",
|
|
252843
|
+
"type",
|
|
252844
|
+
"screenshot",
|
|
252845
|
+
"dom",
|
|
252846
|
+
"dom_summary",
|
|
252847
|
+
"vision_click",
|
|
252848
|
+
"scroll",
|
|
252849
|
+
"scroll_up",
|
|
252850
|
+
"scroll_down",
|
|
252851
|
+
"back",
|
|
252852
|
+
"forward",
|
|
252853
|
+
"close"
|
|
252854
|
+
],
|
|
252688
252855
|
description: "Browser action to perform. Key actions:\n- 'dom_summary': compact view of interactive elements (~1KB vs 200KB raw DOM)\n- 'vision_click': screenshot the page, use Moondream vision to find an element by description, then click it. Pass the element description in 'text' parameter (e.g. text='the login button'). This is the visual grounding loop from SeeAct.\n- 'click': click by CSS selector (fastest when you know the selector)\n- 'click_xy': click at pixel coordinates (when you have exact coords)"
|
|
252689
252856
|
},
|
|
252690
252857
|
url: {
|
|
@@ -252714,12 +252881,44 @@ var init_browser_action = __esm({
|
|
|
252714
252881
|
},
|
|
252715
252882
|
required: ["action"]
|
|
252716
252883
|
};
|
|
252884
|
+
/** TASK-CLEANUP: gracefully close the browser session when the task completes. */
|
|
252885
|
+
async cleanup() {
|
|
252886
|
+
if (activeSessionId) {
|
|
252887
|
+
try {
|
|
252888
|
+
const res = await fetch(`${BASE_URL}/session/close`, {
|
|
252889
|
+
method: "POST",
|
|
252890
|
+
headers: { "Content-Type": "application/json" },
|
|
252891
|
+
body: JSON.stringify({ sid: activeSessionId }),
|
|
252892
|
+
signal: AbortSignal.timeout(5e3)
|
|
252893
|
+
});
|
|
252894
|
+
await res.json();
|
|
252895
|
+
} catch {
|
|
252896
|
+
}
|
|
252897
|
+
activeSessionId = null;
|
|
252898
|
+
}
|
|
252899
|
+
if (serviceProcess && serviceProcess.pid && !serviceProcess.killed) {
|
|
252900
|
+
try {
|
|
252901
|
+
process.kill(-serviceProcess.pid, "SIGKILL");
|
|
252902
|
+
} catch {
|
|
252903
|
+
}
|
|
252904
|
+
try {
|
|
252905
|
+
serviceProcess.kill("SIGKILL");
|
|
252906
|
+
} catch {
|
|
252907
|
+
}
|
|
252908
|
+
serviceProcess = null;
|
|
252909
|
+
}
|
|
252910
|
+
}
|
|
252717
252911
|
async execute(args) {
|
|
252718
252912
|
const start2 = Date.now();
|
|
252719
252913
|
const action = args.action;
|
|
252720
252914
|
const launchErr = await launchService();
|
|
252721
252915
|
if (launchErr) {
|
|
252722
|
-
return {
|
|
252916
|
+
return {
|
|
252917
|
+
success: false,
|
|
252918
|
+
output: "",
|
|
252919
|
+
error: launchErr,
|
|
252920
|
+
durationMs: Date.now() - start2
|
|
252921
|
+
};
|
|
252723
252922
|
}
|
|
252724
252923
|
if (action === "close") {
|
|
252725
252924
|
if (activeSessionId) {
|
|
@@ -252729,21 +252928,41 @@ var init_browser_action = __esm({
|
|
|
252729
252928
|
}
|
|
252730
252929
|
activeSessionId = null;
|
|
252731
252930
|
}
|
|
252732
|
-
return {
|
|
252931
|
+
return {
|
|
252932
|
+
success: true,
|
|
252933
|
+
output: "Browser session closed.",
|
|
252934
|
+
durationMs: Date.now() - start2
|
|
252935
|
+
};
|
|
252733
252936
|
}
|
|
252734
252937
|
const session = await ensureSession();
|
|
252735
252938
|
if (session.error) {
|
|
252736
|
-
return {
|
|
252939
|
+
return {
|
|
252940
|
+
success: false,
|
|
252941
|
+
output: "",
|
|
252942
|
+
error: session.error,
|
|
252943
|
+
durationMs: Date.now() - start2
|
|
252944
|
+
};
|
|
252737
252945
|
}
|
|
252738
252946
|
try {
|
|
252739
252947
|
let result;
|
|
252740
252948
|
switch (action) {
|
|
252741
252949
|
case "navigate": {
|
|
252742
252950
|
if (!args.url)
|
|
252743
|
-
return {
|
|
252744
|
-
|
|
252951
|
+
return {
|
|
252952
|
+
success: false,
|
|
252953
|
+
output: "",
|
|
252954
|
+
error: "url is required for navigate action",
|
|
252955
|
+
durationMs: Date.now() - start2
|
|
252956
|
+
};
|
|
252957
|
+
result = await apiCall("/navigate", "POST", {
|
|
252958
|
+
url: args.url
|
|
252959
|
+
});
|
|
252745
252960
|
if (result.ok) {
|
|
252746
|
-
return {
|
|
252961
|
+
return {
|
|
252962
|
+
success: true,
|
|
252963
|
+
output: `Navigated to ${args.url}`,
|
|
252964
|
+
durationMs: Date.now() - start2
|
|
252965
|
+
};
|
|
252747
252966
|
}
|
|
252748
252967
|
const navMsg = String(result.message ?? "Navigation failed");
|
|
252749
252968
|
const navHint = navMsg.toLowerCase().includes("connection") || navMsg.toLowerCase().includes("refused") || navMsg.toLowerCase().includes("err_connection") ? " (the URL appears unreachable — check if the target server is running and accepting connections)" : navMsg.toLowerCase().includes("timeout") ? " (page load timed out — try again or use a different URL)" : "";
|
|
@@ -252756,10 +252975,21 @@ var init_browser_action = __esm({
|
|
|
252756
252975
|
}
|
|
252757
252976
|
case "click": {
|
|
252758
252977
|
if (!args.selector)
|
|
252759
|
-
return {
|
|
252760
|
-
|
|
252978
|
+
return {
|
|
252979
|
+
success: false,
|
|
252980
|
+
output: "",
|
|
252981
|
+
error: "selector is required for click action",
|
|
252982
|
+
durationMs: Date.now() - start2
|
|
252983
|
+
};
|
|
252984
|
+
result = await apiCall("/click", "POST", {
|
|
252985
|
+
selector: args.selector
|
|
252986
|
+
});
|
|
252761
252987
|
if (result.ok) {
|
|
252762
|
-
return {
|
|
252988
|
+
return {
|
|
252989
|
+
success: true,
|
|
252990
|
+
output: `Clicked element: ${args.selector}`,
|
|
252991
|
+
durationMs: Date.now() - start2
|
|
252992
|
+
};
|
|
252763
252993
|
}
|
|
252764
252994
|
const clickMsg = String(result.message ?? "Click failed");
|
|
252765
252995
|
return {
|
|
@@ -252771,10 +253001,19 @@ var init_browser_action = __esm({
|
|
|
252771
253001
|
}
|
|
252772
253002
|
case "click_xy": {
|
|
252773
253003
|
if (args.x == null || args.y == null)
|
|
252774
|
-
return {
|
|
253004
|
+
return {
|
|
253005
|
+
success: false,
|
|
253006
|
+
output: "",
|
|
253007
|
+
error: "x and y are required for click_xy action",
|
|
253008
|
+
durationMs: Date.now() - start2
|
|
253009
|
+
};
|
|
252775
253010
|
result = await apiCall("/click_xy", "POST", { x: args.x, y: args.y });
|
|
252776
253011
|
if (result.ok) {
|
|
252777
|
-
return {
|
|
253012
|
+
return {
|
|
253013
|
+
success: true,
|
|
253014
|
+
output: `Clicked at (${args.x}, ${args.y})`,
|
|
253015
|
+
durationMs: Date.now() - start2
|
|
253016
|
+
};
|
|
252778
253017
|
}
|
|
252779
253018
|
const xyMsg = String(result.message ?? "Click failed");
|
|
252780
253019
|
return {
|
|
@@ -252786,10 +253025,22 @@ var init_browser_action = __esm({
|
|
|
252786
253025
|
}
|
|
252787
253026
|
case "type": {
|
|
252788
253027
|
if (!args.selector || !args.text)
|
|
252789
|
-
return {
|
|
252790
|
-
|
|
253028
|
+
return {
|
|
253029
|
+
success: false,
|
|
253030
|
+
output: "",
|
|
253031
|
+
error: "selector and text are required for type action",
|
|
253032
|
+
durationMs: Date.now() - start2
|
|
253033
|
+
};
|
|
253034
|
+
result = await apiCall("/type", "POST", {
|
|
253035
|
+
selector: args.selector,
|
|
253036
|
+
text: args.text
|
|
253037
|
+
});
|
|
252791
253038
|
if (result.ok) {
|
|
252792
|
-
return {
|
|
253039
|
+
return {
|
|
253040
|
+
success: true,
|
|
253041
|
+
output: `Typed "${args.text.slice(0, 50)}" into ${args.selector}`,
|
|
253042
|
+
durationMs: Date.now() - start2
|
|
253043
|
+
};
|
|
252793
253044
|
}
|
|
252794
253045
|
const typeMsg = String(result.message ?? "Type failed");
|
|
252795
253046
|
return {
|
|
@@ -252837,16 +253088,30 @@ var init_browser_action = __esm({
|
|
|
252837
253088
|
durationMs: Date.now() - start2
|
|
252838
253089
|
};
|
|
252839
253090
|
}
|
|
252840
|
-
return {
|
|
253091
|
+
return {
|
|
253092
|
+
success: false,
|
|
253093
|
+
output: "",
|
|
253094
|
+
error: "Screenshot failed",
|
|
253095
|
+
durationMs: Date.now() - start2
|
|
253096
|
+
};
|
|
252841
253097
|
}
|
|
252842
253098
|
case "dom": {
|
|
252843
253099
|
result = await apiCall("/dom", "GET");
|
|
252844
253100
|
const dom = result.dom;
|
|
252845
253101
|
if (dom) {
|
|
252846
253102
|
const truncated = dom.length > 5e4 ? dom.slice(0, 5e4) + "\n... (truncated)" : dom;
|
|
252847
|
-
return {
|
|
253103
|
+
return {
|
|
253104
|
+
success: true,
|
|
253105
|
+
output: truncated,
|
|
253106
|
+
durationMs: Date.now() - start2
|
|
253107
|
+
};
|
|
252848
253108
|
}
|
|
252849
|
-
return {
|
|
253109
|
+
return {
|
|
253110
|
+
success: false,
|
|
253111
|
+
output: "",
|
|
253112
|
+
error: "DOM capture failed",
|
|
253113
|
+
durationMs: Date.now() - start2
|
|
253114
|
+
};
|
|
252850
253115
|
}
|
|
252851
253116
|
// dom_summary: Research-grounded DOM downsampling
|
|
252852
253117
|
// Paper: AgentOccam (arXiv:2410.13825, ICLR 2025) — pivotal node extraction
|
|
@@ -252860,9 +253125,18 @@ var init_browser_action = __esm({
|
|
|
252860
253125
|
result = await apiCall("/dom", "GET");
|
|
252861
253126
|
const rawDom = result.dom;
|
|
252862
253127
|
if (!rawDom)
|
|
252863
|
-
return {
|
|
253128
|
+
return {
|
|
253129
|
+
success: false,
|
|
253130
|
+
output: "",
|
|
253131
|
+
error: "DOM capture failed",
|
|
253132
|
+
durationMs: Date.now() - start2
|
|
253133
|
+
};
|
|
252864
253134
|
const summary = downsampleDom(rawDom);
|
|
252865
|
-
return {
|
|
253135
|
+
return {
|
|
253136
|
+
success: true,
|
|
253137
|
+
output: summary,
|
|
253138
|
+
durationMs: Date.now() - start2
|
|
253139
|
+
};
|
|
252866
253140
|
}
|
|
252867
253141
|
// vision_click: Screenshot → Moondream point detection → Click
|
|
252868
253142
|
// Paper: SeeAct (arXiv:2401.01614) — visual grounding for web agents
|
|
@@ -252875,14 +253149,24 @@ var init_browser_action = __esm({
|
|
|
252875
253149
|
case "vision_click": {
|
|
252876
253150
|
const target = args.text;
|
|
252877
253151
|
if (!target)
|
|
252878
|
-
return {
|
|
253152
|
+
return {
|
|
253153
|
+
success: false,
|
|
253154
|
+
output: "",
|
|
253155
|
+
error: "text parameter is required for vision_click — describe what to click (e.g. 'the login button')",
|
|
253156
|
+
durationMs: Date.now() - start2
|
|
253157
|
+
};
|
|
252879
253158
|
const ssResult = await apiCall("/screenshot", "GET");
|
|
252880
253159
|
const ssB64 = ssResult.b64;
|
|
252881
253160
|
const ssWidth = ssResult.width || 1280;
|
|
252882
253161
|
const ssHeight = ssResult.height || 720;
|
|
252883
253162
|
const ssFile = ssResult.file;
|
|
252884
253163
|
if (!ssB64 && !ssFile) {
|
|
252885
|
-
return {
|
|
253164
|
+
return {
|
|
253165
|
+
success: false,
|
|
253166
|
+
output: "",
|
|
253167
|
+
error: "Screenshot failed — cannot perform vision click",
|
|
253168
|
+
durationMs: Date.now() - start2
|
|
253169
|
+
};
|
|
252886
253170
|
}
|
|
252887
253171
|
let imagePath = "";
|
|
252888
253172
|
if (ssFile) {
|
|
@@ -252894,7 +253178,12 @@ var init_browser_action = __esm({
|
|
|
252894
253178
|
wfs(tmpPath, fileBuffer);
|
|
252895
253179
|
imagePath = tmpPath;
|
|
252896
253180
|
} catch (e2) {
|
|
252897
|
-
return {
|
|
253181
|
+
return {
|
|
253182
|
+
success: false,
|
|
253183
|
+
output: "",
|
|
253184
|
+
error: `Failed to save screenshot: ${e2}`,
|
|
253185
|
+
durationMs: Date.now() - start2
|
|
253186
|
+
};
|
|
252898
253187
|
}
|
|
252899
253188
|
} else if (ssB64) {
|
|
252900
253189
|
const tmpPath = join41(process.env["TMPDIR"] || "/tmp", `oa-vision-click-${Date.now()}.png`);
|
|
@@ -252912,7 +253201,12 @@ var init_browser_action = __esm({
|
|
|
252912
253201
|
prompt: target
|
|
252913
253202
|
});
|
|
252914
253203
|
if (!visionResult.success) {
|
|
252915
|
-
return {
|
|
253204
|
+
return {
|
|
253205
|
+
success: false,
|
|
253206
|
+
output: `Vision could not find "${target}" on the page. Try using dom_summary to find the CSS selector instead.`,
|
|
253207
|
+
error: visionResult.error,
|
|
253208
|
+
durationMs: Date.now() - start2
|
|
253209
|
+
};
|
|
252916
253210
|
}
|
|
252917
253211
|
const coordMatch = visionResult.output.match(/\((\d+\.?\d*),\s*(\d+\.?\d*)\)/);
|
|
252918
253212
|
if (coordMatch) {
|
|
@@ -252922,10 +253216,19 @@ var init_browser_action = __esm({
|
|
|
252922
253216
|
pointY = Math.round(normY * ssHeight);
|
|
252923
253217
|
}
|
|
252924
253218
|
} catch (e2) {
|
|
252925
|
-
return {
|
|
253219
|
+
return {
|
|
253220
|
+
success: false,
|
|
253221
|
+
output: "",
|
|
253222
|
+
error: `Vision detection failed: ${e2}`,
|
|
253223
|
+
durationMs: Date.now() - start2
|
|
253224
|
+
};
|
|
252926
253225
|
}
|
|
252927
253226
|
if (pointX < 0 || pointY < 0) {
|
|
252928
|
-
return {
|
|
253227
|
+
return {
|
|
253228
|
+
success: false,
|
|
253229
|
+
output: `Could not determine click coordinates for "${target}". Vision returned no valid points.`,
|
|
253230
|
+
durationMs: Date.now() - start2
|
|
253231
|
+
};
|
|
252929
253232
|
}
|
|
252930
253233
|
const clickResult = await apiCall("/click_xy", "POST", {
|
|
252931
253234
|
x: pointX,
|
|
@@ -252948,22 +253251,49 @@ var init_browser_action = __esm({
|
|
|
252948
253251
|
};
|
|
252949
253252
|
}
|
|
252950
253253
|
case "scroll":
|
|
252951
|
-
result = await apiCall("/scroll", "POST", {
|
|
252952
|
-
|
|
253254
|
+
result = await apiCall("/scroll", "POST", {
|
|
253255
|
+
amount: args.amount ?? 600
|
|
253256
|
+
});
|
|
253257
|
+
return {
|
|
253258
|
+
success: !!result.ok,
|
|
253259
|
+
output: `Scrolled ${args.amount ?? 600}px`,
|
|
253260
|
+
durationMs: Date.now() - start2
|
|
253261
|
+
};
|
|
252953
253262
|
case "scroll_up":
|
|
252954
253263
|
result = await apiCall("/scroll/up", "POST");
|
|
252955
|
-
return {
|
|
253264
|
+
return {
|
|
253265
|
+
success: !!result.ok,
|
|
253266
|
+
output: "Scrolled up",
|
|
253267
|
+
durationMs: Date.now() - start2
|
|
253268
|
+
};
|
|
252956
253269
|
case "scroll_down":
|
|
252957
253270
|
result = await apiCall("/scroll/down", "POST");
|
|
252958
|
-
return {
|
|
253271
|
+
return {
|
|
253272
|
+
success: !!result.ok,
|
|
253273
|
+
output: "Scrolled down",
|
|
253274
|
+
durationMs: Date.now() - start2
|
|
253275
|
+
};
|
|
252959
253276
|
case "back":
|
|
252960
253277
|
result = await apiCall("/history/back", "POST");
|
|
252961
|
-
return {
|
|
253278
|
+
return {
|
|
253279
|
+
success: !!result.ok,
|
|
253280
|
+
output: "Navigated back",
|
|
253281
|
+
durationMs: Date.now() - start2
|
|
253282
|
+
};
|
|
252962
253283
|
case "forward":
|
|
252963
253284
|
result = await apiCall("/history/forward", "POST");
|
|
252964
|
-
return {
|
|
253285
|
+
return {
|
|
253286
|
+
success: !!result.ok,
|
|
253287
|
+
output: "Navigated forward",
|
|
253288
|
+
durationMs: Date.now() - start2
|
|
253289
|
+
};
|
|
252965
253290
|
default:
|
|
252966
|
-
return {
|
|
253291
|
+
return {
|
|
253292
|
+
success: false,
|
|
253293
|
+
output: "",
|
|
253294
|
+
error: `Unknown action: ${action}. Available: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close`,
|
|
253295
|
+
durationMs: Date.now() - start2
|
|
253296
|
+
};
|
|
252967
253297
|
}
|
|
252968
253298
|
} catch (err) {
|
|
252969
253299
|
return {
|
|
@@ -534749,6 +535079,14 @@ ${sr.result.output}`;
|
|
|
534749
535079
|
} else {
|
|
534750
535080
|
completed = true;
|
|
534751
535081
|
summary = extractTaskCompleteSummary(r2.tc.arguments);
|
|
535082
|
+
for (const tool of this.tools.values()) {
|
|
535083
|
+
if (tool.cleanup) {
|
|
535084
|
+
try {
|
|
535085
|
+
await tool.cleanup();
|
|
535086
|
+
} catch {
|
|
535087
|
+
}
|
|
535088
|
+
}
|
|
535089
|
+
}
|
|
534752
535090
|
if (summary && !this._assistantTextEmitted) {
|
|
534753
535091
|
this.emit({
|
|
534754
535092
|
type: "assistant_text",
|
|
@@ -613709,7 +614047,9 @@ function adaptTool6(tool) {
|
|
|
613709
614047
|
output: result.output,
|
|
613710
614048
|
error: result.error
|
|
613711
614049
|
};
|
|
613712
|
-
}
|
|
614050
|
+
},
|
|
614051
|
+
// Pass through lifecycle hooks from the underlying Tool implementation
|
|
614052
|
+
cleanup: tool.cleanup
|
|
613713
614053
|
};
|
|
613714
614054
|
}
|
|
613715
614055
|
function scanForSessionSignals(toolOutput) {
|
|
@@ -617741,6 +618081,9 @@ Rationale: ${proposal.rationale}${provenanceNote}`;
|
|
|
617741
618081
|
rl.setPreSubmit(() => statusBar.suggestAccept());
|
|
617742
618082
|
}
|
|
617743
618083
|
process.stdout.on("resize", () => {
|
|
618084
|
+
if (statusBar.isActive) {
|
|
618085
|
+
statusBar.reapplyScrollRegion();
|
|
618086
|
+
}
|
|
617744
618087
|
statusBar.handleResize();
|
|
617745
618088
|
setTermSize(process.stdout.rows ?? 24, process.stdout.columns ?? 80);
|
|
617746
618089
|
if (isNeovimActive()) {
|