open-agents-ai 0.187.572 → 0.187.574

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3800,7 +3800,7 @@ var init_web_fetch = __esm({
3800
3800
  WebFetchTool = class {
3801
3801
  name = "web_fetch";
3802
3802
  _fetchCache = /* @__PURE__ */ new Map();
3803
- description = "Fetch a single web page and return its text content (HTML stripped to plain text). FASTEST web tool — use this for reading any single URL: documentation, articles, README files, API references, Stack Overflow answers. Limitations: no JavaScript rendering (SPAs/React apps return empty), no link following, no cookies/auth, no structured data extraction. If the page is blank or incomplete, switch to web_crawl with strategy='playwright'. For scraping/extracting structured data (prices, listings, tables), use web_crawl instead. For search engine queries, use web_search instead. For interactive browser sessions (login, form filling, clicking), use browser_action instead.";
3803
+ description = "Fetch a single web page and return its text content (HTML stripped to plain text). FASTEST web tool — use this for reading any single URL: documentation, articles, README files, API references, Stack Overflow answers. Limitations: no JavaScript rendering (SPAs/React apps return empty), no link following, no cookies/auth, no structured data extraction. On timeout, automatically falls back to browser_action (headless Chrome) for slow/heavy pages. If the page is blank or incomplete, switch to web_crawl with strategy='playwright'. For scraping/extracting structured data (prices, listings, tables), use web_crawl instead. For search engine queries, use web_search instead. For interactive browser sessions (login, form filling, clicking), use browser_action instead.";
3804
3804
  parameters = {
3805
3805
  type: "object",
3806
3806
  properties: {
@@ -3869,12 +3869,105 @@ var init_web_fetch = __esm({
3869
3869
  durationMs: performance.now() - start2
3870
3870
  };
3871
3871
  } catch (error) {
3872
+ const errMsg = error instanceof Error ? error.message : String(error);
3873
+ if (/abort|timeout/i.test(errMsg)) {
3874
+ const fallback = await this.#hydraFallback(url, maxLength, start2);
3875
+ if (fallback)
3876
+ return fallback;
3877
+ }
3872
3878
  return {
3873
3879
  success: false,
3874
3880
  output: "",
3875
- error: error instanceof Error ? error.message : String(error),
3881
+ error: errMsg,
3882
+ durationMs: performance.now() - start2
3883
+ };
3884
+ }
3885
+ }
3886
+ /** Fallback: use Hydra Chrome automation service (web-scrape-service on :8130)
3887
+ * when the HTTP fetch times out. Navigates with a real headless browser
3888
+ * so slow/heavy/JS pages render fully. Returns null if the service is
3889
+ * unavailable or any step fails — the original timeout error propagates. */
3890
+ async #hydraFallback(url, maxLength, start2) {
3891
+ const BASE = "http://localhost:8130";
3892
+ try {
3893
+ const health = await fetch(`${BASE}/health`, {
3894
+ signal: AbortSignal.timeout(2e3)
3895
+ });
3896
+ if (!health.ok)
3897
+ return null;
3898
+ } catch {
3899
+ return null;
3900
+ }
3901
+ try {
3902
+ const sessionRes = await fetch(`${BASE}/session/start`, {
3903
+ method: "POST",
3904
+ headers: { "Content-Type": "application/json" },
3905
+ body: JSON.stringify({ headless: true }),
3906
+ signal: AbortSignal.timeout(1e4)
3907
+ });
3908
+ const sessionData = await sessionRes.json();
3909
+ if (!sessionData.ok || !sessionData.session_id)
3910
+ return null;
3911
+ const sid = sessionData.session_id;
3912
+ const navRes = await fetch(`${BASE}/navigate`, {
3913
+ method: "POST",
3914
+ headers: { "Content-Type": "application/json" },
3915
+ body: JSON.stringify({ sid, url }),
3916
+ signal: AbortSignal.timeout(6e4)
3917
+ });
3918
+ const navData = await navRes.json();
3919
+ if (!navData.ok) {
3920
+ fetch(`${BASE}/session/close`, {
3921
+ method: "POST",
3922
+ headers: { "Content-Type": "application/json" },
3923
+ body: JSON.stringify({ sid })
3924
+ }).catch(() => {
3925
+ });
3926
+ return null;
3927
+ }
3928
+ const domRes = await fetch(`${BASE}/dom?sid=${encodeURIComponent(sid)}`, {
3929
+ signal: AbortSignal.timeout(3e4)
3930
+ });
3931
+ const domData = await domRes.json();
3932
+ if (!domData.ok) {
3933
+ fetch(`${BASE}/session/close`, {
3934
+ method: "POST",
3935
+ headers: { "Content-Type": "application/json" },
3936
+ body: JSON.stringify({ sid })
3937
+ }).catch(() => {
3938
+ });
3939
+ return null;
3940
+ }
3941
+ const dom = domData.dom;
3942
+ if (!dom || dom.length < 50) {
3943
+ fetch(`${BASE}/session/close`, {
3944
+ method: "POST",
3945
+ headers: { "Content-Type": "application/json" },
3946
+ body: JSON.stringify({ sid })
3947
+ }).catch(() => {
3948
+ });
3949
+ return null;
3950
+ }
3951
+ fetch(`${BASE}/session/close`, {
3952
+ method: "POST",
3953
+ headers: { "Content-Type": "application/json" },
3954
+ body: JSON.stringify({ sid })
3955
+ }).catch(() => {
3956
+ });
3957
+ const text = this.#stripHtml(dom);
3958
+ this._fetchCache.set(url, { text, fetchedAt: Date.now() });
3959
+ const truncated = text.length > maxLength;
3960
+ return {
3961
+ success: true,
3962
+ output: `[Hydra fallback: HTTP fetch timed out, retrieved via Chrome browser]
3963
+
3964
+ ` + (truncated ? `${text.slice(0, maxLength)}
3965
+
3966
+ [Content truncated to ${maxLength} characters]` : text),
3876
3967
  durationMs: performance.now() - start2
3877
3968
  };
3969
+ } catch {
3970
+ return null;
3878
3971
  }
3879
3972
  }
3880
3973
  #stripHtml(html) {
@@ -16769,19 +16862,36 @@ function isYouTubeUrl(url) {
16769
16862
  return /(?:youtube\.com\/(?:watch|shorts|live|embed|v\/)|youtu\.be\/)/i.test(url);
16770
16863
  }
/**
 * Ensure yt-dlp is installed in the shared virtualenv at ~/.open-agents/venv
 * and return the path to its executable.
 *
 * The resolved path is memoized in the module-level `_ytDlpPath`, so venv
 * creation and the `pip install -U` upgrade run at most once per successful
 * resolution. A failed upgrade is tolerated as long as a previously installed
 * yt-dlp binary is still present in the venv.
 *
 * @returns {string | null} Path to the yt-dlp executable, or `null` when the
 *   venv cannot be created or no yt-dlp binary exists after the install step.
 */
function ensureYtDlp() {
  if (_ytDlpPath)
    return _ytDlpPath;
  const isWin2 = process.platform === "win32";
  const venvDir = join27(homedir8(), ".open-agents", "venv");
  const binDir = isWin2 ? "Scripts" : "bin";
  const pipPath = join27(venvDir, binDir, isWin2 ? "pip.exe" : "pip");
  const ytDlpPath = join27(venvDir, binDir, isWin2 ? "yt-dlp.exe" : "yt-dlp");
  if (!existsSync22(pipPath)) {
    try {
      mkdirSync9(join27(homedir8(), ".open-agents"), { recursive: true });
    } catch {
      return null;
    }
    // The interpreter is usually "python" on Windows and "python3" elsewhere;
    // try both rather than failing when only one name is on PATH.
    let venvCreated = false;
    for (const py of ["python3", "python"]) {
      try {
        execSync13(`${py} -m venv "${venvDir}"`, {
          timeout: 3e4,
          stdio: "pipe"
        });
        venvCreated = true;
        break;
      } catch {
        // Try the next interpreter name.
      }
    }
    if (!venvCreated)
      return null;
  }
  try {
    execSync13(`"${pipPath}" install -U yt-dlp 2>&1`, {
      timeout: 6e4,
      stdio: "pipe"
    });
  } catch {
    // Upgrade failed (offline, timeout, ...): fall through and use an
    // already-installed binary if there is one.
  }
  // Only cache and return a path that actually exists on disk.
  if (!existsSync22(ytDlpPath))
    return null;
  _ytDlpPath = ytDlpPath;
  return ytDlpPath;
}
16787
16897
  function formatTime(seconds) {
@@ -16789,7 +16899,7 @@ function formatTime(seconds) {
16789
16899
  const s2 = Math.floor(seconds % 60);
16790
16900
  return `${String(m2).padStart(2, "0")}:${String(s2).padStart(2, "0")}`;
16791
16901
  }
16792
- var AUDIO_EXTS, VIDEO_EXTS, _tcModule, _tcChecked, TranscribeFileTool, TranscribeUrlTool, YouTubeDownloadTool;
16902
+ var AUDIO_EXTS, VIDEO_EXTS, _tcModule, _tcChecked, TranscribeFileTool, _ytDlpPath, TranscribeUrlTool, YouTubeDownloadTool;
16793
16903
  var init_transcribe_tool = __esm({
16794
16904
  "packages/execution/dist/tools/transcribe-tool.js"() {
16795
16905
  "use strict";
@@ -16986,9 +17096,10 @@ var init_transcribe_tool = __esm({
16986
17096
  }
16987
17097
  }
16988
17098
  };
17099
+ _ytDlpPath = null;
16989
17100
  TranscribeUrlTool = class {
16990
17101
  name = "transcribe_url";
16991
- description = "Download and transcribe audio/video from a URL. Supports YouTube links (youtube.com/watch?v=..., youtu.be/...) and direct media URLs (MP3, WAV, MP4, etc.). YouTube audio is extracted via yt-dlp (auto-installed). Transcription is local via faster-whisper (no cloud API).";
17102
+ description = "Download and transcribe audio/video from a URL. Supports YouTube links (youtube.com/watch?v=..., youtu.be/...) and direct media URLs (MP3, WAV, MP4, etc.). YouTube audio is extracted via yt-dlp (shared venv at ~/.open-agents/venv/). If yt-dlp gets YouTube 403 errors, the tool auto-upgrades it. Transcription is local via faster-whisper (no cloud API).";
16992
17103
  parameters = {
16993
17104
  type: "object",
16994
17105
  properties: {
@@ -17026,17 +17137,18 @@ var init_transcribe_tool = __esm({
17026
17137
  let tmpFile = "";
17027
17138
  try {
17028
17139
  if (isYouTubeUrl(url)) {
17029
- if (!ensureYtDlp()) {
17140
+ const ytDlp = ensureYtDlp();
17141
+ if (!ytDlp) {
17030
17142
  return {
17031
17143
  success: false,
17032
17144
  output: "",
17033
- error: "yt-dlp not found and auto-install failed. Install manually: pip3 install yt-dlp",
17145
+ error: "yt-dlp not available via shared venv. Run: python3 -m venv ~/.open-agents/venv && ~/.open-agents/venv/bin/pip install yt-dlp",
17034
17146
  durationMs: performance.now() - start2
17035
17147
  };
17036
17148
  }
17037
17149
  tmpFile = `${tmpBase}.mp3`;
17038
17150
  try {
17039
- execSync13(`yt-dlp -x --audio-format mp3 --audio-quality 5 -o "${tmpBase}.%(ext)s" "${url}" 2>&1`, { timeout: 3e5, stdio: ["pipe", "pipe", "pipe"] });
17151
+ execSync13(`"${ytDlp}" -x --audio-format mp3 --audio-quality 5 -o "${tmpBase}.%(ext)s" "${url}" 2>&1`, { timeout: 3e5, stdio: ["pipe", "pipe", "pipe"] });
17040
17152
  if (!existsSync22(tmpFile)) {
17041
17153
  const { readdirSync: rd } = __require("node:fs");
17042
17154
  const files = rd(tmpDir).filter((f2) => f2.startsWith(`download-`) && f2 !== ".gitkeep");
@@ -17046,10 +17158,11 @@ var init_transcribe_tool = __esm({
17046
17158
  }
17047
17159
  } catch (dlErr) {
17048
17160
  const errMsg = dlErr instanceof Error ? dlErr.message : String(dlErr);
17161
+ const upgradeHint = errMsg.includes("403") ? " YouTube 403 error — yt-dlp was auto-upgraded. Retry; if the issue persists, the video may be region-restricted." : " Is the video available and not age-restricted?";
17049
17162
  return {
17050
17163
  success: false,
17051
17164
  output: "",
17052
- error: `yt-dlp failed: ${errMsg.slice(0, 200)}. Is the video available and not age-restricted?`,
17165
+ error: `yt-dlp failed: ${errMsg.slice(0, 200)}.${upgradeHint}`,
17053
17166
  durationMs: performance.now() - start2
17054
17167
  };
17055
17168
  }
@@ -17109,7 +17222,7 @@ ${result.output}`,
17109
17222
  };
17110
17223
  YouTubeDownloadTool = class {
17111
17224
  name = "youtube_download";
17112
- description = "Download video or audio from YouTube. Saves mp4 (video) or mp3 (audio) to the working directory. Uses yt-dlp (auto-installed). Supports youtube.com/watch, youtu.be, shorts, live URLs.";
17225
+ description = "Download video or audio from YouTube. Saves mp4 (video), mp3, or wav (audio) to the working directory. Uses yt-dlp (auto-upgraded to fix YouTube 403 errors) and ffmpeg internally for audio conversion. If you get YouTube 403 errors, the tool auto-upgrades yt-dlp. For ffmpeg-based processing (cutting, segmenting, concatenating), download wav format which is raw PCM suitable for shell ffmpeg pipelines. Supports youtube.com/watch, youtu.be, shorts, live URLs.";
17113
17226
  parameters = {
17114
17227
  type: "object",
17115
17228
  properties: {
@@ -17119,8 +17232,8 @@ ${result.output}`,
17119
17232
  },
17120
17233
  format: {
17121
17234
  type: "string",
17122
- enum: ["mp3", "mp4"],
17123
- description: "Output format: 'mp3' for audio only, 'mp4' for video (default: mp3)"
17235
+ enum: ["mp3", "mp4", "wav"],
17236
+ description: "Output format: 'mp3' (compressed audio), 'wav' (raw PCM — use for ffmpeg segmentation), 'mp4' (video). Default: mp3"
17124
17237
  },
17125
17238
  output_dir: {
17126
17239
  type: "string",
@@ -17139,25 +17252,44 @@ ${result.output}`,
17139
17252
  const format3 = String(args.format ?? "mp3").toLowerCase();
17140
17253
  const outputDir = String(args.output_dir ?? this.workingDir);
17141
17254
  if (!url) {
17142
- return { success: false, output: "", error: "URL is required", durationMs: Date.now() - start2 };
17255
+ return {
17256
+ success: false,
17257
+ output: "",
17258
+ error: "URL is required",
17259
+ durationMs: Date.now() - start2
17260
+ };
17143
17261
  }
17144
17262
  if (!isYouTubeUrl(url)) {
17145
- return { success: false, output: "", error: "Not a recognized YouTube URL. Supported: youtube.com/watch, youtu.be, shorts, live, embed", durationMs: Date.now() - start2 };
17263
+ return {
17264
+ success: false,
17265
+ output: "",
17266
+ error: "Not a recognized YouTube URL. Supported: youtube.com/watch, youtu.be, shorts, live, embed",
17267
+ durationMs: Date.now() - start2
17268
+ };
17146
17269
  }
17147
- if (!ensureYtDlp()) {
17148
- return { success: false, output: "", error: "yt-dlp not available and auto-install failed. Install manually: pip install yt-dlp", durationMs: Date.now() - start2 };
17270
+ const ytDlp = ensureYtDlp();
17271
+ if (!ytDlp) {
17272
+ return {
17273
+ success: false,
17274
+ output: "",
17275
+ error: "yt-dlp not available via shared venv. Run: python3 -m venv ~/.open-agents/venv && ~/.open-agents/venv/bin/pip install yt-dlp",
17276
+ durationMs: Date.now() - start2
17277
+ };
17149
17278
  }
17150
17279
  mkdirSync9(outputDir, { recursive: true });
17151
17280
  try {
17152
17281
  let title = "download";
17153
17282
  try {
17154
- title = execSync13(`yt-dlp --get-title "${url}"`, { timeout: 15e3, stdio: "pipe" }).toString().trim().replace(/[<>:"/\\|?*]/g, "_").slice(0, 100);
17283
+ title = execSync13(`"${ytDlp}" --get-title "${url}"`, {
17284
+ timeout: 15e3,
17285
+ stdio: "pipe"
17286
+ }).toString().trim().replace(/[<>:"/\\|?*]/g, "_").slice(0, 100);
17155
17287
  } catch {
17156
17288
  }
17157
17289
  if (format3 === "mp4") {
17158
17290
  const outPath = join27(outputDir, `${title}.mp4`);
17159
17291
  const outTemplate = join27(outputDir, `${title}.%(ext)s`);
17160
- execSync13(`yt-dlp -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" --merge-output-format mp4 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
17292
+ execSync13(`"${ytDlp}" -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" --merge-output-format mp4 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
17161
17293
  const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s", "mp4");
17162
17294
  return {
17163
17295
  success: true,
@@ -17166,11 +17298,23 @@ Title: ${title}
17166
17298
  Format: mp4`,
17167
17299
  durationMs: Date.now() - start2
17168
17300
  };
17301
+ } else if (format3 === "wav") {
17302
+ const outPath = join27(outputDir, `${title}.wav`);
17303
+ const outTemplate = join27(outputDir, `${title}.%(ext)s`);
17304
+ execSync13(`"${ytDlp}" -x --audio-format wav --audio-quality 0 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
17305
+ const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s", "wav");
17306
+ return {
17307
+ success: true,
17308
+ output: `Downloaded audio: ${actualPath}
17309
+ Title: ${title}
17310
+ Format: wav`,
17311
+ durationMs: Date.now() - start2
17312
+ };
17169
17313
  } else {
17170
17314
  const outPath = join27(outputDir, `${title}.mp3`);
17171
17315
  const outTemplate = join27(outputDir, `${title}.%(ext)s`);
17172
- execSync13(`yt-dlp -x --audio-format mp3 --audio-quality 0 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
17173
- const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s", "mp3");
17316
+ execSync13(`"${ytDlp}" -x --audio-format mp3 --audio-quality 0 -o "${outTemplate}" "${url}"`, { timeout: 6e5, stdio: "pipe", cwd: outputDir });
// Fallback path: substitute the exact "%(ext)s" placeholder used in outTemplate
// (a stray ")" in the search string would never match and leave the placeholder in the reported path).
const actualPath = existsSync22(outPath) ? outPath : outTemplate.replace("%(ext)s", "mp3");
17174
17318
  return {
17175
17319
  success: true,
17176
17320
  output: `Downloaded audio: ${actualPath}
@@ -252456,7 +252600,9 @@ async function probeService() {
252456
252600
  try {
252457
252601
  const controller = new AbortController();
252458
252602
  const timeout2 = setTimeout(() => controller.abort(), 3e3);
252459
- const res = await fetch(`${BASE_URL}/health`, { signal: controller.signal });
252603
+ const res = await fetch(`${BASE_URL}/health`, {
252604
+ signal: controller.signal
252605
+ });
252460
252606
  clearTimeout(timeout2);
252461
252607
  return res.ok;
252462
252608
  } catch {
@@ -252466,7 +252612,10 @@ async function probeService() {
252466
252612
  function findPython3() {
252467
252613
  for (const cmd of ["python3", "python"]) {
252468
252614
  try {
252469
- const ver = execSync19(`${cmd} --version 2>&1`, { stdio: "pipe", timeout: 5e3 }).toString().trim();
252615
+ const ver = execSync19(`${cmd} --version 2>&1`, {
252616
+ stdio: "pipe",
252617
+ timeout: 5e3
252618
+ }).toString().trim();
252470
252619
  if (ver.includes("Python 3"))
252471
252620
  return cmd;
252472
252621
  } catch {
@@ -252538,7 +252687,10 @@ async function ensureSession() {
252538
252687
  });
252539
252688
  const data = await res.json();
252540
252689
  if (!data.ok)
252541
- return { error: String(data.message ?? "Failed to start browser session"), sessionId: "" };
252690
+ return {
252691
+ error: String(data.message ?? "Failed to start browser session"),
252692
+ sessionId: ""
252693
+ };
252542
252694
  activeSessionId = data.session_id;
252543
252695
  return { sessionId: activeSessionId };
252544
252696
  }
@@ -252684,7 +252836,22 @@ var init_browser_action = __esm({
252684
252836
  properties: {
252685
252837
  action: {
252686
252838
  type: "string",
252687
- enum: ["navigate", "click", "click_xy", "type", "screenshot", "dom", "dom_summary", "vision_click", "scroll", "scroll_up", "scroll_down", "back", "forward", "close"],
252839
+ enum: [
252840
+ "navigate",
252841
+ "click",
252842
+ "click_xy",
252843
+ "type",
252844
+ "screenshot",
252845
+ "dom",
252846
+ "dom_summary",
252847
+ "vision_click",
252848
+ "scroll",
252849
+ "scroll_up",
252850
+ "scroll_down",
252851
+ "back",
252852
+ "forward",
252853
+ "close"
252854
+ ],
252688
252855
  description: "Browser action to perform. Key actions:\n- 'dom_summary': compact view of interactive elements (~1KB vs 200KB raw DOM)\n- 'vision_click': screenshot the page, use Moondream vision to find an element by description, then click it. Pass the element description in 'text' parameter (e.g. text='the login button'). This is the visual grounding loop from SeeAct.\n- 'click': click by CSS selector (fastest when you know the selector)\n- 'click_xy': click at pixel coordinates (when you have exact coords)"
252689
252856
  },
252690
252857
  url: {
@@ -252714,12 +252881,44 @@ var init_browser_action = __esm({
252714
252881
  },
252715
252882
  required: ["action"]
252716
252883
  };
252884
+ /** TASK-CLEANUP: gracefully close the browser session when the task completes. */
252885
+ async cleanup() {
252886
+ if (activeSessionId) {
252887
+ try {
252888
+ const res = await fetch(`${BASE_URL}/session/close`, {
252889
+ method: "POST",
252890
+ headers: { "Content-Type": "application/json" },
252891
+ body: JSON.stringify({ sid: activeSessionId }),
252892
+ signal: AbortSignal.timeout(5e3)
252893
+ });
252894
+ await res.json();
252895
+ } catch {
252896
+ }
252897
+ activeSessionId = null;
252898
+ }
252899
+ if (serviceProcess && serviceProcess.pid && !serviceProcess.killed) {
252900
+ try {
252901
+ process.kill(-serviceProcess.pid, "SIGKILL");
252902
+ } catch {
252903
+ }
252904
+ try {
252905
+ serviceProcess.kill("SIGKILL");
252906
+ } catch {
252907
+ }
252908
+ serviceProcess = null;
252909
+ }
252910
+ }
252717
252911
  async execute(args) {
252718
252912
  const start2 = Date.now();
252719
252913
  const action = args.action;
252720
252914
  const launchErr = await launchService();
252721
252915
  if (launchErr) {
252722
- return { success: false, output: "", error: launchErr, durationMs: Date.now() - start2 };
252916
+ return {
252917
+ success: false,
252918
+ output: "",
252919
+ error: launchErr,
252920
+ durationMs: Date.now() - start2
252921
+ };
252723
252922
  }
252724
252923
  if (action === "close") {
252725
252924
  if (activeSessionId) {
@@ -252729,21 +252928,41 @@ var init_browser_action = __esm({
252729
252928
  }
252730
252929
  activeSessionId = null;
252731
252930
  }
252732
- return { success: true, output: "Browser session closed.", durationMs: Date.now() - start2 };
252931
+ return {
252932
+ success: true,
252933
+ output: "Browser session closed.",
252934
+ durationMs: Date.now() - start2
252935
+ };
252733
252936
  }
252734
252937
  const session = await ensureSession();
252735
252938
  if (session.error) {
252736
- return { success: false, output: "", error: session.error, durationMs: Date.now() - start2 };
252939
+ return {
252940
+ success: false,
252941
+ output: "",
252942
+ error: session.error,
252943
+ durationMs: Date.now() - start2
252944
+ };
252737
252945
  }
252738
252946
  try {
252739
252947
  let result;
252740
252948
  switch (action) {
252741
252949
  case "navigate": {
252742
252950
  if (!args.url)
252743
- return { success: false, output: "", error: "url is required for navigate action", durationMs: Date.now() - start2 };
252744
- result = await apiCall("/navigate", "POST", { url: args.url });
252951
+ return {
252952
+ success: false,
252953
+ output: "",
252954
+ error: "url is required for navigate action",
252955
+ durationMs: Date.now() - start2
252956
+ };
252957
+ result = await apiCall("/navigate", "POST", {
252958
+ url: args.url
252959
+ });
252745
252960
  if (result.ok) {
252746
- return { success: true, output: `Navigated to ${args.url}`, durationMs: Date.now() - start2 };
252961
+ return {
252962
+ success: true,
252963
+ output: `Navigated to ${args.url}`,
252964
+ durationMs: Date.now() - start2
252965
+ };
252747
252966
  }
252748
252967
  const navMsg = String(result.message ?? "Navigation failed");
252749
252968
  const navHint = navMsg.toLowerCase().includes("connection") || navMsg.toLowerCase().includes("refused") || navMsg.toLowerCase().includes("err_connection") ? " (the URL appears unreachable — check if the target server is running and accepting connections)" : navMsg.toLowerCase().includes("timeout") ? " (page load timed out — try again or use a different URL)" : "";
@@ -252756,10 +252975,21 @@ var init_browser_action = __esm({
252756
252975
  }
252757
252976
  case "click": {
252758
252977
  if (!args.selector)
252759
- return { success: false, output: "", error: "selector is required for click action", durationMs: Date.now() - start2 };
252760
- result = await apiCall("/click", "POST", { selector: args.selector });
252978
+ return {
252979
+ success: false,
252980
+ output: "",
252981
+ error: "selector is required for click action",
252982
+ durationMs: Date.now() - start2
252983
+ };
252984
+ result = await apiCall("/click", "POST", {
252985
+ selector: args.selector
252986
+ });
252761
252987
  if (result.ok) {
252762
- return { success: true, output: `Clicked element: ${args.selector}`, durationMs: Date.now() - start2 };
252988
+ return {
252989
+ success: true,
252990
+ output: `Clicked element: ${args.selector}`,
252991
+ durationMs: Date.now() - start2
252992
+ };
252763
252993
  }
252764
252994
  const clickMsg = String(result.message ?? "Click failed");
252765
252995
  return {
@@ -252771,10 +253001,19 @@ var init_browser_action = __esm({
252771
253001
  }
252772
253002
  case "click_xy": {
252773
253003
  if (args.x == null || args.y == null)
252774
- return { success: false, output: "", error: "x and y are required for click_xy action", durationMs: Date.now() - start2 };
253004
+ return {
253005
+ success: false,
253006
+ output: "",
253007
+ error: "x and y are required for click_xy action",
253008
+ durationMs: Date.now() - start2
253009
+ };
252775
253010
  result = await apiCall("/click_xy", "POST", { x: args.x, y: args.y });
252776
253011
  if (result.ok) {
252777
- return { success: true, output: `Clicked at (${args.x}, ${args.y})`, durationMs: Date.now() - start2 };
253012
+ return {
253013
+ success: true,
253014
+ output: `Clicked at (${args.x}, ${args.y})`,
253015
+ durationMs: Date.now() - start2
253016
+ };
252778
253017
  }
252779
253018
  const xyMsg = String(result.message ?? "Click failed");
252780
253019
  return {
@@ -252786,10 +253025,22 @@ var init_browser_action = __esm({
252786
253025
  }
252787
253026
  case "type": {
252788
253027
  if (!args.selector || !args.text)
252789
- return { success: false, output: "", error: "selector and text are required for type action", durationMs: Date.now() - start2 };
252790
- result = await apiCall("/type", "POST", { selector: args.selector, text: args.text });
253028
+ return {
253029
+ success: false,
253030
+ output: "",
253031
+ error: "selector and text are required for type action",
253032
+ durationMs: Date.now() - start2
253033
+ };
253034
+ result = await apiCall("/type", "POST", {
253035
+ selector: args.selector,
253036
+ text: args.text
253037
+ });
252791
253038
  if (result.ok) {
252792
- return { success: true, output: `Typed "${args.text.slice(0, 50)}" into ${args.selector}`, durationMs: Date.now() - start2 };
253039
+ return {
253040
+ success: true,
253041
+ output: `Typed "${args.text.slice(0, 50)}" into ${args.selector}`,
253042
+ durationMs: Date.now() - start2
253043
+ };
252793
253044
  }
252794
253045
  const typeMsg = String(result.message ?? "Type failed");
252795
253046
  return {
@@ -252837,16 +253088,30 @@ var init_browser_action = __esm({
252837
253088
  durationMs: Date.now() - start2
252838
253089
  };
252839
253090
  }
252840
- return { success: false, output: "", error: "Screenshot failed", durationMs: Date.now() - start2 };
253091
+ return {
253092
+ success: false,
253093
+ output: "",
253094
+ error: "Screenshot failed",
253095
+ durationMs: Date.now() - start2
253096
+ };
252841
253097
  }
252842
253098
  case "dom": {
252843
253099
  result = await apiCall("/dom", "GET");
252844
253100
  const dom = result.dom;
252845
253101
  if (dom) {
252846
253102
  const truncated = dom.length > 5e4 ? dom.slice(0, 5e4) + "\n... (truncated)" : dom;
252847
- return { success: true, output: truncated, durationMs: Date.now() - start2 };
253103
+ return {
253104
+ success: true,
253105
+ output: truncated,
253106
+ durationMs: Date.now() - start2
253107
+ };
252848
253108
  }
252849
- return { success: false, output: "", error: "DOM capture failed", durationMs: Date.now() - start2 };
253109
+ return {
253110
+ success: false,
253111
+ output: "",
253112
+ error: "DOM capture failed",
253113
+ durationMs: Date.now() - start2
253114
+ };
252850
253115
  }
252851
253116
  // dom_summary: Research-grounded DOM downsampling
252852
253117
  // Paper: AgentOccam (arXiv:2410.13825, ICLR 2025) — pivotal node extraction
@@ -252860,9 +253125,18 @@ var init_browser_action = __esm({
252860
253125
  result = await apiCall("/dom", "GET");
252861
253126
  const rawDom = result.dom;
252862
253127
  if (!rawDom)
252863
- return { success: false, output: "", error: "DOM capture failed", durationMs: Date.now() - start2 };
253128
+ return {
253129
+ success: false,
253130
+ output: "",
253131
+ error: "DOM capture failed",
253132
+ durationMs: Date.now() - start2
253133
+ };
252864
253134
  const summary = downsampleDom(rawDom);
252865
- return { success: true, output: summary, durationMs: Date.now() - start2 };
253135
+ return {
253136
+ success: true,
253137
+ output: summary,
253138
+ durationMs: Date.now() - start2
253139
+ };
252866
253140
  }
252867
253141
  // vision_click: Screenshot → Moondream point detection → Click
252868
253142
  // Paper: SeeAct (arXiv:2401.01614) — visual grounding for web agents
@@ -252875,14 +253149,24 @@ var init_browser_action = __esm({
252875
253149
  case "vision_click": {
252876
253150
  const target = args.text;
252877
253151
  if (!target)
252878
- return { success: false, output: "", error: "text parameter is required for vision_click — describe what to click (e.g. 'the login button')", durationMs: Date.now() - start2 };
253152
+ return {
253153
+ success: false,
253154
+ output: "",
253155
+ error: "text parameter is required for vision_click — describe what to click (e.g. 'the login button')",
253156
+ durationMs: Date.now() - start2
253157
+ };
252879
253158
  const ssResult = await apiCall("/screenshot", "GET");
252880
253159
  const ssB64 = ssResult.b64;
252881
253160
  const ssWidth = ssResult.width || 1280;
252882
253161
  const ssHeight = ssResult.height || 720;
252883
253162
  const ssFile = ssResult.file;
252884
253163
  if (!ssB64 && !ssFile) {
252885
- return { success: false, output: "", error: "Screenshot failed — cannot perform vision click", durationMs: Date.now() - start2 };
253164
+ return {
253165
+ success: false,
253166
+ output: "",
253167
+ error: "Screenshot failed — cannot perform vision click",
253168
+ durationMs: Date.now() - start2
253169
+ };
252886
253170
  }
252887
253171
  let imagePath = "";
252888
253172
  if (ssFile) {
@@ -252894,7 +253178,12 @@ var init_browser_action = __esm({
252894
253178
  wfs(tmpPath, fileBuffer);
252895
253179
  imagePath = tmpPath;
252896
253180
  } catch (e2) {
252897
- return { success: false, output: "", error: `Failed to save screenshot: ${e2}`, durationMs: Date.now() - start2 };
253181
+ return {
253182
+ success: false,
253183
+ output: "",
253184
+ error: `Failed to save screenshot: ${e2}`,
253185
+ durationMs: Date.now() - start2
253186
+ };
252898
253187
  }
252899
253188
  } else if (ssB64) {
252900
253189
  const tmpPath = join41(process.env["TMPDIR"] || "/tmp", `oa-vision-click-${Date.now()}.png`);
@@ -252912,7 +253201,12 @@ var init_browser_action = __esm({
252912
253201
  prompt: target
252913
253202
  });
252914
253203
  if (!visionResult.success) {
252915
- return { success: false, output: `Vision could not find "${target}" on the page. Try using dom_summary to find the CSS selector instead.`, error: visionResult.error, durationMs: Date.now() - start2 };
253204
+ return {
253205
+ success: false,
253206
+ output: `Vision could not find "${target}" on the page. Try using dom_summary to find the CSS selector instead.`,
253207
+ error: visionResult.error,
253208
+ durationMs: Date.now() - start2
253209
+ };
252916
253210
  }
252917
253211
  const coordMatch = visionResult.output.match(/\((\d+\.?\d*),\s*(\d+\.?\d*)\)/);
252918
253212
  if (coordMatch) {
@@ -252922,10 +253216,19 @@ var init_browser_action = __esm({
252922
253216
  pointY = Math.round(normY * ssHeight);
252923
253217
  }
252924
253218
  } catch (e2) {
252925
- return { success: false, output: "", error: `Vision detection failed: ${e2}`, durationMs: Date.now() - start2 };
253219
+ return {
253220
+ success: false,
253221
+ output: "",
253222
+ error: `Vision detection failed: ${e2}`,
253223
+ durationMs: Date.now() - start2
253224
+ };
252926
253225
  }
252927
253226
  if (pointX < 0 || pointY < 0) {
252928
- return { success: false, output: `Could not determine click coordinates for "${target}". Vision returned no valid points.`, durationMs: Date.now() - start2 };
253227
+ return {
253228
+ success: false,
253229
+ output: `Could not determine click coordinates for "${target}". Vision returned no valid points.`,
253230
+ durationMs: Date.now() - start2
253231
+ };
252929
253232
  }
252930
253233
  const clickResult = await apiCall("/click_xy", "POST", {
252931
253234
  x: pointX,
@@ -252948,22 +253251,49 @@ var init_browser_action = __esm({
252948
253251
  };
252949
253252
  }
252950
253253
  case "scroll":
252951
- result = await apiCall("/scroll", "POST", { amount: args.amount ?? 600 });
252952
- return { success: !!result.ok, output: `Scrolled ${args.amount ?? 600}px`, durationMs: Date.now() - start2 };
253254
+ result = await apiCall("/scroll", "POST", {
253255
+ amount: args.amount ?? 600
253256
+ });
253257
+ return {
253258
+ success: !!result.ok,
253259
+ output: `Scrolled ${args.amount ?? 600}px`,
253260
+ durationMs: Date.now() - start2
253261
+ };
252953
253262
  case "scroll_up":
252954
253263
  result = await apiCall("/scroll/up", "POST");
252955
- return { success: !!result.ok, output: "Scrolled up", durationMs: Date.now() - start2 };
253264
+ return {
253265
+ success: !!result.ok,
253266
+ output: "Scrolled up",
253267
+ durationMs: Date.now() - start2
253268
+ };
252956
253269
  case "scroll_down":
252957
253270
  result = await apiCall("/scroll/down", "POST");
252958
- return { success: !!result.ok, output: "Scrolled down", durationMs: Date.now() - start2 };
253271
+ return {
253272
+ success: !!result.ok,
253273
+ output: "Scrolled down",
253274
+ durationMs: Date.now() - start2
253275
+ };
252959
253276
  case "back":
252960
253277
  result = await apiCall("/history/back", "POST");
252961
- return { success: !!result.ok, output: "Navigated back", durationMs: Date.now() - start2 };
253278
+ return {
253279
+ success: !!result.ok,
253280
+ output: "Navigated back",
253281
+ durationMs: Date.now() - start2
253282
+ };
252962
253283
  case "forward":
252963
253284
  result = await apiCall("/history/forward", "POST");
252964
- return { success: !!result.ok, output: "Navigated forward", durationMs: Date.now() - start2 };
253285
+ return {
253286
+ success: !!result.ok,
253287
+ output: "Navigated forward",
253288
+ durationMs: Date.now() - start2
253289
+ };
252965
253290
  default:
252966
- return { success: false, output: "", error: `Unknown action: ${action}. Available: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close`, durationMs: Date.now() - start2 };
253291
+ return {
253292
+ success: false,
253293
+ output: "",
253294
+ error: `Unknown action: ${action}. Available: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close`,
253295
+ durationMs: Date.now() - start2
253296
+ };
252967
253297
  }
252968
253298
  } catch (err) {
252969
253299
  return {
@@ -534749,6 +535079,14 @@ ${sr.result.output}`;
534749
535079
  } else {
534750
535080
  completed = true;
534751
535081
  summary = extractTaskCompleteSummary(r2.tc.arguments);
535082
+ for (const tool of this.tools.values()) {
535083
+ if (tool.cleanup) {
535084
+ try {
535085
+ await tool.cleanup();
535086
+ } catch {
535087
+ }
535088
+ }
535089
+ }
534752
535090
  if (summary && !this._assistantTextEmitted) {
534753
535091
  this.emit({
534754
535092
  type: "assistant_text",
@@ -613709,7 +614047,9 @@ function adaptTool6(tool) {
613709
614047
  output: result.output,
613710
614048
  error: result.error
613711
614049
  };
613712
- }
614050
+ },
614051
+ // Pass through lifecycle hooks from the underlying Tool implementation
614052
+ cleanup: tool.cleanup
613713
614053
  };
613714
614054
  }
613715
614055
  function scanForSessionSignals(toolOutput) {
@@ -617741,6 +618081,9 @@ Rationale: ${proposal.rationale}${provenanceNote}`;
617741
618081
  rl.setPreSubmit(() => statusBar.suggestAccept());
617742
618082
  }
617743
618083
  process.stdout.on("resize", () => {
618084
+ if (statusBar.isActive) {
618085
+ statusBar.reapplyScrollRegion();
618086
+ }
617744
618087
  statusBar.handleResize();
617745
618088
  setTermSize(process.stdout.rows ?? 24, process.stdout.columns ?? 80);
617746
618089
  if (isNeovimActive()) {