omnius 1.0.206 → 1.0.207

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -10326,6 +10326,79 @@ function pngDimensions(buffer2) {
10326
10326
  }
10327
10327
  return null;
10328
10328
  }
10329
/**
 * Describe the element that currently holds focus in the page.
 *
 * Evaluates a script in the page through `pageHandle.evaluate` that reads
 * `document.activeElement` and returns a plain, serializable summary of it.
 *
 * An element is reported as editable when any of the following holds:
 *   - it matches `input, textarea`;
 *   - the browser reports it as content-editable (`isContentEditable`);
 *   - it carries a `contenteditable` attribute whose value is "", "true" or
 *     "plaintext-only" (an ABSENT attribute does not count);
 *   - its ARIA role is `textbox`, `searchbox` or `combobox`.
 *
 * @param {{ evaluate: (script: string) => Promise<unknown> }} pageHandle
 *   Page-like object exposing `evaluate`.
 * @returns {Promise<null | {
 *   tag: string, id: string, name: string, role: string, ariaLabel: string,
 *   type: string, placeholder: string, text: string, isEditable: boolean,
 *   rect: { x: number, y: number, width: number, height: number }
 * }>} The summary, or `null` when nothing is focused or the evaluation did
 *   not yield an object.
 */
async function describeFocusedEditable(pageHandle) {
  const active = await pageHandle.evaluate(`(() => {
    const el = document.activeElement;
    if (!el) return null;
    const rect = el.getBoundingClientRect();
    const role = (el.getAttribute("role") || "").toLowerCase();
    // Keep "attribute missing" (null) distinct from "attribute present but empty",
    // otherwise every element without the attribute would look editable.
    const contentEditable = el.hasAttribute("contenteditable")
      ? String(el.getAttribute("contenteditable") || "").toLowerCase()
      : null;
    const isEditable = el.matches("input, textarea")
      || el.isContentEditable === true
      || contentEditable === ""
      || contentEditable === "true"
      || contentEditable === "plaintext-only"
      || ["textbox", "searchbox", "combobox"].includes(role);
    return {
      tag: String(el.tagName || "").toLowerCase(),
      id: el.id || "",
      name: el.getAttribute("name") || "",
      role,
      ariaLabel: el.getAttribute("aria-label") || "",
      type: el.getAttribute("type") || "",
      placeholder: el.getAttribute("placeholder") || "",
      text: String(el.textContent || "").trim().slice(0, 120),
      isEditable,
      rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
    };
  })()`);
  return active && typeof active === "object" ? active : null;
}
10354
/**
 * Fill a form field identified by a natural-language target instead of a selector.
 *
 * Steps:
 *   1. Ask `findBrowserVisualCandidate` for a candidate, anchored on the
 *      viewport centre. If none is found, retry with two additional flags set
 *      (presumably enabling scroll-into-view — confirm against
 *      `findBrowserVisualCandidate`) and wait 150 ms when that retry succeeds.
 *   2. Click the candidate's centre and wait 80 ms.
 *   3. Check through `describeFocusedEditable` that an editable element is focused.
 *   4. Select the existing content, then either type `text` or, when `text`
 *      is the empty string, delete the selection so the field ends up cleared.
 *
 * @param {object} pageHandle Page-like object exposing `viewportSize`,
 *   `waitForTimeout`, `mouse.click`, `keyboard.press` and `keyboard.type`.
 * @param {string} target Natural-language description of the field.
 * @param {string} text Replacement content; `""` clears the field.
 * @param {number} typingDelay Per-key delay forwarded to `keyboard.type`.
 * @returns {Promise<{ candidate: object, active: object, source: string }>}
 *   The matched candidate, the focused-element summary, and how the candidate
 *   was located ("dom-candidate" or "dom-candidate+scroll").
 * @throws {Error} When no candidate with finite centre coordinates is found,
 *   or when the click does not leave an editable element focused.
 */
async function clickAndFillBrowserTarget(pageHandle, target, text, typingDelay) {
  const viewport = pageHandle.viewportSize?.() ?? { width: 1280, height: 720 };
  const anchorX = viewport.width / 2;
  const anchorY = viewport.height / 2;

  let source = "dom-candidate";
  let candidate = await findBrowserVisualCandidate(pageHandle, target, anchorX, anchorY, true);
  if (!candidate) {
    candidate = await findBrowserVisualCandidate(pageHandle, target, anchorX, anchorY, true, true, true);
    if (candidate?.["scrolledIntoView"] === true) {
      source += "+scroll";
    }
    if (candidate) {
      // Give the page a moment to settle before clicking.
      await pageHandle.waitForTimeout(150);
    }
  }

  const center = candidate?.["center"];
  const x = Number(center?.x);
  const y = Number(center?.y);
  if (!Number.isFinite(x) || !Number.isFinite(y)) {
    throw new Error(`No visible editable candidate matched target "${target}". Run observe_bundle or dom_summary to inspect available labels/selectors.`);
  }

  await pageHandle.mouse.click(x, y);
  await pageHandle.waitForTimeout(80);

  const active = await describeFocusedEditable(pageHandle);
  if (!active?.["isEditable"]) {
    // `candidate` is always set here: finite coordinates can only come from it.
    const matched = JSON.stringify({
      tag: candidate["tag"],
      text: candidate["text"],
      ariaLabel: candidate["ariaLabel"],
      placeholder: candidate["placeholder"],
      name: candidate["name"]
    });
    throw new Error(`Target "${target}" was clicked, but no editable element became focused. Matched element: ${matched}.`);
  }

  const selectAll = process.platform === "darwin" ? "Meta+A" : "Control+A";
  await pageHandle.keyboard.press(selectAll);
  if (text === "") {
    // Typing an empty string sends no keys, which would leave the old content
    // merely selected; delete it so an empty fill really clears the field.
    await pageHandle.keyboard.press("Backspace");
  } else {
    await pageHandle.keyboard.type(text, { delay: typingDelay });
  }
  return { candidate, active, source };
}
10388
/**
 * Build the failure text for a failed `evaluate` call: the error message
 * (truncated to 500 characters) followed by zero or more "Hint: ..." lines
 * chosen from patterns in the error and in the submitted code.
 *
 * @param {unknown} err The thrown value; non-Error values are stringified.
 * @param {string} code8 The JavaScript source that was passed to evaluate.
 * @returns {string} Message and hints joined with newlines.
 */
function evaluateFailureMessage(err, code8) {
  const raw = err instanceof Error ? err.message : String(err);
  const hints = [];

  // Calling .map directly on a NodeList.
  if (/map is not a function/i.test(raw) && /querySelectorAll/i.test(code8)) {
    hints.push("document.querySelectorAll() returns a NodeList; use Array.from(document.querySelectorAll(selector)).map(...) or [...document.querySelectorAll(selector)].map(...).");
  }

  // Direct value assignment on form controls. The (?!=) lookahead keeps
  // comparisons such as `.value == x` / `.value === x` from being mistaken
  // for an assignment.
  const assignsValue = /(?:\.value\s*=(?!=)|setAttribute\(['"]value['"])/.test(code8);
  if (assignsValue && /\b(input|textarea|querySelector)/i.test(code8)) {
    hints.push("Do not fill modern React/Vue/Svelte forms by assigning .value in evaluate; use playwright_browser fill, or visual_click the field then type, so input/change events fire.");
  }

  // "querySelector" is a prefix of "querySelectorAll", so one pattern covers both.
  if (/querySelector/.test(code8)) {
    hints.push("For page inspection, prefer query_all, dom_summary, or observe_bundle before raw evaluate.");
  }

  return [raw.slice(0, 500), ...hints.map((hint) => `Hint: ${hint}`)].join("\n");
}
10329
10402
  function buildImageMarker(buffer2) {
10330
10403
  let mimeType = "image/png";
10331
10404
  let out = buffer2;
@@ -10559,7 +10632,7 @@ var init_playwright_browser = __esm({
10559
10632
  PLAYWRIGHT_BROWSERS_DIR = join13(PLAYWRIGHT_RUNTIME_DIR, "browsers");
10560
10633
  PlaywrightBrowserTool = class {
10561
10634
  name = "playwright_browser";
10562
- description = "Full-scope Playwright browser automation + diagnostic capture. Launches a persistent headless Chromium session by default, with optional visible/headed mode when a GUI display is available. Beyond navigation/interaction, this tool buffers everything the running app emits (console messages, network requests, JS exceptions, accessibility tree) so the agent can verify what is ACTUALLY happening — not just what the build/test reports. Auto-installs Playwright + Chromium on first use without sudo or OS package manager escalation. Diagnostic actions: observe_bundle, dom_summary, dom, console_logs, network_log, page_errors, a11y_snapshot, bounding_box, query_all, performance, cookies, storage, viewport, clear_diagnostics. Interaction actions: navigate, click, visual_click, fill, type, press, select, check, hover. Capture actions: screenshot, pdf, content, innerText, innerHTML, getAttribute, evaluate. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Workflow for user-facing work: start/serve the system with the stack-native tool, navigate to the real URL, then inspect page_errors, console_logs, network_log, DOM/accessibility, and screenshot evidence before completion. Build/typecheck/test output is only one layer; runtime browser evidence is required when the delivered artifact is a page, app, dashboard, game, form, visualization, or other UI. Repeat navigate/act/observe until the actual user flow is clean.";
10635
+ description = "Full-scope Playwright browser automation + diagnostic capture. Launches a persistent headless Chromium session by default, with optional visible/headed mode when a GUI display is available. Beyond navigation/interaction, this tool buffers everything the running app emits (console messages, network requests, JS exceptions, accessibility tree) so the agent can verify what is ACTUALLY happening — not just what the build/test reports. Auto-installs Playwright + Chromium on first use without sudo or OS package manager escalation. Diagnostic actions: observe_bundle, dom_summary, dom, console_logs, network_log, page_errors, a11y_snapshot, bounding_box, query_all, performance, cookies, storage, viewport, clear_diagnostics. Interaction actions: navigate, click, visual_click, fill, type, press, select, check, hover. Use fill with a selector or natural-language target for form fields; avoid raw evaluate for form filling because direct .value assignment does not fire app input/change events. This is a separate browser/runtime from browser_action; once you start a workflow here, continue here unless you intentionally navigate browser_action to the same URL. Capture actions: screenshot, pdf, content, innerText, innerHTML, getAttribute, evaluate. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Workflow for user-facing work: start/serve the system with the stack-native tool, navigate to the real URL, then inspect page_errors, console_logs, network_log, DOM/accessibility, and screenshot evidence before completion. Build/typecheck/test output is only one layer; runtime browser evidence is required when the delivered artifact is a page, app, dashboard, game, form, visualization, or other UI. Repeat navigate/act/observe until the actual user flow is clean.";
10563
10636
  parameters = {
10564
10637
  type: "object",
10565
10638
  properties: {
@@ -10609,7 +10682,7 @@ var init_playwright_browser = __esm({
10609
10682
  "clear_diagnostics",
10610
10683
  "close"
10611
10684
  ],
10612
- description: "Action to perform:\n- navigate: go to a URL\n- click: click element by selector\n- fill: clear input and type text (for form fields)\n- type: type text character by character into a selector, or into the currently focused element after visual_click\n- press: press a key (Enter, Tab, Escape, etc.)\n- screenshot: capture the headless browser page, not the desktop; use value to choose the output file path\n- observe_bundle: capture URL/title/viewport, DOM summary, a11y, diagnostics, screenshot, and gate assessment\n- visual_click: browser screenshot -> Moondream point -> elementFromPoint -> human-like Playwright mouse click -> post-action screenshot\n- evaluate: run JavaScript in page context\n- content: get page text content (readable, stripped)\n- dom: get raw page HTML (truncated)\n- dom_summary: compact interactive DOM summary with selectors\n- innerText: get innerText of a specific element\n- select: select dropdown option by value\n- check/uncheck: toggle checkbox\n- hover: hover over element\n- wait: wait for a selector to appear\n- waitForNavigation: wait for page navigation to complete\n- waitForSelector: wait for element matching selector\n- title: get page title\n- url: get current URL\n- getAttribute: get element attribute value\n- innerHTML: get element's innerHTML\n- textContent: get element's textContent\n- goBack/goForward/reload: browser navigation\n- pdf: save page as PDF\n- close: close browser session"
10685
+ description: "Action to perform:\n- navigate: go to a URL\n- click: click element by selector\n- fill: clear input and type text by selector, or by natural-language target when selector is absent\n- type: type text character by character into a selector, or into the currently focused element after visual_click\n- press: press a key (Enter, Tab, Escape, etc.)\n- screenshot: capture the headless browser page, not the desktop; use value to choose the output file path\n- observe_bundle: capture URL/title/viewport, DOM summary, a11y, diagnostics, screenshot, and gate assessment\n- visual_click: browser screenshot -> Moondream point -> elementFromPoint -> human-like Playwright mouse click -> post-action screenshot\n- evaluate: run JavaScript in page context\n- content: get page text content (readable, stripped)\n- dom: get raw page HTML (truncated)\n- dom_summary: compact interactive DOM summary with selectors\n- innerText: get innerText of a specific element\n- select: select dropdown option by value\n- check/uncheck: toggle checkbox\n- hover: hover over element\n- wait: wait for a selector to appear\n- waitForNavigation: wait for page navigation to complete\n- waitForSelector: wait for element matching selector\n- title: get page title\n- url: get current URL\n- getAttribute: get element attribute value\n- innerHTML: get element's innerHTML\n- textContent: get element's textContent\n- goBack/goForward/reload: browser navigation\n- pdf: save page as PDF\n- close: close browser session"
10613
10686
  },
10614
10687
  url: {
10615
10688
  type: "string",
@@ -10625,7 +10698,7 @@ var init_playwright_browser = __esm({
10625
10698
  },
10626
10699
  target: {
10627
10700
  type: "string",
10628
- description: "Natural-language browser visual target for visual_click, for example 'the green Continue button' or 'the search field'."
10701
+ description: "Natural-language browser visual target for visual_click or selector-less fill, for example 'the green Continue button', 'username field', or 'password field'."
10629
10702
  },
10630
10703
  value: {
10631
10704
  type: "string",
@@ -10745,12 +10818,22 @@ var init_playwright_browser = __esm({
10745
10818
  return ok(`Clicked: ${resolvedSelector}${resolvedSelector !== selector ? ` (from ${selector})` : ""}`, start2);
10746
10819
  }
10747
10820
  case "fill": {
10748
- if (!selector)
10749
- return fail("selector is required", start2);
10750
10821
  if (text === void 0)
10751
10822
  return fail("text is required", start2);
10752
- await page.fill(selector, text, { timeout: timeout2 });
10753
- return ok(`Filled ${selector} with "${text}"`, start2);
10823
+ const typingDelay = typeof args.typing_delay_ms === "number" ? Math.max(0, Math.min(500, Math.round(args.typing_delay_ms))) : 20;
10824
+ if (selector) {
10825
+ const resolvedSelector = resolveDomSummarySelector(selector);
10826
+ if (!resolvedSelector)
10827
+ return fail(`No selector known for DOM summary reference ${selector}; run dom_summary and use the emitted selector.`, start2);
10828
+ await page.fill(resolvedSelector, text, { timeout: timeout2 });
10829
+ return ok(`Filled ${resolvedSelector}${resolvedSelector !== selector ? ` (from ${selector})` : ""} with "${text}"`, start2);
10830
+ }
10831
+ const target = typeof args.target === "string" && args.target.trim() ? args.target.trim() : "";
10832
+ if (!target)
10833
+ return fail("selector or target is required for fill. Prefer target for visual/natural-language form fields, e.g. target='username field'.", start2);
10834
+ const result = await clickAndFillBrowserTarget(page, target, text, typingDelay);
10835
+ const active = result.active ?? {};
10836
+ return ok(`Filled target "${target}" via ${result.source} into <${active["tag"] || "element"}>${active["name"] ? ` name=${JSON.stringify(active["name"])}` : ""}${active["placeholder"] ? ` placeholder=${JSON.stringify(active["placeholder"])}` : ""}${active["ariaLabel"] ? ` aria-label=${JSON.stringify(active["ariaLabel"])}` : ""}.`, start2);
10754
10837
  }
10755
10838
  case "type": {
10756
10839
  if (text === void 0)
@@ -10893,9 +10976,13 @@ var init_playwright_browser = __esm({
10893
10976
  case "evaluate": {
10894
10977
  if (!text)
10895
10978
  return fail("text (JavaScript code) is required", start2);
10896
- const result = await page.evaluate(text);
10897
- const serialized = typeof result === "string" ? result : JSON.stringify(result, null, 2);
10898
- return ok(serialized?.slice(0, 15e3) ?? "undefined", start2);
10979
+ try {
10980
+ const result = await page.evaluate(text);
10981
+ const serialized = typeof result === "string" ? result : JSON.stringify(result, null, 2);
10982
+ return ok(serialized?.slice(0, 15e3) ?? "undefined", start2);
10983
+ } catch (err2) {
10984
+ return fail(evaluateFailureMessage(err2, text), start2);
10985
+ }
10899
10986
  }
10900
10987
  // ── Screenshot / PDF ──
10901
10988
  case "screenshot": {
@@ -23994,8 +24081,8 @@ var init_explore_tools = __esm({
23994
24081
  enter_worktree: "Create isolated git worktree for safe parallel file modifications",
23995
24082
  exit_worktree: "Exit and optionally remove a git worktree (keep for merge or discard)",
23996
24083
  notebook_edit: "Edit Jupyter .ipynb notebooks at cell level (list, replace, insert, delete cells)",
23997
- browser_action: "Interactive browser: login, fill forms, click buttons, screenshot — session persists between calls; for console/page-error/network diagnostics prefer playwright_browser",
23998
- playwright_browser: "Full browser verification and visual action loop: observe_bundle, visual_click via Moondream pointing, focused-element typing for visual form filling, screenshot, page_errors, console_logs, network_log, DOM/accessibility, storage",
24084
+ browser_action: "Interactive Selenium browser: login, fill forms, click buttons, screenshot — session persists between browser_action calls only; separate runtime from playwright_browser",
24085
+ playwright_browser: "Full browser verification and visual action loop: observe_bundle, visual_click via Moondream pointing, selector/target fill, focused-element typing, screenshot, page_errors, console_logs, network_log, DOM/accessibility, storage",
23999
24086
  carbonyl_browser: "Terminal-rendered real browser automation via Carbonyl: navigate, read rendered text, click/type, sessions, daemon mode",
24000
24087
  scheduler: "Schedule tasks for automatic future execution via OS cron",
24001
24088
  cronjob: "Alias for scheduler: OS cron-backed time triggers",
@@ -284492,6 +284579,7 @@ async function ensureSession(options2 = {}) {
284492
284579
  }
284493
284580
  activeSessionId = null;
284494
284581
  activeSessionHeadless = null;
284582
+ activeSessionUrl = null;
284495
284583
  }
284496
284584
  }
284497
284585
  if (activeSessionId) {
@@ -284503,6 +284591,13 @@ async function ensureSession(options2 = {}) {
284503
284591
  }
284504
284592
  activeSessionId = null;
284505
284593
  activeSessionHeadless = null;
284594
+ activeSessionUrl = null;
284595
+ }
284596
+ if (options2.allowCreate === false) {
284597
+ return {
284598
+ error: "No active browser_action Selenium session exists for this action. browser_action is a separate browser/runtime from playwright_browser; continue the current page with playwright_browser, or call browser_action({action:'navigate', url: ...}) first.",
284599
+ sessionId: ""
284600
+ };
284506
284601
  }
284507
284602
  const headless = options2.headless ?? defaultBrowserHeadless();
284508
284603
  const res = await fetch(`${BASE_URL}/session/start`, {
@@ -284520,8 +284615,16 @@ async function ensureSession(options2 = {}) {
284520
284615
  return { error: String(data.message ?? "Failed to start browser session"), sessionId: "" };
284521
284616
  activeSessionId = data.session_id;
284522
284617
  activeSessionHeadless = headless;
284618
+ activeSessionUrl = null;
284523
284619
  return { sessionId: activeSessionId };
284524
284620
  }
284621
/**
 * Compose the one-line reminder appended to browser_action errors: this tool
 * runs in its own session, what URL that session is on (from the module-level
 * `activeSessionUrl`, when set), and which playwright_browser actions to keep
 * using for pages opened there.
 *
 * @returns {string} Three sentences separated by single spaces.
 */
function browserActionRuntimeHint() {
  const separation = "browser_action is a separate browser/runtime from playwright_browser and uses its own Selenium/Chrome session; it does not share page state, cookies, focus, or navigation.";
  const guidance = "If this page was opened with playwright_browser, keep using playwright_browser actions such as dom_summary, fill, type, press, visual_click, and observe_bundle.";

  let location = "Current browser_action URL: unknown or not navigated.";
  if (activeSessionUrl) {
    location = `Current browser_action URL: ${activeSessionUrl}`;
  }

  return `${separation} ${location} ${guidance}`;
}
284525
284628
  async function apiCall(endpoint, method = "POST", body) {
284526
284629
  const options2 = {
284527
284630
  method,
@@ -284544,7 +284647,7 @@ async function apiCall(endpoint, method = "POST", body) {
284544
284647
  const res = await fetch(url, options2);
284545
284648
  return await res.json();
284546
284649
  }
284547
- var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, BrowserActionTool;
284650
+ var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, activeSessionUrl, BrowserActionTool;
284548
284651
  var init_browser_action = __esm({
284549
284652
  "packages/execution/dist/tools/browser-action.js"() {
284550
284653
  "use strict";
@@ -284557,9 +284660,10 @@ var init_browser_action = __esm({
284557
284660
  serviceProcess = null;
284558
284661
  activeSessionId = null;
284559
284662
  activeSessionHeadless = null;
284663
+ activeSessionUrl = null;
284560
284664
  BrowserActionTool = class {
284561
284665
  name = "browser_action";
284562
- description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
284666
+ description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. This is a separate Selenium/Chrome runtime from playwright_browser; do not switch between the two mid-workflow unless you intentionally navigate the second tool to the same URL. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
284563
284667
  parameters = {
284564
284668
  type: "object",
284565
284669
  properties: {
@@ -284629,27 +284733,38 @@ var init_browser_action = __esm({
284629
284733
  const requestedWidth = args.width == null ? void 0 : asPositiveInt2(args.width, 1280, 320, 3840);
284630
284734
  const requestedHeight = args.height == null ? void 0 : asPositiveInt2(args.height, 720, 240, 2160);
284631
284735
  const requestedScale = args.device_scale_factor == null ? void 0 : asPositiveNumber(args.device_scale_factor, 1, 0.25, 3);
284632
- const launchErr = await launchService();
284633
- if (launchErr) {
284634
- return { success: false, output: "", error: launchErr, durationMs: Date.now() - start2 };
284635
- }
284636
284736
  if (action === "close") {
284637
- if (activeSessionId) {
284737
+ if (activeSessionId || await probeService()) {
284638
284738
  try {
284639
284739
  await apiCall("/session/close");
284640
284740
  } catch {
284641
284741
  }
284642
284742
  activeSessionId = null;
284643
284743
  activeSessionHeadless = null;
284744
+ activeSessionUrl = null;
284644
284745
  }
284645
284746
  return { success: true, output: "Browser session closed.", durationMs: Date.now() - start2 };
284646
284747
  }
284748
+ const actionStartsSession = action === "navigate";
284749
+ if (!actionStartsSession && !activeSessionId) {
284750
+ return {
284751
+ success: false,
284752
+ output: "",
284753
+ error: `browser_action ${action || "(missing action)"} requires an active browser_action session. ` + browserActionRuntimeHint(),
284754
+ durationMs: Date.now() - start2
284755
+ };
284756
+ }
284757
+ const launchErr = await launchService();
284758
+ if (launchErr) {
284759
+ return { success: false, output: "", error: launchErr, durationMs: Date.now() - start2 };
284760
+ }
284647
284761
  const session = await ensureSession({
284648
284762
  width: requestedWidth,
284649
284763
  height: requestedHeight,
284650
284764
  deviceScaleFactor: requestedScale,
284651
284765
  headless: asOptionalBoolean2(args.headless),
284652
- forceNew: asOptionalBoolean2(args.force_new) === true
284766
+ forceNew: asOptionalBoolean2(args.force_new) === true,
284767
+ allowCreate: actionStartsSession
284653
284768
  });
284654
284769
  if (session.error) {
284655
284770
  return { success: false, output: "", error: session.error, durationMs: Date.now() - start2 };
@@ -284667,7 +284782,13 @@ var init_browser_action = __esm({
284667
284782
  }
284668
284783
  result = await apiCall("/navigate", "POST", { url: args.url });
284669
284784
  if (result.ok) {
284670
- return { success: true, output: `Navigated to ${args.url}`, durationMs: Date.now() - start2 };
284785
+ activeSessionUrl = args.url;
284786
+ return {
284787
+ success: true,
284788
+ output: `Navigated to ${args.url}
284789
+ Runtime: browser_action Selenium/Chrome session. Continue with browser_action for this page, or use playwright_browser separately after navigating it.`,
284790
+ durationMs: Date.now() - start2
284791
+ };
284671
284792
  }
284672
284793
  const navMsg = String(result.message ?? "Navigation failed");
284673
284794
  const navHint = navMsg.toLowerCase().includes("connection") || navMsg.toLowerCase().includes("refused") || navMsg.toLowerCase().includes("err_connection") ? " (the URL appears unreachable — check if the target server is running and accepting connections)" : navMsg.toLowerCase().includes("timeout") ? " (page load timed out — try again or use a different URL)" : "";
@@ -284689,7 +284810,7 @@ var init_browser_action = __esm({
284689
284810
  return {
284690
284811
  success: false,
284691
284812
  output: `Click on ${args.selector} failed: ${clickMsg}`,
284692
- error: `browser_action click failed: ${clickMsg}. Try dom_summary first to see what selectors exist on the page.`,
284813
+ error: `browser_action click failed: ${clickMsg}. Try dom_summary first to see what selectors exist on the page. ${browserActionRuntimeHint()}`,
284693
284814
  durationMs: Date.now() - start2
284694
284815
  };
284695
284816
  }
@@ -284731,7 +284852,7 @@ var init_browser_action = __esm({
284731
284852
  return {
284732
284853
  success: false,
284733
284854
  output: `Type into ${args.selector} failed: ${typeMsg}`,
284734
- error: `browser_action type failed: ${typeMsg}. Verify the element is visible and is an input/textarea — use dom_summary to check.`,
284855
+ error: `browser_action type failed: ${typeMsg}. Verify the element is visible and is an input/textarea — use dom_summary to check. ${browserActionRuntimeHint()}`,
284735
284856
  durationMs: Date.now() - start2
284736
284857
  };
284737
284858
  }
@@ -284872,7 +284993,7 @@ var init_browser_action = __esm({
284872
284993
  if (!pointResult || pointResult.points.length === 0) {
284873
284994
  return {
284874
284995
  success: false,
284875
- output: `Vision could not find "${target}" on the page. Try using dom_summary to find the CSS selector instead.`,
284996
+ output: `Vision could not find "${target}" on the page. Try using dom_summary to find the CSS selector instead. ${browserActionRuntimeHint()}`,
284876
284997
  error: "No point backend returned normalized coordinates.",
284877
284998
  durationMs: Date.now() - start2
284878
284999
  };
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.206",
3
+ "version": "1.0.207",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.206",
9
+ "version": "1.0.207",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
@@ -4565,9 +4565,19 @@
4565
4565
  }
4566
4566
  },
4567
4567
  "node_modules/js-yaml": {
4568
- "version": "4.1.1",
4569
- "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
4570
- "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
4568
+ "version": "4.2.0",
4569
+ "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.2.0.tgz",
4570
+ "integrity": "sha512-ePWsvanv0DWuDRsW8dnt+R4jQ31SCRCQ7hhNcPXZPsoBZiemuZNYGf7adZdqX2D86j6rvKp3RpCxVTSb8WQlOw==",
4571
+ "funding": [
4572
+ {
4573
+ "type": "github",
4574
+ "url": "https://github.com/sponsors/puzrin"
4575
+ },
4576
+ {
4577
+ "type": "github",
4578
+ "url": "https://github.com/sponsors/nodeca"
4579
+ }
4580
+ ],
4571
4581
  "license": "MIT",
4572
4582
  "dependencies": {
4573
4583
  "argparse": "^2.0.1"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.206",
3
+ "version": "1.0.207",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",