omnius 1.0.206 → 1.0.207
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +145 -24
- package/npm-shrinkwrap.json +15 -5
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -10326,6 +10326,79 @@ function pngDimensions(buffer2) {
|
|
|
10326
10326
|
}
|
|
10327
10327
|
return null;
|
|
10328
10328
|
}
|
|
10329
|
+
/**
 * Describe the element that currently holds focus in the page.
 *
 * Runs a snippet in the page via `pageHandle.evaluate` that reads
 * `document.activeElement` and returns a plain, serializable summary of it.
 *
 * @param {{ evaluate: (code: string) => Promise<unknown> }} pageHandle
 *   Page-like object whose `evaluate` runs a JavaScript string in the page.
 * @returns {Promise<object|null>} Summary with `tag`, `id`, `name`, `role`,
 *   `ariaLabel`, `type`, `placeholder`, `text` (trimmed, at most 120 chars),
 *   `isEditable` and `rect`; `null` when nothing is focused or the page
 *   returned a non-object value.
 */
async function describeFocusedEditable(pageHandle) {
  const active = await pageHandle.evaluate(`(() => {
    const el = document.activeElement;
    if (!el) return null;
    const rect = el.getBoundingClientRect();
    const role = (el.getAttribute("role") || "").toLowerCase();
    // getAttribute() returns null when the attribute is absent. Keep that
    // distinct from an empty value (<div contenteditable>), otherwise every
    // focused element, including <body>, would be reported as editable.
    const editableAttr = el.getAttribute("contenteditable");
    const contentEditable = editableAttr === null ? null : String(editableAttr).toLowerCase();
    const isEditable = el.matches("input, textarea")
      || el.isContentEditable === true
      || contentEditable === "" || contentEditable === "true"
      || ["textbox", "searchbox", "combobox"].includes(role);
    return {
      tag: String(el.tagName || "").toLowerCase(),
      id: el.id || "",
      name: el.getAttribute("name") || "",
      role,
      ariaLabel: el.getAttribute("aria-label") || "",
      type: el.getAttribute("type") || "",
      placeholder: el.getAttribute("placeholder") || "",
      text: String(el.textContent || "").trim().slice(0, 120),
      isEditable,
      rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
    };
  })()`);
  return active && typeof active === "object" ? active : null;
}
|
|
10354
|
+
/**
 * Fill a form field located by a natural-language target instead of a selector.
 *
 * Looks up a candidate element via `findBrowserVisualCandidate` (retrying once
 * with the two extra flags enabled if the first lookup finds nothing), clicks
 * its center, checks that the focused element reports `isEditable`, then
 * replaces the field contents by select-all + typing.
 *
 * @param {object} pageHandle Page-like object exposing `viewportSize`,
 *   `waitForTimeout`, `mouse.click`, `keyboard.press` and `keyboard.type`.
 * @param {string} target Natural-language description of the field.
 * @param {string} text Text to type; an empty string clears the field.
 * @param {number} typingDelay Per-key delay passed to `keyboard.type`.
 * @returns {Promise<{candidate: object, active: object, source: string}>}
 *   The matched candidate, the focused-element summary, and how the candidate
 *   was found ("dom-candidate" or "dom-candidate+scroll").
 * @throws {Error} When no candidate with finite center coordinates is found,
 *   or when the click does not leave an editable element focused.
 */
async function clickAndFillBrowserTarget(pageHandle, target, text, typingDelay) {
  const viewport = pageHandle.viewportSize?.() ?? { width: 1280, height: 720 };
  const centerX = viewport.width / 2;
  const centerY = viewport.height / 2;

  let candidate = await findBrowserVisualCandidate(pageHandle, target, centerX, centerY, true);
  let source = "dom-candidate";
  if (!candidate) {
    candidate = await findBrowserVisualCandidate(pageHandle, target, centerX, centerY, true, true, true);
    if (candidate?.["scrolledIntoView"] === true) {
      source += "+scroll";
    }
    if (candidate) {
      // Short pause before clicking a candidate found by the retry lookup.
      await pageHandle.waitForTimeout(150);
    }
  }

  const center = candidate?.["center"];
  const x = Number(center?.x);
  const y = Number(center?.y);
  if (!Number.isFinite(x) || !Number.isFinite(y)) {
    throw new Error(`No visible editable candidate matched target "${target}". Run observe_bundle or dom_summary to inspect available labels/selectors.`);
  }

  await pageHandle.mouse.click(x, y);
  await pageHandle.waitForTimeout(80);

  const active = await describeFocusedEditable(pageHandle);
  if (!active?.["isEditable"]) {
    // `candidate` is always set here: a missing candidate yields NaN
    // coordinates and is rejected by the guard above.
    const matched = JSON.stringify({
      tag: candidate["tag"],
      text: candidate["text"],
      ariaLabel: candidate["ariaLabel"],
      placeholder: candidate["placeholder"],
      name: candidate["name"]
    });
    throw new Error(`Target "${target}" was clicked, but no editable element became focused. Matched element: ${matched}.`);
  }

  const selectAll = process.platform === "darwin" ? "Meta+A" : "Control+A";
  await pageHandle.keyboard.press(selectAll);
  if (text === "") {
    // Typing an empty string sends no keystrokes, so the selected old value
    // would survive. Delete it explicitly so an empty fill clears the field.
    await pageHandle.keyboard.press("Backspace");
  }
  await pageHandle.keyboard.type(text, { delay: typingDelay });
  return { candidate, active, source };
}
|
|
10388
|
+
/**
 * Build the failure text for a failed `evaluate` call: the raw error message
 * (capped at 500 characters) followed by zero or more "Hint: ..." lines
 * derived from the error text and the submitted code.
 *
 * @param {unknown} err Thrown value; non-Error values are stringified.
 * @param {string} code8 The JavaScript source that was evaluated.
 * @returns {string} Newline-joined message and hints.
 */
function evaluateFailureMessage(err, code8) {
  const raw = err instanceof Error ? err.message : String(err);
  const hints = [];
  if (/map is not a function/i.test(raw) && /querySelectorAll/i.test(code8)) {
    hints.push("document.querySelectorAll() returns a NodeList; use Array.from(document.querySelectorAll(selector)).map(...) or [...document.querySelectorAll(selector)].map(...).");
  }
  // `(?!=)` keeps comparisons such as `.value === "x"` from being mistaken
  // for an assignment to `.value`.
  if (/(?:\.value\s*=(?!=)|setAttribute\(['"]value['"])/.test(code8) && /\b(input|textarea|querySelector)/i.test(code8)) {
    hints.push("Do not fill modern React/Vue/Svelte forms by assigning .value in evaluate; use playwright_browser fill, or visual_click the field then type, so input/change events fire.");
  }
  // "querySelector" is a prefix of "querySelectorAll", so one pattern covers both.
  if (/querySelector/.test(code8)) {
    hints.push("For page inspection, prefer query_all, dom_summary, or observe_bundle before raw evaluate.");
  }
  return [raw.slice(0, 500), ...hints.map((hint) => `Hint: ${hint}`)].join("\n");
}
|
|
10329
10402
|
function buildImageMarker(buffer2) {
|
|
10330
10403
|
let mimeType = "image/png";
|
|
10331
10404
|
let out = buffer2;
|
|
@@ -10559,7 +10632,7 @@ var init_playwright_browser = __esm({
|
|
|
10559
10632
|
PLAYWRIGHT_BROWSERS_DIR = join13(PLAYWRIGHT_RUNTIME_DIR, "browsers");
|
|
10560
10633
|
PlaywrightBrowserTool = class {
|
|
10561
10634
|
name = "playwright_browser";
|
|
10562
|
-
description = "Full-scope Playwright browser automation + diagnostic capture. Launches a persistent headless Chromium session by default, with optional visible/headed mode when a GUI display is available. Beyond navigation/interaction, this tool buffers everything the running app emits (console messages, network requests, JS exceptions, accessibility tree) so the agent can verify what is ACTUALLY happening — not just what the build/test reports. Auto-installs Playwright + Chromium on first use without sudo or OS package manager escalation. Diagnostic actions: observe_bundle, dom_summary, dom, console_logs, network_log, page_errors, a11y_snapshot, bounding_box, query_all, performance, cookies, storage, viewport, clear_diagnostics. Interaction actions: navigate, click, visual_click, fill, type, press, select, check, hover. Capture actions: screenshot, pdf, content, innerText, innerHTML, getAttribute, evaluate. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Workflow for user-facing work: start/serve the system with the stack-native tool, navigate to the real URL, then inspect page_errors, console_logs, network_log, DOM/accessibility, and screenshot evidence before completion. Build/typecheck/test output is only one layer; runtime browser evidence is required when the delivered artifact is a page, app, dashboard, game, form, visualization, or other UI. Repeat navigate/act/observe until the actual user flow is clean.";
|
|
10635
|
+
description = "Full-scope Playwright browser automation + diagnostic capture. Launches a persistent headless Chromium session by default, with optional visible/headed mode when a GUI display is available. Beyond navigation/interaction, this tool buffers everything the running app emits (console messages, network requests, JS exceptions, accessibility tree) so the agent can verify what is ACTUALLY happening — not just what the build/test reports. Auto-installs Playwright + Chromium on first use without sudo or OS package manager escalation. Diagnostic actions: observe_bundle, dom_summary, dom, console_logs, network_log, page_errors, a11y_snapshot, bounding_box, query_all, performance, cookies, storage, viewport, clear_diagnostics. Interaction actions: navigate, click, visual_click, fill, type, press, select, check, hover. Use fill with a selector or natural-language target for form fields; avoid raw evaluate for form filling because direct .value assignment does not fire app input/change events. This is a separate browser/runtime from browser_action; once you start a workflow here, continue here unless you intentionally navigate browser_action to the same URL. Capture actions: screenshot, pdf, content, innerText, innerHTML, getAttribute, evaluate. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Workflow for user-facing work: start/serve the system with the stack-native tool, navigate to the real URL, then inspect page_errors, console_logs, network_log, DOM/accessibility, and screenshot evidence before completion. Build/typecheck/test output is only one layer; runtime browser evidence is required when the delivered artifact is a page, app, dashboard, game, form, visualization, or other UI. Repeat navigate/act/observe until the actual user flow is clean.";
|
|
10563
10636
|
parameters = {
|
|
10564
10637
|
type: "object",
|
|
10565
10638
|
properties: {
|
|
@@ -10609,7 +10682,7 @@ var init_playwright_browser = __esm({
|
|
|
10609
10682
|
"clear_diagnostics",
|
|
10610
10683
|
"close"
|
|
10611
10684
|
],
|
|
10612
|
-
description: "Action to perform:\n- navigate: go to a URL\n- click: click element by selector\n- fill: clear input and type text
|
|
10685
|
+
description: "Action to perform:\n- navigate: go to a URL\n- click: click element by selector\n- fill: clear input and type text by selector, or by natural-language target when selector is absent\n- type: type text character by character into a selector, or into the currently focused element after visual_click\n- press: press a key (Enter, Tab, Escape, etc.)\n- screenshot: capture the headless browser page, not the desktop; use value to choose the output file path\n- observe_bundle: capture URL/title/viewport, DOM summary, a11y, diagnostics, screenshot, and gate assessment\n- visual_click: browser screenshot -> Moondream point -> elementFromPoint -> human-like Playwright mouse click -> post-action screenshot\n- evaluate: run JavaScript in page context\n- content: get page text content (readable, stripped)\n- dom: get raw page HTML (truncated)\n- dom_summary: compact interactive DOM summary with selectors\n- innerText: get innerText of a specific element\n- select: select dropdown option by value\n- check/uncheck: toggle checkbox\n- hover: hover over element\n- wait: wait for a selector to appear\n- waitForNavigation: wait for page navigation to complete\n- waitForSelector: wait for element matching selector\n- title: get page title\n- url: get current URL\n- getAttribute: get element attribute value\n- innerHTML: get element's innerHTML\n- textContent: get element's textContent\n- goBack/goForward/reload: browser navigation\n- pdf: save page as PDF\n- close: close browser session"
|
|
10613
10686
|
},
|
|
10614
10687
|
url: {
|
|
10615
10688
|
type: "string",
|
|
@@ -10625,7 +10698,7 @@ var init_playwright_browser = __esm({
|
|
|
10625
10698
|
},
|
|
10626
10699
|
target: {
|
|
10627
10700
|
type: "string",
|
|
10628
|
-
description: "Natural-language browser visual target for visual_click, for example 'the green Continue button' or '
|
|
10701
|
+
description: "Natural-language browser visual target for visual_click or selector-less fill, for example 'the green Continue button', 'username field', or 'password field'."
|
|
10629
10702
|
},
|
|
10630
10703
|
value: {
|
|
10631
10704
|
type: "string",
|
|
@@ -10745,12 +10818,22 @@ var init_playwright_browser = __esm({
|
|
|
10745
10818
|
return ok(`Clicked: ${resolvedSelector}${resolvedSelector !== selector ? ` (from ${selector})` : ""}`, start2);
|
|
10746
10819
|
}
|
|
10747
10820
|
case "fill": {
|
|
10748
|
-
if (!selector)
|
|
10749
|
-
return fail("selector is required", start2);
|
|
10750
10821
|
if (text === void 0)
|
|
10751
10822
|
return fail("text is required", start2);
|
|
10752
|
-
|
|
10753
|
-
|
|
10823
|
+
const typingDelay = typeof args.typing_delay_ms === "number" ? Math.max(0, Math.min(500, Math.round(args.typing_delay_ms))) : 20;
|
|
10824
|
+
if (selector) {
|
|
10825
|
+
const resolvedSelector = resolveDomSummarySelector(selector);
|
|
10826
|
+
if (!resolvedSelector)
|
|
10827
|
+
return fail(`No selector known for DOM summary reference ${selector}; run dom_summary and use the emitted selector.`, start2);
|
|
10828
|
+
await page.fill(resolvedSelector, text, { timeout: timeout2 });
|
|
10829
|
+
return ok(`Filled ${resolvedSelector}${resolvedSelector !== selector ? ` (from ${selector})` : ""} with "${text}"`, start2);
|
|
10830
|
+
}
|
|
10831
|
+
const target = typeof args.target === "string" && args.target.trim() ? args.target.trim() : "";
|
|
10832
|
+
if (!target)
|
|
10833
|
+
return fail("selector or target is required for fill. Prefer target for visual/natural-language form fields, e.g. target='username field'.", start2);
|
|
10834
|
+
const result = await clickAndFillBrowserTarget(page, target, text, typingDelay);
|
|
10835
|
+
const active = result.active ?? {};
|
|
10836
|
+
return ok(`Filled target "${target}" via ${result.source} into <${active["tag"] || "element"}>${active["name"] ? ` name=${JSON.stringify(active["name"])}` : ""}${active["placeholder"] ? ` placeholder=${JSON.stringify(active["placeholder"])}` : ""}${active["ariaLabel"] ? ` aria-label=${JSON.stringify(active["ariaLabel"])}` : ""}.`, start2);
|
|
10754
10837
|
}
|
|
10755
10838
|
case "type": {
|
|
10756
10839
|
if (text === void 0)
|
|
@@ -10893,9 +10976,13 @@ var init_playwright_browser = __esm({
|
|
|
10893
10976
|
case "evaluate": {
|
|
10894
10977
|
if (!text)
|
|
10895
10978
|
return fail("text (JavaScript code) is required", start2);
|
|
10896
|
-
|
|
10897
|
-
|
|
10898
|
-
|
|
10979
|
+
try {
|
|
10980
|
+
const result = await page.evaluate(text);
|
|
10981
|
+
const serialized = typeof result === "string" ? result : JSON.stringify(result, null, 2);
|
|
10982
|
+
return ok(serialized?.slice(0, 15e3) ?? "undefined", start2);
|
|
10983
|
+
} catch (err2) {
|
|
10984
|
+
return fail(evaluateFailureMessage(err2, text), start2);
|
|
10985
|
+
}
|
|
10899
10986
|
}
|
|
10900
10987
|
// ── Screenshot / PDF ──
|
|
10901
10988
|
case "screenshot": {
|
|
@@ -23994,8 +24081,8 @@ var init_explore_tools = __esm({
|
|
|
23994
24081
|
enter_worktree: "Create isolated git worktree for safe parallel file modifications",
|
|
23995
24082
|
exit_worktree: "Exit and optionally remove a git worktree (keep for merge or discard)",
|
|
23996
24083
|
notebook_edit: "Edit Jupyter .ipynb notebooks at cell level (list, replace, insert, delete cells)",
|
|
23997
|
-
browser_action: "Interactive browser: login, fill forms, click buttons, screenshot — session persists between calls;
|
|
23998
|
-
playwright_browser: "Full browser verification and visual action loop: observe_bundle, visual_click via Moondream pointing, focused-element typing
|
|
24084
|
+
browser_action: "Interactive Selenium browser: login, fill forms, click buttons, screenshot — session persists between browser_action calls only; separate runtime from playwright_browser",
|
|
24085
|
+
playwright_browser: "Full browser verification and visual action loop: observe_bundle, visual_click via Moondream pointing, selector/target fill, focused-element typing, screenshot, page_errors, console_logs, network_log, DOM/accessibility, storage",
|
|
23999
24086
|
carbonyl_browser: "Terminal-rendered real browser automation via Carbonyl: navigate, read rendered text, click/type, sessions, daemon mode",
|
|
24000
24087
|
scheduler: "Schedule tasks for automatic future execution via OS cron",
|
|
24001
24088
|
cronjob: "Alias for scheduler: OS cron-backed time triggers",
|
|
@@ -284492,6 +284579,7 @@ async function ensureSession(options2 = {}) {
|
|
|
284492
284579
|
}
|
|
284493
284580
|
activeSessionId = null;
|
|
284494
284581
|
activeSessionHeadless = null;
|
|
284582
|
+
activeSessionUrl = null;
|
|
284495
284583
|
}
|
|
284496
284584
|
}
|
|
284497
284585
|
if (activeSessionId) {
|
|
@@ -284503,6 +284591,13 @@ async function ensureSession(options2 = {}) {
|
|
|
284503
284591
|
}
|
|
284504
284592
|
activeSessionId = null;
|
|
284505
284593
|
activeSessionHeadless = null;
|
|
284594
|
+
activeSessionUrl = null;
|
|
284595
|
+
}
|
|
284596
|
+
if (options2.allowCreate === false) {
|
|
284597
|
+
return {
|
|
284598
|
+
error: "No active browser_action Selenium session exists for this action. browser_action is a separate browser/runtime from playwright_browser; continue the current page with playwright_browser, or call browser_action({action:'navigate', url: ...}) first.",
|
|
284599
|
+
sessionId: ""
|
|
284600
|
+
};
|
|
284506
284601
|
}
|
|
284507
284602
|
const headless = options2.headless ?? defaultBrowserHeadless();
|
|
284508
284603
|
const res = await fetch(`${BASE_URL}/session/start`, {
|
|
@@ -284520,8 +284615,16 @@ async function ensureSession(options2 = {}) {
|
|
|
284520
284615
|
return { error: String(data.message ?? "Failed to start browser session"), sessionId: "" };
|
|
284521
284616
|
activeSessionId = data.session_id;
|
|
284522
284617
|
activeSessionHeadless = headless;
|
|
284618
|
+
activeSessionUrl = null;
|
|
284523
284619
|
return { sessionId: activeSessionId };
|
|
284524
284620
|
}
|
|
284621
|
+
/**
 * Compose the one-line reminder appended to browser_action errors: states that
 * browser_action and playwright_browser are separate runtimes, reports the URL
 * recorded in the module-level `activeSessionUrl` (or that none is known), and
 * advises staying with playwright_browser for pages opened there.
 *
 * @returns {string} The three sentences joined by single spaces.
 */
function browserActionRuntimeHint() {
  const separation = "browser_action is a separate browser/runtime from playwright_browser and uses its own Selenium/Chrome session; it does not share page state, cookies, focus, or navigation.";
  let location = "Current browser_action URL: unknown or not navigated.";
  if (activeSessionUrl) {
    location = `Current browser_action URL: ${activeSessionUrl}`;
  }
  const advice = "If this page was opened with playwright_browser, keep using playwright_browser actions such as dom_summary, fill, type, press, visual_click, and observe_bundle.";
  return `${separation} ${location} ${advice}`;
}
|
|
284525
284628
|
async function apiCall(endpoint, method = "POST", body) {
|
|
284526
284629
|
const options2 = {
|
|
284527
284630
|
method,
|
|
@@ -284544,7 +284647,7 @@ async function apiCall(endpoint, method = "POST", body) {
|
|
|
284544
284647
|
const res = await fetch(url, options2);
|
|
284545
284648
|
return await res.json();
|
|
284546
284649
|
}
|
|
284547
|
-
var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, BrowserActionTool;
|
|
284650
|
+
var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, activeSessionUrl, BrowserActionTool;
|
|
284548
284651
|
var init_browser_action = __esm({
|
|
284549
284652
|
"packages/execution/dist/tools/browser-action.js"() {
|
|
284550
284653
|
"use strict";
|
|
@@ -284557,9 +284660,10 @@ var init_browser_action = __esm({
|
|
|
284557
284660
|
serviceProcess = null;
|
|
284558
284661
|
activeSessionId = null;
|
|
284559
284662
|
activeSessionHeadless = null;
|
|
284663
|
+
activeSessionUrl = null;
|
|
284560
284664
|
BrowserActionTool = class {
|
|
284561
284665
|
name = "browser_action";
|
|
284562
|
-
description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
|
|
284666
|
+
description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. This is a separate Selenium/Chrome runtime from playwright_browser; do not switch between the two mid-workflow unless you intentionally navigate the second tool to the same URL. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
|
|
284563
284667
|
parameters = {
|
|
284564
284668
|
type: "object",
|
|
284565
284669
|
properties: {
|
|
@@ -284629,27 +284733,38 @@ var init_browser_action = __esm({
|
|
|
284629
284733
|
const requestedWidth = args.width == null ? void 0 : asPositiveInt2(args.width, 1280, 320, 3840);
|
|
284630
284734
|
const requestedHeight = args.height == null ? void 0 : asPositiveInt2(args.height, 720, 240, 2160);
|
|
284631
284735
|
const requestedScale = args.device_scale_factor == null ? void 0 : asPositiveNumber(args.device_scale_factor, 1, 0.25, 3);
|
|
284632
|
-
const launchErr = await launchService();
|
|
284633
|
-
if (launchErr) {
|
|
284634
|
-
return { success: false, output: "", error: launchErr, durationMs: Date.now() - start2 };
|
|
284635
|
-
}
|
|
284636
284736
|
if (action === "close") {
|
|
284637
|
-
if (activeSessionId) {
|
|
284737
|
+
if (activeSessionId || await probeService()) {
|
|
284638
284738
|
try {
|
|
284639
284739
|
await apiCall("/session/close");
|
|
284640
284740
|
} catch {
|
|
284641
284741
|
}
|
|
284642
284742
|
activeSessionId = null;
|
|
284643
284743
|
activeSessionHeadless = null;
|
|
284744
|
+
activeSessionUrl = null;
|
|
284644
284745
|
}
|
|
284645
284746
|
return { success: true, output: "Browser session closed.", durationMs: Date.now() - start2 };
|
|
284646
284747
|
}
|
|
284748
|
+
const actionStartsSession = action === "navigate";
|
|
284749
|
+
if (!actionStartsSession && !activeSessionId) {
|
|
284750
|
+
return {
|
|
284751
|
+
success: false,
|
|
284752
|
+
output: "",
|
|
284753
|
+
error: `browser_action ${action || "(missing action)"} requires an active browser_action session. ` + browserActionRuntimeHint(),
|
|
284754
|
+
durationMs: Date.now() - start2
|
|
284755
|
+
};
|
|
284756
|
+
}
|
|
284757
|
+
const launchErr = await launchService();
|
|
284758
|
+
if (launchErr) {
|
|
284759
|
+
return { success: false, output: "", error: launchErr, durationMs: Date.now() - start2 };
|
|
284760
|
+
}
|
|
284647
284761
|
const session = await ensureSession({
|
|
284648
284762
|
width: requestedWidth,
|
|
284649
284763
|
height: requestedHeight,
|
|
284650
284764
|
deviceScaleFactor: requestedScale,
|
|
284651
284765
|
headless: asOptionalBoolean2(args.headless),
|
|
284652
|
-
forceNew: asOptionalBoolean2(args.force_new) === true
|
|
284766
|
+
forceNew: asOptionalBoolean2(args.force_new) === true,
|
|
284767
|
+
allowCreate: actionStartsSession
|
|
284653
284768
|
});
|
|
284654
284769
|
if (session.error) {
|
|
284655
284770
|
return { success: false, output: "", error: session.error, durationMs: Date.now() - start2 };
|
|
@@ -284667,7 +284782,13 @@ var init_browser_action = __esm({
|
|
|
284667
284782
|
}
|
|
284668
284783
|
result = await apiCall("/navigate", "POST", { url: args.url });
|
|
284669
284784
|
if (result.ok) {
|
|
284670
|
-
|
|
284785
|
+
activeSessionUrl = args.url;
|
|
284786
|
+
return {
|
|
284787
|
+
success: true,
|
|
284788
|
+
output: `Navigated to ${args.url}
|
|
284789
|
+
Runtime: browser_action Selenium/Chrome session. Continue with browser_action for this page, or use playwright_browser separately after navigating it.`,
|
|
284790
|
+
durationMs: Date.now() - start2
|
|
284791
|
+
};
|
|
284671
284792
|
}
|
|
284672
284793
|
const navMsg = String(result.message ?? "Navigation failed");
|
|
284673
284794
|
const navHint = navMsg.toLowerCase().includes("connection") || navMsg.toLowerCase().includes("refused") || navMsg.toLowerCase().includes("err_connection") ? " (the URL appears unreachable — check if the target server is running and accepting connections)" : navMsg.toLowerCase().includes("timeout") ? " (page load timed out — try again or use a different URL)" : "";
|
|
@@ -284689,7 +284810,7 @@ var init_browser_action = __esm({
|
|
|
284689
284810
|
return {
|
|
284690
284811
|
success: false,
|
|
284691
284812
|
output: `Click on ${args.selector} failed: ${clickMsg}`,
|
|
284692
|
-
error: `browser_action click failed: ${clickMsg}. Try dom_summary first to see what selectors exist on the page
|
|
284813
|
+
error: `browser_action click failed: ${clickMsg}. Try dom_summary first to see what selectors exist on the page. ${browserActionRuntimeHint()}`,
|
|
284693
284814
|
durationMs: Date.now() - start2
|
|
284694
284815
|
};
|
|
284695
284816
|
}
|
|
@@ -284731,7 +284852,7 @@ var init_browser_action = __esm({
|
|
|
284731
284852
|
return {
|
|
284732
284853
|
success: false,
|
|
284733
284854
|
output: `Type into ${args.selector} failed: ${typeMsg}`,
|
|
284734
|
-
error: `browser_action type failed: ${typeMsg}. Verify the element is visible and is an input/textarea — use dom_summary to check
|
|
284855
|
+
error: `browser_action type failed: ${typeMsg}. Verify the element is visible and is an input/textarea — use dom_summary to check. ${browserActionRuntimeHint()}`,
|
|
284735
284856
|
durationMs: Date.now() - start2
|
|
284736
284857
|
};
|
|
284737
284858
|
}
|
|
@@ -284872,7 +284993,7 @@ var init_browser_action = __esm({
|
|
|
284872
284993
|
if (!pointResult || pointResult.points.length === 0) {
|
|
284873
284994
|
return {
|
|
284874
284995
|
success: false,
|
|
284875
|
-
output: `Vision could not find "${target}" on the page. Try using dom_summary to find the CSS selector instead
|
|
284996
|
+
output: `Vision could not find "${target}" on the page. Try using dom_summary to find the CSS selector instead. ${browserActionRuntimeHint()}`,
|
|
284876
284997
|
error: "No point backend returned normalized coordinates.",
|
|
284877
284998
|
durationMs: Date.now() - start2
|
|
284878
284999
|
};
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.207",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.
|
|
9
|
+
"version": "1.0.207",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
|
@@ -4565,9 +4565,19 @@
|
|
|
4565
4565
|
}
|
|
4566
4566
|
},
|
|
4567
4567
|
"node_modules/js-yaml": {
|
|
4568
|
-
"version": "4.
|
|
4569
|
-
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.
|
|
4570
|
-
"integrity": "sha512-
|
|
4568
|
+
"version": "4.2.0",
|
|
4569
|
+
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.2.0.tgz",
|
|
4570
|
+
"integrity": "sha512-ePWsvanv0DWuDRsW8dnt+R4jQ31SCRCQ7hhNcPXZPsoBZiemuZNYGf7adZdqX2D86j6rvKp3RpCxVTSb8WQlOw==",
|
|
4571
|
+
"funding": [
|
|
4572
|
+
{
|
|
4573
|
+
"type": "github",
|
|
4574
|
+
"url": "https://github.com/sponsors/puzrin"
|
|
4575
|
+
},
|
|
4576
|
+
{
|
|
4577
|
+
"type": "github",
|
|
4578
|
+
"url": "https://github.com/sponsors/nodeca"
|
|
4579
|
+
}
|
|
4580
|
+
],
|
|
4571
4581
|
"license": "MIT",
|
|
4572
4582
|
"dependencies": {
|
|
4573
4583
|
"argparse": "^2.0.1"
|
package/package.json
CHANGED