omnius 1.0.207 → 1.0.209
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +303 -58
- package/dist/scripts/web_scrape.py +122 -5
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -10326,15 +10326,19 @@ function pngDimensions(buffer2) {
|
|
|
10326
10326
|
}
|
|
10327
10327
|
return null;
|
|
10328
10328
|
}
|
|
10329
|
-
async function
|
|
10330
|
-
const active = await
|
|
10329
|
+
async function describeFocusedEditableInContext(context2, frameMeta) {
|
|
10330
|
+
const active = await context2.evaluate(`(() => {
|
|
10331
10331
|
const el = document.activeElement;
|
|
10332
10332
|
if (!el) return null;
|
|
10333
10333
|
const rect = el.getBoundingClientRect();
|
|
10334
10334
|
const role = (el.getAttribute("role") || "").toLowerCase();
|
|
10335
|
-
const
|
|
10336
|
-
const
|
|
10337
|
-
|
|
10335
|
+
const contentEditableAttr = el.getAttribute("contenteditable");
|
|
10336
|
+
const contentEditable = contentEditableAttr !== null
|
|
10337
|
+
&& (contentEditableAttr === "" || String(contentEditableAttr).toLowerCase() === "true");
|
|
10338
|
+
const disabled = !!el.disabled;
|
|
10339
|
+
const readOnly = !!el.readOnly;
|
|
10340
|
+
const isEditable = (el.matches("input, textarea") && !disabled && !readOnly)
|
|
10341
|
+
|| contentEditable || el.isContentEditable === true
|
|
10338
10342
|
|| ["textbox", "searchbox", "combobox"].includes(role);
|
|
10339
10343
|
return {
|
|
10340
10344
|
tag: String(el.tagName || "").toLowerCase(),
|
|
@@ -10346,10 +10350,35 @@ async function describeFocusedEditable(pageHandle) {
|
|
|
10346
10350
|
placeholder: el.getAttribute("placeholder") || "",
|
|
10347
10351
|
text: String(el.textContent || "").trim().slice(0, 120),
|
|
10348
10352
|
isEditable,
|
|
10353
|
+
disabled,
|
|
10354
|
+
readOnly,
|
|
10349
10355
|
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
|
|
10350
10356
|
};
|
|
10351
10357
|
})()`);
|
|
10352
|
-
|
|
10358
|
+
if (!active || typeof active !== "object")
|
|
10359
|
+
return null;
|
|
10360
|
+
return frameMeta ? { ...active, frame: frameMeta } : active;
|
|
10361
|
+
}
|
|
10362
|
+
/**
 * Describe the element that currently has focus, preferring an editable one.
 *
 * The main document is inspected first. When its focused element is not
 * editable (or the inspection failed), every non-main frame is inspected in
 * order and the first editable focused element found is returned. When no
 * frame yields an editable element, the main-document description (possibly
 * null) is returned so callers can still report what is focused.
 *
 * @param {object} pageHandle - page-like object; `url`, `frames` and
 *   `mainFrame` are used only when present (presumably a Playwright Page).
 * @returns {Promise<object|null>} focused-element description, tagged with a
 *   `frame` metadata object, or null.
 */
async function describeFocusedEditable(pageHandle) {
  // Inspection failures are deliberately downgraded to "nothing found".
  const toNull = () => null;
  const mainMeta = { kind: "main", url: pageHandle.url?.() ?? "" };
  const mainDescription = await describeFocusedEditableInContext(pageHandle, mainMeta).catch(toNull);
  if (mainDescription?.["isEditable"]) {
    return mainDescription;
  }
  const allFrames = typeof pageHandle.frames === "function" ? pageHandle.frames() : [];
  const topFrame = typeof pageHandle.mainFrame === "function" ? pageHandle.mainFrame() : null;
  for (let index = 0; index < allFrames.length; index += 1) {
    const childFrame = allFrames[index];
    // The main frame was already covered by the page-level inspection above.
    if (!childFrame || childFrame === topFrame) {
      continue;
    }
    const frameMeta = {
      kind: "frame",
      index,
      url: typeof childFrame.url === "function" ? childFrame.url() : "",
      name: typeof childFrame.name === "function" ? childFrame.name() : ""
    };
    const frameDescription = await describeFocusedEditableInContext(childFrame, frameMeta).catch(toNull);
    if (frameDescription?.["isEditable"]) {
      return frameDescription;
    }
  }
  return mainDescription;
}
|
|
10354
10383
|
async function clickAndFillBrowserTarget(pageHandle, target, text, typingDelay) {
|
|
10355
10384
|
const viewport = pageHandle.viewportSize?.() ?? { width: 1280, height: 720 };
|
|
@@ -10464,8 +10493,8 @@ ${input.text.slice(0, 2e4)}`.toLowerCase();
|
|
|
10464
10493
|
}
|
|
10465
10494
|
return { kind: "none", confidence: 0, evidence: [] };
|
|
10466
10495
|
}
|
|
10467
|
-
async function
|
|
10468
|
-
const candidate = await
|
|
10496
|
+
async function findBrowserVisualCandidateInContext(context2, target, visualX, visualY, forceCandidate, includeOffscreen = false, scrollIntoView = false) {
|
|
10497
|
+
const candidate = await context2.evaluate(`(() => {
|
|
10469
10498
|
const target = ${JSON.stringify(target)};
|
|
10470
10499
|
const visualX = ${JSON.stringify(visualX)};
|
|
10471
10500
|
const visualY = ${JSON.stringify(visualY)};
|
|
@@ -10496,12 +10525,55 @@ async function findBrowserVisualCandidate(pageHandle, target, visualX, visualY,
|
|
|
10496
10525
|
if (window.CSS && typeof window.CSS.escape === "function") return window.CSS.escape(id);
|
|
10497
10526
|
return String(id).replace(/["\\\\]/g, "\\\\$&");
|
|
10498
10527
|
};
|
|
10528
|
+
const hasVisibleStyle = (el) => {
|
|
10529
|
+
for (let cur = el; cur && cur.nodeType === 1; cur = cur.parentElement) {
|
|
10530
|
+
const style = getComputedStyle(cur);
|
|
10531
|
+
if (style.display === "none" || style.visibility === "hidden" || style.visibility === "collapse") return false;
|
|
10532
|
+
const opacity = Number(style.opacity);
|
|
10533
|
+
if (Number.isFinite(opacity) && opacity <= 0.02) return false;
|
|
10534
|
+
}
|
|
10535
|
+
return true;
|
|
10536
|
+
};
|
|
10537
|
+
const hasUsableBox = (el) => {
|
|
10538
|
+
const rect = el.getBoundingClientRect();
|
|
10539
|
+
return rect.width > 1 && rect.height > 1;
|
|
10540
|
+
};
|
|
10541
|
+
const inViewport = (rect) => !(rect.bottom < 0 || rect.right < 0 || rect.top > window.innerHeight || rect.left > window.innerWidth);
|
|
10542
|
+
const isRendered = (el) => !!el && hasUsableBox(el) && hasVisibleStyle(el);
|
|
10499
10543
|
const associatedControl = (el) => {
|
|
10500
10544
|
if (/^label$/i.test(el.tagName || "") && el.getAttribute("for")) {
|
|
10501
10545
|
return document.getElementById(el.getAttribute("for"));
|
|
10502
10546
|
}
|
|
10547
|
+
if (/^label$/i.test(el.tagName || "")) {
|
|
10548
|
+
if (el.control) return el.control;
|
|
10549
|
+
const nested = el.querySelector("input, textarea, select, [contenteditable='true'], [role='textbox']");
|
|
10550
|
+
if (nested) return nested;
|
|
10551
|
+
const labelRect = el.getBoundingClientRect();
|
|
10552
|
+
let scope = el.parentElement;
|
|
10553
|
+
for (let depth = 0; depth < 5 && scope; depth++, scope = scope.parentElement) {
|
|
10554
|
+
const controls = Array.from(scope.querySelectorAll("input, textarea, select, [contenteditable='true'], [role='textbox']"))
|
|
10555
|
+
.filter(control => {
|
|
10556
|
+
const rect = control.getBoundingClientRect();
|
|
10557
|
+
return rect.width > 1 && rect.height > 1 && rect.top >= labelRect.top - 12 && Math.abs(rect.left - labelRect.left) < 260;
|
|
10558
|
+
})
|
|
10559
|
+
.sort((a, b) => {
|
|
10560
|
+
const ar = a.getBoundingClientRect();
|
|
10561
|
+
const br = b.getBoundingClientRect();
|
|
10562
|
+
return (Math.abs(ar.top - labelRect.bottom) - Math.abs(br.top - labelRect.bottom))
|
|
10563
|
+
|| (Math.abs(ar.left - labelRect.left) - Math.abs(br.left - labelRect.left));
|
|
10564
|
+
});
|
|
10565
|
+
if (controls[0]) return controls[0];
|
|
10566
|
+
}
|
|
10567
|
+
}
|
|
10503
10568
|
return el;
|
|
10504
10569
|
};
|
|
10570
|
+
const clickTargetFor = (el) => {
|
|
10571
|
+
const control = associatedControl(el) || el;
|
|
10572
|
+
if (control === el) return el;
|
|
10573
|
+
if (isRendered(control)) return control;
|
|
10574
|
+
if (/^label$/i.test(el.tagName || "") && isRendered(el)) return el;
|
|
10575
|
+
return control;
|
|
10576
|
+
};
|
|
10505
10577
|
const associatedLabelText = (el) => {
|
|
10506
10578
|
const control = associatedControl(el);
|
|
10507
10579
|
const id = control && control.id ? control.id : el.id;
|
|
@@ -10544,20 +10616,25 @@ async function findBrowserVisualCandidate(pageHandle, target, visualX, visualY,
|
|
|
10544
10616
|
el.id || "",
|
|
10545
10617
|
el.className || "",
|
|
10546
10618
|
el.getAttribute("type") || "",
|
|
10619
|
+
el.getAttribute("autocomplete") || "",
|
|
10547
10620
|
].join(" ").toLowerCase();
|
|
10548
10621
|
const infoFor = (el, score) => {
|
|
10549
|
-
const
|
|
10550
|
-
const
|
|
10622
|
+
const control = associatedControl(el) || el;
|
|
10623
|
+
const clickTarget = clickTargetFor(el);
|
|
10624
|
+
const rect = clickTarget.getBoundingClientRect();
|
|
10625
|
+
const visible = inViewport(rect) && hasVisibleStyle(clickTarget);
|
|
10551
10626
|
return {
|
|
10552
|
-
tag: String(el.tagName || "").toLowerCase(),
|
|
10553
|
-
id: el.id || "",
|
|
10554
|
-
className: String(el.className || "").slice(0, 160),
|
|
10555
|
-
role: el.getAttribute("role") || "",
|
|
10556
|
-
ariaLabel: el.getAttribute("aria-label") || "",
|
|
10557
|
-
name: el.getAttribute("name") || "",
|
|
10558
|
-
type: el.getAttribute("type") || "",
|
|
10559
|
-
|
|
10560
|
-
|
|
10627
|
+
tag: String(control.tagName || el.tagName || "").toLowerCase(),
|
|
10628
|
+
id: control.id || el.id || "",
|
|
10629
|
+
className: String(control.className || el.className || "").slice(0, 160),
|
|
10630
|
+
role: control.getAttribute("role") || el.getAttribute("role") || "",
|
|
10631
|
+
ariaLabel: control.getAttribute("aria-label") || el.getAttribute("aria-label") || "",
|
|
10632
|
+
name: control.getAttribute("name") || el.getAttribute("name") || "",
|
|
10633
|
+
type: control.getAttribute("type") || el.getAttribute("type") || "",
|
|
10634
|
+
autocomplete: control.getAttribute("autocomplete") || el.getAttribute("autocomplete") || "",
|
|
10635
|
+
placeholder: control.getAttribute("placeholder") || el.getAttribute("placeholder") || "",
|
|
10636
|
+
text: String(el.innerText || el.textContent || control.innerText || control.textContent || control.getAttribute("value") || "").trim().slice(0, 240),
|
|
10637
|
+
clickTag: String(clickTarget.tagName || "").toLowerCase(),
|
|
10561
10638
|
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
|
|
10562
10639
|
center: { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 },
|
|
10563
10640
|
visible,
|
|
@@ -10579,9 +10656,11 @@ async function findBrowserVisualCandidate(pageHandle, target, visualX, visualY,
|
|
|
10579
10656
|
let best = null;
|
|
10580
10657
|
let bestElement = null;
|
|
10581
10658
|
for (const el of Array.from(document.querySelectorAll(selectors))) {
|
|
10582
|
-
|
|
10583
|
-
|
|
10584
|
-
|
|
10659
|
+
if (!isRendered(el)) continue;
|
|
10660
|
+
const clickTarget = clickTargetFor(el);
|
|
10661
|
+
if (!isRendered(clickTarget)) continue;
|
|
10662
|
+
const rect = clickTarget.getBoundingClientRect();
|
|
10663
|
+
const visible = inViewport(rect);
|
|
10585
10664
|
if (!includeOffscreen && !visible) continue;
|
|
10586
10665
|
const hay = textFor(el);
|
|
10587
10666
|
const tokenHits = tokens.filter(t => hay.includes(t)).length;
|
|
@@ -10592,7 +10671,7 @@ async function findBrowserVisualCandidate(pageHandle, target, visualX, visualY,
|
|
|
10592
10671
|
if (score <= (forceCandidate ? 8 : 0)) continue;
|
|
10593
10672
|
if (!best || score > best.score) {
|
|
10594
10673
|
best = infoFor(el, score);
|
|
10595
|
-
bestElement =
|
|
10674
|
+
bestElement = clickTarget;
|
|
10596
10675
|
}
|
|
10597
10676
|
}
|
|
10598
10677
|
if (bestElement && scrollIntoView && best && !best.visible) {
|
|
@@ -10604,6 +10683,82 @@ async function findBrowserVisualCandidate(pageHandle, target, visualX, visualY,
|
|
|
10604
10683
|
})()`);
|
|
10605
10684
|
return candidate && typeof candidate === "object" ? candidate : null;
|
|
10606
10685
|
}
|
|
10686
|
+
/**
 * Translate a candidate found inside one browsing context into the
 * coordinate space of the top-level viewport.
 *
 * @param {object} candidate - candidate record; `rect` ({x, y, width, height}),
 *   `center` ({x, y}) and `visible` are read, all other keys are copied through.
 * @param {{x: number, y: number}} offset - amount added to the candidate's
 *   local coordinates (the frame's top-left corner, or 0/0 for the main page).
 * @param {{width: number, height: number}} viewport - top-level viewport size.
 * @param {object} [frameMeta] - when given, attached as `frame` together with
 *   the untouched local geometry (`frameLocalRect`, `frameLocalCenter`).
 * @returns {object} copy of the candidate with translated `rect`/`center` and
 *   a `visible` flag that is true only when the candidate was already visible
 *   in its own context AND its translated rect intersects the viewport.
 */
function offsetBrowserCandidate(candidate, offset, viewport, frameMeta) {
  const rect = candidate["rect"];
  const center = candidate["center"];
  const x = Number(rect?.x) + offset.x;
  const y = Number(rect?.y) + offset.y;
  const width = Number(rect?.width);
  const height = Number(rect?.height);
  const localCenterX = Number(center?.x);
  const localCenterY = Number(center?.y);
  // A candidate without a usable centre still has a well-defined click point
  // when its rect is known: fall back to the rect midpoint instead of NaN.
  const cx = Number.isFinite(localCenterX) ? localCenterX + offset.x : x + width / 2;
  const cy = Number.isFinite(localCenterY) ? localCenterY + offset.y : y + height / 2;
  const rectIsFinite = [x, y, width, height].every((value) => Number.isFinite(value));
  const outsideViewport = y + height < 0 || x + width < 0 || y > viewport.height || x > viewport.width;
  const visible = rectIsFinite && !outsideViewport;
  return {
    ...candidate,
    rect: { x, y, width, height },
    center: { x: cx, y: cy },
    visible: candidate["visible"] === true && visible,
    ...(frameMeta
      ? {
          frame: frameMeta,
          frameLocalRect: rect,
          frameLocalCenter: center
        }
      : {})
  };
}
|
|
10714
|
+
/**
 * Find the best-scoring DOM candidate for a visual target across the main
 * document and every child frame.
 *
 * The main document is searched first; each non-main frame is then searched
 * with the visual point converted into that frame's local coordinates
 * (clamped to the frame box). Frame results are translated back into
 * top-level coordinates via offsetBrowserCandidate. Failures in any single
 * context are treated as "no candidate" rather than aborting the search.
 *
 * @param {object} pageHandle - page-like object (presumably a Playwright Page);
 *   `viewportSize`, `frames` and `mainFrame` are used only when present.
 * @param {*} target - target description forwarded to the per-context search.
 * @param {number} visualX - x of the visual point in top-level coordinates.
 * @param {number} visualY - y of the visual point in top-level coordinates.
 * @param {boolean} forceCandidate - forwarded to the per-context search.
 * @param {boolean} [includeOffscreen=false] - also consider frames and
 *   candidates that are outside the viewport.
 * @param {boolean} [scrollIntoView=false] - scroll a frame element into view
 *   when the frame or its candidate is not visible; such candidates are
 *   tagged `scrolledIntoView: true`.
 * @returns {Promise<object|null>} highest-`score` candidate, or null.
 */
async function findBrowserVisualCandidate(pageHandle, target, visualX, visualY, forceCandidate, includeOffscreen = false, scrollIntoView = false) {
  const viewport = pageHandle.viewportSize?.() ?? { width: 1280, height: 720 };
  const candidates = [];
  const top = await findBrowserVisualCandidateInContext(pageHandle, target, visualX, visualY, forceCandidate, includeOffscreen, scrollIntoView).catch(() => null);
  if (top) {
    candidates.push(offsetBrowserCandidate(top, { x: 0, y: 0 }, viewport));
  }
  const frames = typeof pageHandle.frames === "function" ? pageHandle.frames() : [];
  const mainFrame = typeof pageHandle.mainFrame === "function" ? pageHandle.mainFrame() : null;
  for (let i2 = 0; i2 < frames.length; i2++) {
    const frame = frames[i2];
    if (!frame || frame === mainFrame) {
      continue;
    }
    const elementHandle = typeof frame.frameElement === "function" ? await frame.frameElement().catch(() => null) : null;
    if (!elementHandle) {
      continue;
    }
    try {
      let box = await elementHandle.boundingBox().catch(() => null);
      // Collapsed or unrendered frames cannot contain a clickable target.
      if (!box || box.width <= 1 || box.height <= 1) {
        continue;
      }
      const frameVisible = !(box.y + box.height < 0 || box.x + box.width < 0 || box.y > viewport.height || box.x > viewport.width);
      if (!includeOffscreen && !frameVisible) {
        continue;
      }
      // Convert the visual point into frame-local coordinates, clamped to the frame box.
      const localX = Math.max(0, Math.min(box.width, visualX - box.x));
      const localY = Math.max(0, Math.min(box.height, visualY - box.y));
      let candidate = await findBrowserVisualCandidateInContext(frame, target, localX, localY, forceCandidate, includeOffscreen, scrollIntoView).catch(() => null);
      if (!candidate) {
        continue;
      }
      if (scrollIntoView && (!candidate["visible"] || !frameVisible)) {
        if (typeof elementHandle.scrollIntoViewIfNeeded === "function") {
          await elementHandle.scrollIntoViewIfNeeded().catch(() => void 0);
        }
        // Scrolling may have moved the frame; re-read its box before offsetting.
        box = await elementHandle.boundingBox().catch(() => box);
        candidate = {
          ...candidate,
          scrolledIntoView: true
        };
      }
      if (!box) {
        continue;
      }
      candidates.push(offsetBrowserCandidate(candidate, { x: box.x, y: box.y }, viewport, {
        kind: "frame",
        index: i2,
        url: typeof frame.url === "function" ? frame.url() : "",
        name: typeof frame.name === "function" ? frame.name() : "",
        rect: { x: box.x, y: box.y, width: box.width, height: box.height }
      }));
    } finally {
      // Release the frame-element handle on every exit path (including the
      // `continue`s above) so repeated searches do not accumulate handles.
      if (typeof elementHandle.dispose === "function") {
        try {
          await elementHandle.dispose();
        } catch {
          // Best-effort cleanup: a failed dispose must not mask the search result.
        }
      }
    }
  }
  return candidates
    .filter((candidate) => includeOffscreen || candidate["visible"] === true)
    .sort((a2, b) => Number(b["score"] ?? 0) - Number(a2["score"] ?? 0))[0] ?? null;
}
|
|
10607
10762
|
function ok(output, start2) {
|
|
10608
10763
|
return { success: true, output, durationMs: Date.now() - start2 };
|
|
10609
10764
|
}
|
|
@@ -10682,7 +10837,7 @@ var init_playwright_browser = __esm({
|
|
|
10682
10837
|
"clear_diagnostics",
|
|
10683
10838
|
"close"
|
|
10684
10839
|
],
|
|
10685
|
-
description: "Action to perform:\n- navigate: go to a URL\n- click: click element by selector\n- fill: clear input and type text by selector, or by natural-language target when selector is absent\n- type: type text character by character into a selector, or into the currently focused element after visual_click\n- press: press a key (Enter, Tab, Escape, etc.)\n- screenshot: capture the headless browser page, not the desktop; use value to choose the output file path\n- observe_bundle: capture URL/title/viewport, DOM summary, a11y, diagnostics, screenshot, and gate assessment\n- visual_click: browser screenshot -> Moondream point -> elementFromPoint -> human-like Playwright mouse click -> post-action screenshot\n- evaluate: run JavaScript in page context\n- content: get page text content (readable, stripped)\n- dom: get raw page HTML (truncated)\n- dom_summary: compact interactive DOM summary with selectors\n- innerText: get innerText of a specific element\n- select: select dropdown option by value\n- check/uncheck: toggle checkbox\n- hover: hover over element\n- wait: wait for a selector to appear\n- waitForNavigation: wait for page navigation to complete\n- waitForSelector: wait for element matching selector\n- title: get page title\n- url: get current URL\n- getAttribute: get element attribute value\n- innerHTML: get element's innerHTML\n- textContent: get element's textContent\n- goBack/goForward/reload: browser navigation\n- pdf: save page as PDF\n- close: close browser session"
|
|
10840
|
+
description: "Action to perform:\n- navigate: go to a URL\n- click: click element by selector\n- fill: clear input and type text by selector, or by natural-language target when selector is absent\n- type: type text character by character into a selector, or into the currently focused element after visual_click\n- press: press a key (Enter, Tab, Escape, etc.)\n- screenshot: capture the headless browser page, not the desktop; use value to choose the output file path\n- observe_bundle: capture URL/title/viewport, DOM summary, a11y, diagnostics, screenshot, and gate assessment\n- visual_click: browser screenshot -> Moondream point -> elementFromPoint -> human-like Playwright mouse click -> post-action screenshot\n- evaluate: run JavaScript in page context\n- content: get page text content (readable, stripped)\n- dom: get raw page HTML (truncated)\n- dom_summary: compact interactive DOM summary with selectors\n- innerText: get innerText of a specific element\n- select: select dropdown option by value\n- check/uncheck: toggle checkbox\n- hover: hover over element\n- wait: wait for a selector to appear, or sleep for timeout ms when no selector is provided\n- waitForNavigation: wait for page navigation to complete\n- waitForSelector: wait for element matching selector\n- title: get page title\n- url: get current URL\n- getAttribute: get element attribute value\n- innerHTML: get element's innerHTML\n- textContent: get element's textContent\n- goBack/goForward/reload: browser navigation\n- pdf: save page as PDF\n- close: close browser session"
|
|
10686
10841
|
},
|
|
10687
10842
|
url: {
|
|
10688
10843
|
type: "string",
|
|
@@ -10843,30 +10998,14 @@ var init_playwright_browser = __esm({
|
|
|
10843
10998
|
await page.type(selector, text, { timeout: timeout2, delay: typingDelay });
|
|
10844
10999
|
return ok(`Typed "${text}" into ${selector}`, start2);
|
|
10845
11000
|
}
|
|
10846
|
-
const active = await page
|
|
10847
|
-
const el = document.activeElement;
|
|
10848
|
-
if (!el) return null;
|
|
10849
|
-
const rect = el.getBoundingClientRect();
|
|
10850
|
-
return {
|
|
10851
|
-
tag: String(el.tagName || "").toLowerCase(),
|
|
10852
|
-
id: el.id || "",
|
|
10853
|
-
name: el.getAttribute("name") || "",
|
|
10854
|
-
role: el.getAttribute("role") || "",
|
|
10855
|
-
ariaLabel: el.getAttribute("aria-label") || "",
|
|
10856
|
-
type: el.getAttribute("type") || "",
|
|
10857
|
-
placeholder: el.getAttribute("placeholder") || "",
|
|
10858
|
-
text: String(el.textContent || "").trim().slice(0, 120),
|
|
10859
|
-
isEditable: el.matches("input, textarea, [contenteditable=''], [contenteditable='true']")
|
|
10860
|
-
|| ["textbox", "searchbox", "combobox"].includes((el.getAttribute("role") || "").toLowerCase()),
|
|
10861
|
-
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
|
|
10862
|
-
};
|
|
10863
|
-
})()`);
|
|
11001
|
+
const active = await describeFocusedEditable(page);
|
|
10864
11002
|
if (!active || typeof active !== "object" || active.isEditable !== true) {
|
|
10865
11003
|
return fail("No editable focused element is active; use visual_click on a form field or pass a selector to type.", start2);
|
|
10866
11004
|
}
|
|
10867
11005
|
await page.keyboard.type(text, { delay: typingDelay });
|
|
10868
11006
|
const label = active && typeof active === "object" ? `<${active.tag || "element"}>${active.id ? `#${active.id}` : ""}` : "focused element";
|
|
10869
|
-
|
|
11007
|
+
const frame = active["frame"];
|
|
11008
|
+
return ok(`Typed "${text}" into ${label}${frame?.kind === "frame" ? ` in frame ${frame.index}` : ""}`, start2);
|
|
10870
11009
|
}
|
|
10871
11010
|
case "press": {
|
|
10872
11011
|
const key = text || "Enter";
|
|
@@ -10907,7 +11046,14 @@ var init_playwright_browser = __esm({
|
|
|
10907
11046
|
return ok(`Hovered: ${resolvedSelector}${resolvedSelector !== selector ? ` (from ${selector})` : ""}`, start2);
|
|
10908
11047
|
}
|
|
10909
11048
|
// ── Waiting ──
|
|
10910
|
-
case "wait":
|
|
11049
|
+
case "wait": {
|
|
11050
|
+
if (selector) {
|
|
11051
|
+
await page.waitForSelector(selector, { timeout: timeout2 });
|
|
11052
|
+
return ok(`Element appeared: ${selector}`, start2);
|
|
11053
|
+
}
|
|
11054
|
+
await page.waitForTimeout(timeout2);
|
|
11055
|
+
return ok(`Waited ${timeout2}ms`, start2);
|
|
11056
|
+
}
|
|
10911
11057
|
case "waitForSelector": {
|
|
10912
11058
|
if (!selector)
|
|
10913
11059
|
return fail("selector is required", start2);
|
|
@@ -11360,8 +11506,13 @@ ${JSON.stringify(data, null, 2)}`, start2);
|
|
|
11360
11506
|
})()`);
|
|
11361
11507
|
let clickSource = point.source || pointResult?.source || "vision";
|
|
11362
11508
|
const candidate = await findBrowserVisualCandidate(page, visualTarget, cssX, cssY, false);
|
|
11363
|
-
|
|
11364
|
-
|
|
11509
|
+
let candidateRecord = candidate;
|
|
11510
|
+
if (!candidateRecord) {
|
|
11511
|
+
candidateRecord = await findBrowserVisualCandidate(page, visualTarget, cssX, cssY, true, true, true);
|
|
11512
|
+
if (candidateRecord)
|
|
11513
|
+
await page.waitForTimeout(150);
|
|
11514
|
+
}
|
|
11515
|
+
if (candidateRecord) {
|
|
11365
11516
|
const center = candidateRecord["center"];
|
|
11366
11517
|
const nextX = Number(center?.x);
|
|
11367
11518
|
const nextY = Number(center?.y);
|
|
@@ -11369,7 +11520,7 @@ ${JSON.stringify(data, null, 2)}`, start2);
|
|
|
11369
11520
|
cssX = Math.max(0, Math.min(viewport.width, nextX));
|
|
11370
11521
|
cssY = Math.max(0, Math.min(viewport.height, nextY));
|
|
11371
11522
|
elementInfo = candidateRecord;
|
|
11372
|
-
clickSource = `${clickSource}+dom-candidate`;
|
|
11523
|
+
clickSource = `${clickSource}+dom-candidate${candidateRecord["scrolledIntoView"] === true ? "+scroll" : ""}`;
|
|
11373
11524
|
}
|
|
11374
11525
|
}
|
|
11375
11526
|
await page.mouse.move(cssX, cssY, { steps: 12 });
|
|
@@ -284467,15 +284618,52 @@ function findScrapeScript() {
|
|
|
284467
284618
|
];
|
|
284468
284619
|
return candidates.find((p2) => existsSync44(p2)) || candidates[0];
|
|
284469
284620
|
}
|
|
284470
|
-
async function
|
|
284621
|
+
/**
 * Query the browser_action service's `/health` endpoint.
 *
 * @returns {Promise<object|null>} the parsed JSON payload when the service
 *   answers with a successful status (an empty object when the body is not a
 *   JSON object), or null when the request fails, times out after 3 seconds,
 *   or returns a non-OK status.
 */
async function probeServiceInfo() {
  let timer;
  try {
    const controller = new AbortController();
    timer = setTimeout(() => controller.abort(), 3e3);
    const res = await fetch(`${BASE_URL}/health`, { signal: controller.signal });
    if (!res.ok) {
      return null;
    }
    // The timer stays armed while the body is read so a stalled body is also
    // bounded by the timeout; an unreadable body still counts as "alive".
    const data = await res.json().catch(() => null);
    return data && typeof data === "object" ? data : {};
  } catch {
    // Connection refused / aborted: the service is treated as not running.
    return null;
  } finally {
    // Always disarm the abort timer so a failed probe does not leave a live
    // 3-second timer behind.
    clearTimeout(timer);
  }
}
|
|
284635
|
+
/**
 * Report whether the browser_action service answered its health probe.
 *
 * @returns {Promise<boolean>} true when probeServiceInfo() produced a
 *   truthy result, false otherwise.
 */
async function probeService() {
  const info = await probeServiceInfo();
  return info ? true : false;
}
|
|
284638
|
+
/**
 * Check that a health payload advertises every required capability.
 *
 * @param {object|null} info - health payload; its `capabilities` entry is
 *   honoured only when it is an array (entries are compared as strings).
 * @returns {boolean} true when every entry of REQUIRED_SERVICE_CAPABILITIES
 *   is advertised; false for a missing payload or any missing capability.
 */
function serviceHasCapabilities(info) {
  if (!info) {
    return false;
  }
  const declared = info["capabilities"];
  const advertised = new Set(Array.isArray(declared) ? declared.map((entry) => String(entry)) : []);
  for (const needed of REQUIRED_SERVICE_CAPABILITIES) {
    if (!advertised.has(needed)) {
      return false;
    }
  }
  return true;
}
|
|
284645
|
+
/**
 * Best-effort shutdown of whatever is serving the browser_action port.
 *
 * First signals the child process tracked in `serviceProcess` (if any) and
 * forgets it, then asks the operating system to terminate any process still
 * listening on DEFAULT_PORT. Every step is best-effort: failures are ignored
 * because the caller re-probes the port afterwards.
 */
function killBrowserActionServicePort() {
  if (serviceProcess && serviceProcess.pid && !serviceProcess.killed) {
    try {
      // Negative pid addresses the whole process group on POSIX.
      // NOTE(review): presumably the service is spawned detached — confirm in launchService.
      process.kill(-serviceProcess.pid, "SIGTERM");
    } catch {
      // No such group (or unsupported platform): fall through to the direct kill.
    }
    try {
      serviceProcess.kill("SIGTERM");
    } catch {
      // Already exited.
    }
    serviceProcess = null;
  }
  const commands = [
    // A pipeline's exit status is that of its last command, so without the
    // `command -v` guard a missing `lsof` would still "succeed" (xargs exits 0
    // on empty input) and the fuser fallback below would never run.
    `command -v lsof >/dev/null 2>&1 && lsof -ti tcp:${DEFAULT_PORT} | xargs -r kill -TERM`,
    `fuser -k ${DEFAULT_PORT}/tcp`
  ];
  for (const cmd of commands) {
    try {
      execSync22(cmd, { stdio: "ignore", timeout: 5e3 });
      break;
    } catch {
      // Tool missing or nothing to kill: try the next strategy.
    }
  }
}
|
|
284481
284669
|
function findPython3() {
|
|
@@ -284490,8 +284678,17 @@ function findPython3() {
|
|
|
284490
284678
|
return null;
|
|
284491
284679
|
}
|
|
284492
284680
|
async function launchService() {
|
|
284493
|
-
|
|
284494
|
-
|
|
284681
|
+
const existing = await probeServiceInfo();
|
|
284682
|
+
if (existing) {
|
|
284683
|
+
if (serviceHasCapabilities(existing))
|
|
284684
|
+
return null;
|
|
284685
|
+
killBrowserActionServicePort();
|
|
284686
|
+
for (let i2 = 0; i2 < 20; i2++) {
|
|
284687
|
+
await new Promise((r2) => setTimeout(r2, 250));
|
|
284688
|
+
if (!await probeService())
|
|
284689
|
+
break;
|
|
284690
|
+
}
|
|
284691
|
+
}
|
|
284495
284692
|
const python = findPython3();
|
|
284496
284693
|
if (!python)
|
|
284497
284694
|
return "Python 3 not found. Install Python 3.9+ to use browser automation.";
|
|
@@ -284503,6 +284700,7 @@ async function launchService() {
|
|
|
284503
284700
|
env: {
|
|
284504
284701
|
...process.env,
|
|
284505
284702
|
SCRAPE_PORT: String(DEFAULT_PORT),
|
|
284703
|
+
OMNIUS_BROWSER_ACTION_VENV: join55(omniusHomeDir(), "runtimes", "browser", ".venv-selenium"),
|
|
284506
284704
|
SCRAPE_HEADLESS_DEFAULT: process.env["SCRAPE_HEADLESS_DEFAULT"] ?? (defaultBrowserHeadless() ? "1" : "0"),
|
|
284507
284705
|
SCRAPE_REQUIRE_AUTH: "0"
|
|
284508
284706
|
}
|
|
@@ -284645,13 +284843,33 @@ async function apiCall(endpoint, method = "POST", body) {
|
|
|
284645
284843
|
url += `?${params.toString()}`;
|
|
284646
284844
|
}
|
|
284647
284845
|
const res = await fetch(url, options2);
|
|
284648
|
-
|
|
284846
|
+
const raw = await res.text();
|
|
284847
|
+
try {
|
|
284848
|
+
return JSON.parse(raw);
|
|
284849
|
+
} catch {
|
|
284850
|
+
return {
|
|
284851
|
+
ok: false,
|
|
284852
|
+
error: `HTTP ${res.status} from browser_action service: ${raw.slice(0, 500)}`
|
|
284853
|
+
};
|
|
284854
|
+
}
|
|
284855
|
+
}
|
|
284856
|
+
/**
 * Build a failure message for a rejected in-page evaluation, appending
 * remediation hints for common mistakes detected in the evaluated code.
 *
 * @param {*} err - thrown value; an Error contributes its `message`, anything
 *   else is stringified.
 * @param {string} code8 - source text that was evaluated.
 * @returns {string} the error text (truncated to 500 characters) followed by
 *   zero or more `Hint: …` lines, joined with newlines.
 */
function evaluateFailureMessage2(err, code8) {
  const raw = err instanceof Error ? err.message : String(err);
  const source = String(code8 ?? "");
  const hints = [];
  if (/map is not a function/i.test(raw) && /querySelectorAll/i.test(source)) {
    hints.push("document.querySelectorAll() returns a NodeList; use Array.from(document.querySelectorAll(selector)).map(...) or [...document.querySelectorAll(selector)].map(...).");
  }
  // `(?!=)` keeps comparisons such as `.value == x` / `.value === x` from
  // being mistaken for an assignment to `.value`.
  const assignsValue = /(?:\.value\s*=(?!=)|setAttribute\(['"]value['"])/.test(source);
  if (assignsValue && /\b(input|textarea|querySelector)/i.test(source)) {
    hints.push("Direct .value assignment can bypass framework input/change handlers. Prefer browser_action type, browser_action click_xy plus input/sync paths, or playwright_browser fill/visual_click.");
  }
  return [raw.slice(0, 500), ...hints.map((hint) => `Hint: ${hint}`)].join("\n");
}
|
|
284650
|
-
var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, activeSessionUrl, BrowserActionTool;
|
|
284867
|
+
var __dirname3, DEFAULT_PORT, SCRAPE_SCRIPT, BASE_URL, serviceProcess, activeSessionId, activeSessionHeadless, activeSessionUrl, REQUIRED_SERVICE_CAPABILITIES, BrowserActionTool;
|
|
284651
284868
|
var init_browser_action = __esm({
|
|
284652
284869
|
"packages/execution/dist/tools/browser-action.js"() {
|
|
284653
284870
|
"use strict";
|
|
284654
284871
|
init_dom_summary();
|
|
284872
|
+
init_model_store();
|
|
284655
284873
|
init_network_egress_policy();
|
|
284656
284874
|
__dirname3 = dirname14(fileURLToPath6(import.meta.url));
|
|
284657
284875
|
DEFAULT_PORT = 8130;
|
|
@@ -284661,16 +284879,17 @@ var init_browser_action = __esm({
|
|
|
284661
284879
|
activeSessionId = null;
|
|
284662
284880
|
activeSessionHeadless = null;
|
|
284663
284881
|
activeSessionUrl = null;
|
|
284882
|
+
REQUIRED_SERVICE_CAPABILITIES = ["evaluate"];
|
|
284664
284883
|
BrowserActionTool = class {
|
|
284665
284884
|
name = "browser_action";
|
|
284666
|
-
description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. This is a separate Selenium/Chrome runtime from playwright_browser; do not switch between the two mid-workflow unless you intentionally navigate the second tool to the same URL. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
|
|
284885
|
+
description = "Control a persistent headless Chrome browser session for interactive web tasks. The browser stays open between calls, maintaining cookies, login state, and history. This is a separate Selenium/Chrome runtime from playwright_browser; do not switch between the two mid-workflow unless you intentionally navigate the second tool to the same URL. Use this (not web_fetch/web_crawl) when you need to: (1) log into a website, (2) fill and submit forms, (3) click buttons or links interactively, (4) take screenshots of rendered pages, (5) navigate multi-step workflows (checkout, signup, dashboards), (6) interact with elements that require JavaScript (dropdowns, modals, infinite scroll). Actions: navigate, click, click_xy, type, evaluate, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close. For browser visuals, use browser_action({action:'screenshot', width, height, output_path}) — this captures the headless browser viewport, not the desktop. Use the desktop screenshot tool only when the actual OS screen is the target. For verification of browser runtime failures, prefer playwright_browser because it exposes page_errors, console_logs, network_log, DOM/accessibility, and screenshots from the same session. IMPORTANT: Start by calling navigate with the URL — do NOT ask the user for credentials or info first. Loopback URLs (localhost, 127.0.0.1, ::1) are allowed for local development servers; private LAN and metadata URLs remain blocked. Navigate to the page, then use dom/screenshot to see what's there, then type/click to interact. Call 'close' when done to free resources. This tool does not save or download arbitrary rendered files (PDFs, archives, media) to disk — clicking a 'Download' link inside the browser does not produce a local file path for the agent. For file acquisition, use the dedicated download/file tool and validate the resulting content-type and size before treating the result as success.";
|
|
284667
284886
|
parameters = {
|
|
284668
284887
|
type: "object",
|
|
284669
284888
|
properties: {
|
|
284670
284889
|
action: {
|
|
284671
284890
|
type: "string",
|
|
284672
|
-
enum: ["navigate", "click", "click_xy", "type", "screenshot", "dom", "dom_summary", "vision_click", "scroll", "scroll_up", "scroll_down", "back", "forward", "close"],
|
|
284673
|
-
description: "Browser action to perform. Key actions:\n- 'screenshot': capture the headless browser render at width/height; returns an image part and a local file path if output_path is provided\n- 'dom_summary': compact view of interactive elements (~1KB vs 200KB raw DOM)\n- 'vision_click': screenshot the page, use Moondream vision to find an element by description, then click it. Pass the element description in 'text' parameter (e.g. text='the login button'). This is the visual grounding loop from SeeAct.\n- 'click': click by CSS selector (fastest when you know the selector)\n- 'click_xy': click at pixel coordinates (when you have exact coords)"
|
|
284891
|
+
enum: ["navigate", "click", "click_xy", "type", "evaluate", "screenshot", "dom", "dom_summary", "vision_click", "scroll", "scroll_up", "scroll_down", "back", "forward", "close"],
|
|
284892
|
+
description: "Browser action to perform. Key actions:\n- 'screenshot': capture the headless browser render at width/height; returns an image part and a local file path if output_path is provided\n- 'dom_summary': compact view of interactive elements (~1KB vs 200KB raw DOM)\n- 'vision_click': screenshot the page, use Moondream vision to find an element by description, then click it. Pass the element description in 'text' parameter (e.g. text='the login button'). This is the visual grounding loop from SeeAct.\n- 'click': click by CSS selector (fastest when you know the selector)\n- 'click_xy': click at pixel coordinates (when you have exact coords)\n- 'evaluate': run JavaScript in the active Selenium page; pass code in text"
|
|
284674
284893
|
},
|
|
284675
284894
|
url: {
|
|
284676
284895
|
type: "string",
|
|
@@ -284682,7 +284901,7 @@ var init_browser_action = __esm({
|
|
|
284682
284901
|
},
|
|
284683
284902
|
text: {
|
|
284684
284903
|
type: "string",
|
|
284685
|
-
description: "Text to type (for 'type' action) OR element description to find and click (for 'vision_click' action, e.g. 'the submit button', 'the search field', 'the country dropdown')"
|
|
284904
|
+
description: "Text to type (for 'type' action), JS code (for 'evaluate'), OR element description to find and click (for 'vision_click' action, e.g. 'the submit button', 'the search field', 'the country dropdown')"
|
|
284686
284905
|
},
|
|
284687
284906
|
x: {
|
|
284688
284907
|
type: "number",
|
|
@@ -284856,6 +285075,32 @@ Runtime: browser_action Selenium/Chrome session. Continue with browser_action fo
|
|
|
284856
285075
|
durationMs: Date.now() - start2
|
|
284857
285076
|
};
|
|
284858
285077
|
}
|
|
285078
|
+
case "evaluate": {
|
|
285079
|
+
const code8 = typeof args.text === "string" ? args.text : typeof args.value === "string" ? args.value : "";
|
|
285080
|
+
if (!code8.trim())
|
|
285081
|
+
return { success: false, output: "", error: "text is required for evaluate action", durationMs: Date.now() - start2 };
|
|
285082
|
+
result = await apiCall("/evaluate", "POST", { script: code8 });
|
|
285083
|
+
if (result.ok) {
|
|
285084
|
+
const resultType = String(result["result_type"] ?? "unknown");
|
|
285085
|
+
const payload = result["result"];
|
|
285086
|
+
const rendered = payload === void 0 ? "undefined" : typeof payload === "string" ? payload : JSON.stringify(payload, null, 2);
|
|
285087
|
+
const truncated = rendered.length > 2e4 ? `${rendered.slice(0, 2e4)}
|
|
285088
|
+
... (truncated)` : rendered;
|
|
285089
|
+
return {
|
|
285090
|
+
success: true,
|
|
285091
|
+
output: `Evaluation result (${resultType}):
|
|
285092
|
+
${truncated}`,
|
|
285093
|
+
durationMs: Date.now() - start2
|
|
285094
|
+
};
|
|
285095
|
+
}
|
|
285096
|
+
const evalMsg = String(result.error ?? result.message ?? "Evaluate failed");
|
|
285097
|
+
return {
|
|
285098
|
+
success: false,
|
|
285099
|
+
output: "",
|
|
285100
|
+
error: `browser_action evaluate failed: ${evaluateFailureMessage2(evalMsg, code8)} ${browserActionRuntimeHint()}`,
|
|
285101
|
+
durationMs: Date.now() - start2
|
|
285102
|
+
};
|
|
285103
|
+
}
|
|
284859
285104
|
case "screenshot": {
|
|
284860
285105
|
if (requestedWidth || requestedHeight || requestedScale) {
|
|
284861
285106
|
const currentW = requestedWidth ?? 1280;
|
|
@@ -285039,7 +285284,7 @@ Runtime: browser_action Selenium/Chrome session. Continue with browser_action fo
|
|
|
285039
285284
|
result = await apiCall("/history/forward", "POST");
|
|
285040
285285
|
return { success: !!result.ok, output: "Navigated forward", durationMs: Date.now() - start2 };
|
|
285041
285286
|
default:
|
|
285042
|
-
return { success: false, output: "", error: `Unknown action: ${action}. Available: navigate, click, click_xy, type, screenshot, dom, scroll, scroll_up, scroll_down, back, forward, close`, durationMs: Date.now() - start2 };
|
|
285287
|
+
return { success: false, output: "", error: `Unknown action: ${action}. Available: navigate, click, click_xy, type, evaluate, screenshot, dom, dom_summary, vision_click, scroll, scroll_up, scroll_down, back, forward, close`, durationMs: Date.now() - start2 };
|
|
285043
285288
|
}
|
|
285044
285289
|
} catch (err) {
|
|
285045
285290
|
return {
|
|
@@ -32,7 +32,13 @@ from typing import Dict, Optional
|
|
|
32
32
|
# ──────────────────────────────────────────────────────────────
|
|
33
33
|
# 0) Embedded venv bootstrap (same pattern as other services)
|
|
34
34
|
# ──────────────────────────────────────────────────────────────
|
|
35
|
-
|
|
35
|
+
SCRIPT_PATH = Path(__file__).resolve()
|
|
36
|
+
SCRIPT_DIR = SCRIPT_PATH.parent
|
|
37
|
+
OMNIUS_HOME = Path(os.environ.get("OMNIUS_HOME") or (Path.home() / ".omnius"))
|
|
38
|
+
VENV_DIR = Path(
|
|
39
|
+
os.environ.get("OMNIUS_BROWSER_ACTION_VENV")
|
|
40
|
+
or (OMNIUS_HOME / "runtimes" / "browser" / ".venv-selenium")
|
|
41
|
+
)
|
|
36
42
|
|
|
37
43
|
|
|
38
44
|
def _in_venv() -> bool:
|
|
@@ -48,6 +54,7 @@ def _ensure_venv_and_reexec() -> None:
|
|
|
48
54
|
return
|
|
49
55
|
python = sys.executable
|
|
50
56
|
if not VENV_DIR.exists():
|
|
57
|
+
VENV_DIR.parent.mkdir(parents=True, exist_ok=True)
|
|
51
58
|
print(f"[bootstrap] creating virtualenv at {VENV_DIR}", file=sys.stderr)
|
|
52
59
|
subprocess.check_call([python, "-m", "venv", str(VENV_DIR)])
|
|
53
60
|
pip_bin = VENV_DIR / ("Scripts/pip.exe" if os.name == "nt" else "bin/pip")
|
|
@@ -69,10 +76,21 @@ _ensure_venv_and_reexec()
|
|
|
69
76
|
# ──────────────────────────────────────────────────────────────
|
|
70
77
|
import subprocess # noqa: E402 (re-import after re-exec)
|
|
71
78
|
|
|
72
|
-
|
|
73
|
-
SCRIPT_DIR = SCRIPT_PATH.parent
|
|
74
|
-
SETUP_MARKER = SCRIPT_DIR / ".scrape_setup_complete"
|
|
79
|
+
SETUP_MARKER = VENV_DIR / ".scrape_setup_complete"
|
|
75
80
|
OUT_DIR = SCRIPT_DIR / "frames"
|
|
81
|
+
SERVICE_VERSION = "2026-06-01-evaluate-v1"
|
|
82
|
+
SERVICE_CAPABILITIES = [
|
|
83
|
+
"navigate",
|
|
84
|
+
"click",
|
|
85
|
+
"click_xy",
|
|
86
|
+
"type",
|
|
87
|
+
"evaluate",
|
|
88
|
+
"screenshot",
|
|
89
|
+
"dom",
|
|
90
|
+
"scroll",
|
|
91
|
+
"history",
|
|
92
|
+
"events",
|
|
93
|
+
]
|
|
76
94
|
|
|
77
95
|
|
|
78
96
|
def _pip_install(*pkgs: str) -> None:
|
|
@@ -129,6 +147,7 @@ from selenium.webdriver.common.by import By # noqa: E402
|
|
|
129
147
|
from selenium.webdriver.common.keys import Keys # noqa: E402
|
|
130
148
|
from selenium.webdriver.chrome.options import Options # noqa: E402
|
|
131
149
|
from selenium.webdriver.chrome.service import Service # noqa: E402
|
|
150
|
+
from selenium.webdriver.remote.webelement import WebElement # noqa: E402
|
|
132
151
|
from selenium.webdriver.support import expected_conditions as EC # noqa: E402
|
|
133
152
|
from selenium.webdriver.support.ui import WebDriverWait # noqa: E402
|
|
134
153
|
from webdriver_manager.chrome import ChromeDriverManager # noqa: E402
|
|
@@ -160,6 +179,64 @@ def _truthy(value) -> bool:
|
|
|
160
179
|
return str(value).lower() in ("1", "true", "yes", "on")
|
|
161
180
|
|
|
162
181
|
|
|
182
|
+
def _serialize_script_result(value, depth: int = 0, seen: Optional[set[int]] = None):
|
|
183
|
+
if seen is None:
|
|
184
|
+
seen = set()
|
|
185
|
+
if value is None or isinstance(value, (str, int, float, bool)):
|
|
186
|
+
return value
|
|
187
|
+
if depth > 5:
|
|
188
|
+
return str(value)
|
|
189
|
+
if isinstance(value, WebElement):
|
|
190
|
+
try:
|
|
191
|
+
rect = value.rect or {}
|
|
192
|
+
except Exception:
|
|
193
|
+
rect = {}
|
|
194
|
+
try:
|
|
195
|
+
text = value.text or ""
|
|
196
|
+
except Exception:
|
|
197
|
+
text = ""
|
|
198
|
+
try:
|
|
199
|
+
tag = value.tag_name or ""
|
|
200
|
+
except Exception:
|
|
201
|
+
tag = ""
|
|
202
|
+
def attr(name: str) -> str:
|
|
203
|
+
try:
|
|
204
|
+
return value.get_attribute(name) or ""
|
|
205
|
+
except Exception:
|
|
206
|
+
return ""
|
|
207
|
+
return {
|
|
208
|
+
"__omnius_type": "element",
|
|
209
|
+
"tag": tag,
|
|
210
|
+
"id": attr("id"),
|
|
211
|
+
"name": attr("name"),
|
|
212
|
+
"type": attr("type"),
|
|
213
|
+
"role": attr("role"),
|
|
214
|
+
"ariaLabel": attr("aria-label"),
|
|
215
|
+
"text": text[:240],
|
|
216
|
+
"rect": {
|
|
217
|
+
"x": rect.get("x", 0),
|
|
218
|
+
"y": rect.get("y", 0),
|
|
219
|
+
"width": rect.get("width", 0),
|
|
220
|
+
"height": rect.get("height", 0),
|
|
221
|
+
},
|
|
222
|
+
}
|
|
223
|
+
if isinstance(value, (list, tuple, set)):
|
|
224
|
+
return [_serialize_script_result(item, depth + 1, seen) for item in list(value)[:200]]
|
|
225
|
+
if isinstance(value, dict):
|
|
226
|
+
ident = id(value)
|
|
227
|
+
if ident in seen:
|
|
228
|
+
return "[Circular]"
|
|
229
|
+
seen.add(ident)
|
|
230
|
+
out = {}
|
|
231
|
+
for idx, (key, item) in enumerate(value.items()):
|
|
232
|
+
if idx >= 200:
|
|
233
|
+
out["__omnius_truncated"] = True
|
|
234
|
+
break
|
|
235
|
+
out[str(key)] = _serialize_script_result(item, depth + 1, seen)
|
|
236
|
+
return out
|
|
237
|
+
return str(value)
|
|
238
|
+
|
|
239
|
+
|
|
163
240
|
class Tools:
|
|
164
241
|
_driver: Optional[webdriver.Chrome] = None
|
|
165
242
|
|
|
@@ -421,6 +498,21 @@ class Tools:
|
|
|
421
498
|
log_message(f"[dom] snapshot failed: {exc}", "WARNING")
|
|
422
499
|
return ""
|
|
423
500
|
|
|
501
|
+
@staticmethod
def evaluate(script: str):
    """Run ``script`` through the shared driver's ``execute_script``.

    Returns a dict with ``ok: True`` plus the serialized ``result`` and a
    ``result_type`` label on success, or ``ok: False`` plus an ``error``
    string when no driver is set or the call raises.
    """
    if not Tools._driver:
        return {"ok": False, "error": "browser not open"}
    try:
        raw = Tools._driver.execute_script(script)
        serialized = _serialize_script_result(raw)
        # A None result is labelled "undefined"; everything else is labelled
        # with its Python type name.
        label = type(raw).__name__ if raw is not None else "undefined"
        return {"ok": True, "result": serialized, "result_type": label}
    except Exception as exc:
        log_message(f"[evaluate] script failed: {exc}", "ERROR")
        return {"ok": False, "error": str(exc)}
|
|
515
|
+
|
|
424
516
|
@staticmethod
|
|
425
517
|
def scroll(amount: int = 600) -> str:
|
|
426
518
|
if not Tools._driver:
|
|
@@ -921,7 +1013,15 @@ def _error(message: str, status: int = 400):
|
|
|
921
1013
|
# ──────────────────────────────────────────────────────────────
|
|
922
1014
|
@app.get("/health")
def health():
    """Report service identity, version, capabilities and current state as JSON."""
    payload = {
        "status": "ok",
        "service": "browser_action",
        "version": SERVICE_VERSION,
        "capabilities": SERVICE_CAPABILITIES,
        "browser_open": Tools.is_browser_open(),
        "sessions": len(_SESSIONS),
        "venv": str(VENV_DIR),
    }
    return jsonify(payload)
|
|
925
1025
|
|
|
926
1026
|
|
|
927
1027
|
@app.post("/session/start")
|
|
@@ -1041,6 +1141,23 @@ def type_text():
|
|
|
1041
1141
|
return _ok(message=msg)
|
|
1042
1142
|
|
|
1043
1143
|
|
|
1144
|
+
@app.post("/evaluate")
def evaluate_script():
    """HTTP handler for POST /evaluate.

    Reads the script from the JSON body (``script``, then ``text``, then
    ``code``), runs it via ``Tools.evaluate`` inside ``_slot()``, queues a
    status event for the session and returns the result. Responds 401 when
    ``_auth_ok`` rejects the request, 400 when the script is blank, and 500
    when evaluation does not succeed.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)

    data = request.get_json(silent=True) or {}
    # First truthy value among the accepted keys, in priority order.
    script = next(
        (data[key] for key in ("script", "text", "code") if data.get(key)),
        "",
    )
    code = str(script)
    if not code.strip():
        return _error("missing script", 400)

    with _slot():
        result = Tools.evaluate(code)

    is_dict = isinstance(result, dict)
    if not is_dict or not result.get("ok"):
        message = result.get("error") if is_dict else "evaluate failed"
        return _error(message, 500)

    # Use the caller-supplied session id, else the first known session.
    sid = data.get("sid") or next(iter(_SESSIONS), "")
    event = {"type": "status", "msg": "evaluate", "ts": int(time.time() * 1000)}
    _queue_event(sid, event)
    return _ok(result=result.get("result"), result_type=result.get("result_type"))
|
|
1159
|
+
|
|
1160
|
+
|
|
1044
1161
|
@app.post("/scroll")
|
|
1045
1162
|
def scroll():
|
|
1046
1163
|
if not _auth_ok(request):
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "omnius",
|
|
3
|
-
"version": "1.0.207",
|
|
3
|
+
"version": "1.0.209",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "omnius",
|
|
9
|
-
"version": "1.0.207",
|
|
9
|
+
"version": "1.0.209",
|
|
10
10
|
"bundleDependencies": [
|
|
11
11
|
"image-to-ascii"
|
|
12
12
|
],
|
package/package.json
CHANGED