page-agent 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,30 +10,320 @@
10
10
  console.error("vite-plugin-css-injected-by-js", e);
11
11
  }
12
12
  })();
13
- var __defProp = Object.defineProperty;
13
+ var __defProp2 = Object.defineProperty;
14
14
  var __typeError = (msg) => {
15
15
  throw TypeError(msg);
16
16
  };
17
- var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
18
- var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
17
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp2(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
18
+ var __name2 = (target, value) => __defProp2(target, "name", { value, configurable: true });
19
19
  var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
20
20
  var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
21
21
  var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
22
22
  var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
23
23
  var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
24
24
  var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
25
- var _bus, _wrapper, _indicator, _statusText, _historySection, _expandButton, _pauseButton, _stopButton, _inputSection, _taskInput, _bus2, _state, _isExpanded, _pageAgent, _userAnswerResolver, _isWaitingForUserAnswer, _headerUpdateTimer, _pendingHeaderText, _isAnimating, _Panel_instances, update_fn, show_fn, hide_fn, reset_fn, togglePause_fn, updatePauseButton_fn, stopAgent_fn, submitTask_fn, handleUserAnswer_fn, showInputArea_fn, hideInputArea_fn, shouldShowInputArea_fn, createWrapper_fn, setupEventListeners_fn, toggle_fn, expand_fn, collapse_fn, startHeaderUpdateLoop_fn, stopHeaderUpdateLoop_fn, checkAndUpdateHeader_fn, animateTextChange_fn, updateStatusIndicator_fn, updateHistory_fn, scrollToBottom_fn, createHistoryItem_fn, _cursor, _currentCursorX, _currentCursorY, _targetCursorX, _targetCursorY, _SimulatorMask_instances, createCursor_fn, moveCursorToTarget_fn, _llm, _totalWaitTime, _abortController, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn, updateTree_fn;
25
+ var __PageController, _bus, _wrapper, _indicator, _statusText, _historySection, _expandButton, _pauseButton, _stopButton, _inputSection, _taskInput, _bus2, _state, _isExpanded, _pageAgent, _userAnswerResolver, _isWaitingForUserAnswer, _headerUpdateTimer, _pendingHeaderText, _isAnimating, _Panel_instances, update_fn, show_fn, hide_fn, reset_fn, togglePause_fn, updatePauseButton_fn, stopAgent_fn, submitTask_fn, handleUserAnswer_fn, showInputArea_fn, hideInputArea_fn, shouldShowInputArea_fn, createWrapper_fn, setupEventListeners_fn, toggle_fn, expand_fn, collapse_fn, startHeaderUpdateLoop_fn, stopHeaderUpdateLoop_fn, checkAndUpdateHeader_fn, animateTextChange_fn, updateStatusIndicator_fn, updateHistory_fn, scrollToBottom_fn, createHistoryItem_fn, _cursor, _currentCursorX, _currentCursorY, _targetCursorX, _targetCursorY, _SimulatorMask_instances, createCursor_fn, moveCursorToTarget_fn, _llm, _totalWaitTime, _abortController, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn;
26
26
  import chalk from "chalk";
27
27
  import zod, { z } from "zod";
28
28
  import { Motion } from "ai-motion";
// Second copy of the bundler's function-naming helpers.
// `__name(fn, label)` redefines `fn.name` as `label` (kept configurable) and
// returns `fn`, because Object.defineProperty returns its target.
// The helper itself is labelled "__name" through the earlier `__name2` helper.
var __defProp = Object.defineProperty;
var __name = /* @__PURE__ */ __name2(
  (fn, label) => __defProp(fn, "name", { value: label, configurable: true }),
  "__name"
);
/**
 * Suspend the calling async flow for a number of seconds.
 *
 * @param {number} seconds - Delay in seconds; multiplied by 1000 for `setTimeout`.
 * @returns {Promise<void>} Settles once the timer has fired.
 */
async function waitFor$1(seconds) {
  await new Promise((wake) => {
    setTimeout(wake, seconds * 1e3);
  });
}
__name2(waitFor$1, "waitFor$1");
__name(waitFor$1, "waitFor");
/**
 * Announce the centre point of `element` to whoever listens on `window`.
 *
 * Dispatches a "PageAgent::MovePointerTo" CustomEvent whose `detail` carries
 * the centre of the element's bounding client rect as `{ x, y }`, then waits
 * 0.3 seconds before resolving.
 *
 * @param {Element} element - Element whose bounding rect centre is reported.
 * @returns {Promise<void>}
 */
async function movePointerToElement(element) {
  const box = element.getBoundingClientRect();
  const detail = {
    x: box.left + box.width / 2,
    y: box.top + box.height / 2,
  };
  window.dispatchEvent(new CustomEvent("PageAgent::MovePointerTo", { detail }));
  await waitFor$1(0.3);
}
__name2(movePointerToElement, "movePointerToElement");
__name(movePointerToElement, "movePointerToElement");
/**
 * Resolve an index from the selector map to its live HTMLElement.
 *
 * @param {Map<number, {ref?: unknown}>} selectorMap - Lookup whose entries expose the element on `.ref`.
 * @param {number} index - Key to look up in `selectorMap`.
 * @returns {HTMLElement} The element stored on the entry's `ref`.
 * @throws {Error} When the index has no entry, the entry has no `ref`,
 *   or the `ref` is not an `HTMLElement`.
 */
function getElementByIndex(selectorMap, index) {
  const entry = selectorMap.get(index);
  if (!entry) {
    throw new Error(`No interactive element found at index ${index}`);
  }

  const { ref } = entry;
  if (!ref) {
    throw new Error(`Element at index ${index} does not have a reference`);
  }

  if (ref instanceof HTMLElement) {
    return ref;
  }
  throw new Error(`Element at index ${index} is not an HTMLElement`);
}
__name2(getElementByIndex, "getElementByIndex");
__name(getElementByIndex, "getElementByIndex");
// Element most recently handed to clickElement(); null when there is none.
let lastClickedElement = null;

/**
 * Release the previously clicked element, if any: blur it, dispatch a
 * bubbling, cancelable "mouseout" MouseEvent on it, and forget it.
 * Does nothing when no element is remembered.
 */
function blurLastClickedElement() {
  if (!lastClickedElement) {
    return;
  }
  lastClickedElement.blur();
  const mouseOut = new MouseEvent("mouseout", { bubbles: true, cancelable: true });
  lastClickedElement.dispatchEvent(mouseOut);
  lastClickedElement = null;
}
__name2(blurLastClickedElement, "blurLastClickedElement");
__name(blurLastClickedElement, "blurLastClickedElement");
/**
 * Simulate a click on `element` with synthetic events.
 *
 * Order of operations:
 *  1. blur whatever was clicked before and remember `element` instead;
 *  2. scroll it into view and report its centre via movePointerToElement();
 *  3. dispatch "PageAgent::ClickPointer" on `window`, wait 0.1 s;
 *  4. fire mouseenter, mouseover, mousedown, then focus(), then mouseup, click;
 *  5. wait another 0.1 s.
 *
 * Every MouseEvent is created with only `bubbles` and `cancelable` set,
 * so the events carry no pointer coordinates.
 *
 * @param {HTMLElement} element - Target of the simulated click.
 * @returns {Promise<void>}
 */
async function clickElement(element) {
  blurLastClickedElement();
  lastClickedElement = element;

  await scrollIntoViewIfNeeded(element);
  await movePointerToElement(element);
  window.dispatchEvent(new CustomEvent("PageAgent::ClickPointer"));
  await waitFor$1(0.1);

  const fire = (type) =>
    element.dispatchEvent(new MouseEvent(type, { bubbles: true, cancelable: true }));

  for (const type of ["mouseenter", "mouseover", "mousedown"]) {
    fire(type);
  }
  // Focus sits between mousedown and mouseup, exactly as in the original sequence.
  element.focus();
  for (const type of ["mouseup", "click"]) {
    fire(type);
  }

  await waitFor$1(0.1);
}
__name2(clickElement, "clickElement");
__name(clickElement, "clickElement");
// Setter halves of the `value` accessors defined on the input / textarea
// prototypes. Captured once at module evaluation, which therefore requires
// `window` to exist at that point.
const { set: nativeInputValueSetter } = Object.getOwnPropertyDescriptor(
  window.HTMLInputElement.prototype,
  "value"
);
const { set: nativeTextAreaValueSetter } = Object.getOwnPropertyDescriptor(
  window.HTMLTextAreaElement.prototype,
  "value"
);
/**
 * Replace the value of an input or textarea and notify listeners.
 *
 * Clicks the element first, writes `text` through the prototype-level `value`
 * setter, dispatches a bubbling "input" event, waits 0.1 s and finally blurs
 * the element via blurLastClickedElement().
 *
 * NOTE(review): the prototype setter is presumably used so that frameworks
 * which shadow `value` on the instance still observe the change — confirm.
 *
 * @param {HTMLInputElement|HTMLTextAreaElement} element - Field to fill.
 * @param {string} text - New value for the field.
 * @returns {Promise<void>}
 * @throws {Error} When `element` is neither an input nor a textarea.
 */
async function inputTextElement(element, text) {
  const isTextArea = element instanceof HTMLTextAreaElement;
  if (!isTextArea && !(element instanceof HTMLInputElement)) {
    throw new Error("Element is not an input or textarea");
  }

  await clickElement(element);

  const assignValue = isTextArea ? nativeTextAreaValueSetter : nativeInputValueSetter;
  assignValue.call(element, text);

  element.dispatchEvent(new Event("input", { bubbles: true }));
  await waitFor$1(0.1);
  blurLastClickedElement();
}
__name2(inputTextElement, "inputTextElement");
__name(inputTextElement, "inputTextElement");
/**
 * Select the option of a `<select>` whose trimmed text equals `optionText`
 * (also trimmed), then dispatch a bubbling "change" event and wait 0.1 s.
 *
 * @param {HTMLSelectElement} selectElement - The select control to operate on.
 * @param {string} optionText - Visible text of the option to choose.
 * @returns {Promise<void>}
 * @throws {Error} When `selectElement` is not an HTMLSelectElement or no
 *   option with the given text exists.
 */
async function selectOptionElement(selectElement, optionText) {
  if (!(selectElement instanceof HTMLSelectElement)) {
    throw new Error("Element is not a select element");
  }

  const option = Array.from(selectElement.options).find(
    (opt) => opt.textContent?.trim() === optionText.trim()
  );
  if (!option) {
    throw new Error(`Option with text "${optionText}" not found in select element`);
  }

  // Select the matched option itself. Assigning `select.value = option.value`
  // would pick the FIRST option carrying that value, which is the wrong one
  // when several options share a value. Like the `value` setter, assigning
  // `selectedIndex` deselects every other option.
  selectElement.selectedIndex = option.index;
  selectElement.dispatchEvent(new Event("change", { bubbles: true }));
  await waitFor$1(0.1);
}
__name2(selectOptionElement, "selectOptionElement");
__name(selectOptionElement, "selectOptionElement");
/**
 * Bring `element` into view.
 *
 * Uses the element's own `scrollIntoViewIfNeeded()` when that property is
 * truthy; otherwise falls back to `scrollIntoView()` centred vertically.
 *
 * @param {Element} element - Element to reveal.
 * @returns {Promise<void>} Resolves right after the scroll call is issued.
 */
async function scrollIntoViewIfNeeded(element) {
  if (!element.scrollIntoViewIfNeeded) {
    element.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
    return;
  }
  element.scrollIntoViewIfNeeded();
}
__name2(scrollIntoViewIfNeeded, "scrollIntoViewIfNeeded");
__name(scrollIntoViewIfNeeded, "scrollIntoViewIfNeeded");
/**
 * Scroll vertically, either inside the container of a given element or on the
 * page / the most plausible scroll container.
 *
 * With `element`: walks from the element up through at most 10 ancestors
 * (stopping at body / documentElement). The first one whose computed
 * `overflowY` is auto/scroll/overlay and whose content overflows is moved by
 * `scroll_amount / 3`, clamped to the scrollable range. The walk ends as soon
 * as a move of more than 0.5px is observed.
 *
 * Without `element`: starts at `document.activeElement`, looks upward for a
 * scrollable container at least half the viewport tall, then falls back to
 * the first such element in the document, then to the page itself, and
 * scrolls by the full `scroll_amount`.
 *
 * NOTE(review): `down` is never read in this function; the sign of
 * `scroll_amount` alone decides the direction — confirm that callers pass a
 * pre-signed amount.
 *
 * @param {boolean} down - Unused here (see note above).
 * @param {number} scroll_amount - Signed pixel distance.
 * @param {HTMLElement} [element] - Optional element whose container should scroll.
 * @returns {Promise<string>} Human-readable description of what was scrolled.
 */
async function scrollVertically(down, scroll_amount, element) {
  if (element) {
    console.log(
      "[SCROLL DEBUG] Starting direct container scroll for element:",
      element.tagName
    );

    for (let node = element, hops = 0; node && hops < 10; node = node.parentElement, hops++) {
      const { overflowY } = window.getComputedStyle(node);
      const hasScrollableY = /(auto|scroll|overlay)/.test(overflowY);
      const canScrollVertically = node.scrollHeight > node.clientHeight;
      console.log(
        "[SCROLL DEBUG] Checking element:",
        node.tagName,
        "hasScrollableY:",
        hasScrollableY,
        "canScrollVertically:",
        canScrollVertically,
        "scrollHeight:",
        node.scrollHeight,
        "clientHeight:",
        node.clientHeight
      );

      if (hasScrollableY && canScrollVertically) {
        const before = node.scrollTop;
        const limit = node.scrollHeight - node.clientHeight;
        const requested = scroll_amount / 3;
        // Clamp so the target never leaves the [0, limit] scroll range.
        const step = requested > 0
          ? Math.min(requested, limit - before)
          : Math.max(requested, -before);
        node.scrollTop = before + step;
        const after = node.scrollTop;
        const moved = after - before;
        console.log(
          "[SCROLL DEBUG] Scroll attempt:",
          node.tagName,
          "before:",
          before,
          "after:",
          after,
          "delta:",
          moved
        );
        if (Math.abs(moved) > 0.5) {
          console.log(
            "[SCROLL DEBUG] Successfully scrolled container:",
            node.tagName,
            "delta:",
            moved
          );
          return `Scrolled container (${node.tagName}) by ${moved}px`;
        }
      }

      if (node === document.body || node === document.documentElement) {
        break;
      }
    }

    return `No scrollable container found for element (${element.tagName})`;
  }

  const bigEnough = /* @__PURE__ */ __name(
    (el2) => el2.clientHeight >= window.innerHeight * 0.5,
    "bigEnough"
  );
  const canScroll = /* @__PURE__ */ __name(
    (el2) =>
      el2 &&
      /(auto|scroll|overlay)/.test(getComputedStyle(el2).overflowY) &&
      el2.scrollHeight > el2.clientHeight &&
      bigEnough(el2),
    "canScroll"
  );

  let target = document.activeElement;
  while (target && !canScroll(target) && target !== document.body) {
    target = target.parentElement;
  }
  if (!canScroll(target)) {
    target =
      Array.from(document.querySelectorAll("*")).find(canScroll) ||
      document.scrollingElement ||
      document.documentElement;
  }

  const isPageLevel =
    target === document.scrollingElement ||
    target === document.documentElement ||
    target === document.body;
  if (isPageLevel) {
    window.scrollBy(0, scroll_amount);
    return `✅ Scrolled page by ${scroll_amount}px.`;
  }

  target.scrollBy({ top: scroll_amount, behavior: "smooth" });
  await waitFor$1(0.1);
  return `✅ Scrolled container (${target.tagName}) by ${scroll_amount}px.`;
}
__name2(scrollVertically, "scrollVertically");
__name(scrollVertically, "scrollVertically");
/**
 * Scroll horizontally, either inside the container of a given element or on
 * the page / the most plausible scroll container.
 *
 * The signed distance is `scroll_amount` when `right` is truthy and
 * `-scroll_amount` otherwise.
 *
 * With `element`: walks from the element up through at most 10 ancestors
 * (stopping at body / documentElement). The first one whose computed
 * `overflowX` is auto/scroll/overlay and whose content overflows is moved by
 * a third of the signed distance, clamped to the scrollable range. The walk
 * ends as soon as a move of more than 0.5px is observed.
 *
 * Without `element`: starts at `document.activeElement`, looks upward for a
 * scrollable container at least half the viewport wide, then falls back to
 * the first such element in the document, then to the page itself, and
 * scrolls by the full signed distance.
 *
 * @param {boolean} right - Direction flag: truthy scrolls right, falsy scrolls left.
 * @param {number} scroll_amount - Pixel distance before the direction is applied.
 * @param {HTMLElement} [element] - Optional element whose container should scroll.
 * @returns {Promise<string>} Human-readable description of what was scrolled.
 */
async function scrollHorizontally(right, scroll_amount, element) {
  const signedAmount = right ? scroll_amount : -scroll_amount;

  if (element) {
    console.log(
      "[SCROLL DEBUG] Starting direct container scroll for element:",
      element.tagName
    );

    for (let node = element, hops = 0; node && hops < 10; node = node.parentElement, hops++) {
      const { overflowX } = window.getComputedStyle(node);
      const hasScrollableX = /(auto|scroll|overlay)/.test(overflowX);
      const canScrollHorizontally = node.scrollWidth > node.clientWidth;
      console.log(
        "[SCROLL DEBUG] Checking element:",
        node.tagName,
        "hasScrollableX:",
        hasScrollableX,
        "canScrollHorizontally:",
        canScrollHorizontally,
        "scrollWidth:",
        node.scrollWidth,
        "clientWidth:",
        node.clientWidth
      );

      if (hasScrollableX && canScrollHorizontally) {
        const before = node.scrollLeft;
        const limit = node.scrollWidth - node.clientWidth;
        const requested = signedAmount / 3;
        // Clamp so the target never leaves the [0, limit] scroll range.
        const step = requested > 0
          ? Math.min(requested, limit - before)
          : Math.max(requested, -before);
        node.scrollLeft = before + step;
        const after = node.scrollLeft;
        const moved = after - before;
        console.log(
          "[SCROLL DEBUG] Scroll attempt:",
          node.tagName,
          "before:",
          before,
          "after:",
          after,
          "delta:",
          moved
        );
        if (Math.abs(moved) > 0.5) {
          console.log(
            "[SCROLL DEBUG] Successfully scrolled container:",
            node.tagName,
            "delta:",
            moved
          );
          return `Scrolled container (${node.tagName}) horizontally by ${moved}px`;
        }
      }

      if (node === document.body || node === document.documentElement) {
        break;
      }
    }

    return `No horizontally scrollable container found for element (${element.tagName})`;
  }

  const bigEnough = /* @__PURE__ */ __name(
    (el2) => el2.clientWidth >= window.innerWidth * 0.5,
    "bigEnough"
  );
  const canScroll = /* @__PURE__ */ __name(
    (el2) =>
      el2 &&
      /(auto|scroll|overlay)/.test(getComputedStyle(el2).overflowX) &&
      el2.scrollWidth > el2.clientWidth &&
      bigEnough(el2),
    "canScroll"
  );

  let target = document.activeElement;
  while (target && !canScroll(target) && target !== document.body) {
    target = target.parentElement;
  }
  if (!canScroll(target)) {
    target =
      Array.from(document.querySelectorAll("*")).find(canScroll) ||
      document.scrollingElement ||
      document.documentElement;
  }

  const isPageLevel =
    target === document.scrollingElement ||
    target === document.documentElement ||
    target === document.body;
  if (isPageLevel) {
    window.scrollBy(signedAmount, 0);
    return `✅ Scrolled page horizontally by ${signedAmount}px`;
  }

  target.scrollBy({ left: signedAmount, behavior: "smooth" });
  await waitFor$1(0.1);
  return `✅ Scrolled container (${target.tagName}) horizontally by ${signedAmount}px`;
}
__name2(scrollHorizontally, "scrollHorizontally");
__name(scrollHorizontally, "scrollHorizontally");
29
326
  const VIEWPORT_EXPANSION = -1;
30
- const DEFAULT_MODEL_NAME = "PAGE-AGENT-FREE-TESTING-RANDOM";
31
- const DEFAULT_API_KEY = "PAGE-AGENT-FREE-TESTING-RANDOM";
32
- const DEFAULT_BASE_URL = "https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy";
33
- const LLM_MAX_RETRIES = 2;
34
- const MAX_STEPS = 20;
35
- const DEFAULT_TEMPERATURE = 0.7;
36
- const DEFAULT_MAX_TOKENS = 4096;
37
327
  const domTree = /* @__PURE__ */ __name((args = {
38
328
  doHighlightElements: true,
39
329
  focusHighlightIndex: -1,
@@ -57,6 +347,7 @@ const domTree = /* @__PURE__ */ __name((args = {
57
347
  if (!element || element.nodeType !== Node.ELEMENT_NODE) return;
58
348
  extraData.set(element, { ...extraData.get(element), ...data });
59
349
  }
350
+ __name2(addExtraData, "addExtraData");
60
351
  __name(addExtraData, "addExtraData");
61
352
  const DOM_CACHE = {
62
353
  boundingRects: /* @__PURE__ */ new WeakMap(),
@@ -79,6 +370,7 @@ const domTree = /* @__PURE__ */ __name((args = {
79
370
  }
80
371
  return rect;
81
372
  }
373
+ __name2(getCachedBoundingRect, "getCachedBoundingRect");
82
374
  __name(getCachedBoundingRect, "getCachedBoundingRect");
83
375
  function getCachedComputedStyle(element) {
84
376
  if (!element) return null;
@@ -91,6 +383,7 @@ const domTree = /* @__PURE__ */ __name((args = {
91
383
  }
92
384
  return style;
93
385
  }
386
+ __name2(getCachedComputedStyle, "getCachedComputedStyle");
94
387
  __name(getCachedComputedStyle, "getCachedComputedStyle");
95
388
  function getCachedClientRects(element) {
96
389
  if (!element) return null;
@@ -103,6 +396,7 @@ const domTree = /* @__PURE__ */ __name((args = {
103
396
  }
104
397
  return rects;
105
398
  }
399
+ __name2(getCachedClientRects, "getCachedClientRects");
106
400
  __name(getCachedClientRects, "getCachedClientRects");
107
401
  const DOM_HASH_MAP = {};
108
402
  const ID = { current: 0 };
@@ -274,6 +568,7 @@ const domTree = /* @__PURE__ */ __name((args = {
274
568
  }
275
569
  }
276
570
  }
571
+ __name2(highlightElement, "highlightElement");
277
572
  __name(highlightElement, "highlightElement");
278
573
  function isScrollableElement(element) {
279
574
  if (!element || element.nodeType !== Node.ELEMENT_NODE) {
@@ -320,6 +615,7 @@ const domTree = /* @__PURE__ */ __name((args = {
320
615
  });
321
616
  return scrollData;
322
617
  }
618
+ __name2(isScrollableElement, "isScrollableElement");
323
619
  __name(isScrollableElement, "isScrollableElement");
324
620
  function isTextNodeVisible(textNode) {
325
621
  try {
@@ -372,6 +668,7 @@ const domTree = /* @__PURE__ */ __name((args = {
372
668
  return false;
373
669
  }
374
670
  }
671
+ __name2(isTextNodeVisible, "isTextNodeVisible");
375
672
  __name(isTextNodeVisible, "isTextNodeVisible");
376
673
  function isElementAccepted(element) {
377
674
  if (!element || !element.tagName) return false;
@@ -398,11 +695,13 @@ const domTree = /* @__PURE__ */ __name((args = {
398
695
  ]);
399
696
  return !leafElementDenyList.has(tagName);
400
697
  }
698
+ __name2(isElementAccepted, "isElementAccepted");
401
699
  __name(isElementAccepted, "isElementAccepted");
402
700
  function isElementVisible(element) {
403
701
  const style = getCachedComputedStyle(element);
404
702
  return element.offsetWidth > 0 && element.offsetHeight > 0 && style?.visibility !== "hidden" && style?.display !== "none";
405
703
  }
704
+ __name2(isElementVisible, "isElementVisible");
406
705
  __name(isElementVisible, "isElementVisible");
407
706
  function isInteractiveElement(element) {
408
707
  if (!element || element.nodeType !== Node.ELEMENT_NODE) {
@@ -499,6 +798,7 @@ const domTree = /* @__PURE__ */ __name((args = {
499
798
  if (style?.cursor && interactiveCursors.has(style.cursor)) return true;
500
799
  return false;
501
800
  }
801
+ __name2(doesElementHaveInteractivePointer, "doesElementHaveInteractivePointer");
502
802
  __name(doesElementHaveInteractivePointer, "doesElementHaveInteractivePointer");
503
803
  let isInteractiveCursor = doesElementHaveInteractivePointer(element);
504
804
  if (isInteractiveCursor) {
@@ -659,6 +959,7 @@ const domTree = /* @__PURE__ */ __name((args = {
659
959
  }
660
960
  return false;
661
961
  }
962
+ __name2(isInteractiveElement, "isInteractiveElement");
662
963
  __name(isInteractiveElement, "isInteractiveElement");
663
964
  function isTopElement(element) {
664
965
  if (viewportExpansion === -1) {
@@ -730,6 +1031,7 @@ const domTree = /* @__PURE__ */ __name((args = {
730
1031
  }
731
1032
  });
732
1033
  }
1034
+ __name2(isTopElement, "isTopElement");
733
1035
  __name(isTopElement, "isTopElement");
734
1036
  function isInExpandedViewport(element, viewportExpansion2) {
735
1037
  if (viewportExpansion2 === -1) {
@@ -751,6 +1053,7 @@ const domTree = /* @__PURE__ */ __name((args = {
751
1053
  }
752
1054
  return false;
753
1055
  }
1056
+ __name2(isInExpandedViewport, "isInExpandedViewport");
754
1057
  __name(isInExpandedViewport, "isInExpandedViewport");
755
1058
  function isInteractiveCandidate(element) {
756
1059
  if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;
@@ -769,6 +1072,7 @@ const domTree = /* @__PURE__ */ __name((args = {
769
1072
  const hasQuickInteractiveAttr = element.hasAttribute("onclick") || element.hasAttribute("role") || element.hasAttribute("tabindex") || element.hasAttribute("aria-") || element.hasAttribute("data-action") || element.getAttribute("contenteditable") === "true";
770
1073
  return hasQuickInteractiveAttr;
771
1074
  }
1075
+ __name2(isInteractiveCandidate, "isInteractiveCandidate");
772
1076
  __name(isInteractiveCandidate, "isInteractiveCandidate");
773
1077
  const DISTINCT_INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
774
1078
  "a",
@@ -814,6 +1118,7 @@ const domTree = /* @__PURE__ */ __name((args = {
814
1118
  const isParentBody = element.parentElement && element.parentElement.isSameNode(document.body);
815
1119
  return (isInteractiveElement(element) || hasInteractiveAttributes || hasInteractiveClass) && hasVisibleChildren && isInKnownContainer && !isParentBody;
816
1120
  }
1121
+ __name2(isHeuristicallyInteractive, "isHeuristicallyInteractive");
817
1122
  __name(isHeuristicallyInteractive, "isHeuristicallyInteractive");
818
1123
  function isElementDistinctInteraction(element) {
819
1124
  if (!element || element.nodeType !== Node.ELEMENT_NODE) {
@@ -884,6 +1189,7 @@ const domTree = /* @__PURE__ */ __name((args = {
884
1189
  }
885
1190
  return false;
886
1191
  }
1192
+ __name2(isElementDistinctInteraction, "isElementDistinctInteraction");
887
1193
  __name(isElementDistinctInteraction, "isElementDistinctInteraction");
888
1194
  function handleHighlighting(nodeData, node, parentIframe, isParentHighlighted) {
889
1195
  if (!nodeData.isInteractive) return false;
@@ -915,6 +1221,7 @@ const domTree = /* @__PURE__ */ __name((args = {
915
1221
  }
916
1222
  return false;
917
1223
  }
1224
+ __name2(handleHighlighting, "handleHighlighting");
918
1225
  __name(handleHighlighting, "handleHighlighting");
919
1226
  function buildDomTree(node, parentIframe = null, isParentHighlighted = false) {
920
1227
  if (!node || node.id === HIGHLIGHT_CONTAINER_ID || node.nodeType !== Node.ELEMENT_NODE && node.nodeType !== Node.TEXT_NODE) {
@@ -1052,6 +1359,7 @@ const domTree = /* @__PURE__ */ __name((args = {
1052
1359
  DOM_HASH_MAP[id] = nodeData;
1053
1360
  return id;
1054
1361
  }
1362
+ __name2(buildDomTree, "buildDomTree");
1055
1363
  __name(buildDomTree, "buildDomTree");
1056
1364
  const rootId = buildDomTree(document.body);
1057
1365
  DOM_CACHE.clearCache();
@@ -1085,19 +1393,20 @@ function getFlatTree(config) {
1085
1393
  highlightOpacity: config.highlightOpacity ?? 0,
1086
1394
  highlightLabelOpacity: config.highlightLabelOpacity ?? 0.1
1087
1395
  });
1088
- const currentUrl2 = window.location.href;
1396
+ const currentUrl = window.location.href;
1089
1397
  for (const nodeId in elements.map) {
1090
1398
  const node = elements.map[nodeId];
1091
1399
  if (node.isInteractive && node.ref) {
1092
1400
  const ref = node.ref;
1093
1401
  if (!newElementsCache.has(ref)) {
1094
- newElementsCache.set(ref, currentUrl2);
1402
+ newElementsCache.set(ref, currentUrl);
1095
1403
  node.isNew = true;
1096
1404
  }
1097
1405
  }
1098
1406
  }
1099
1407
  return elements;
1100
1408
  }
1409
+ __name2(getFlatTree, "getFlatTree");
1101
1410
  __name(getFlatTree, "getFlatTree");
1102
1411
  function flatTreeToString(flatTree, include_attributes) {
1103
1412
  const DEFAULT_INCLUDE_ATTRIBUTES = [
@@ -1282,6 +1591,7 @@ function flatTreeToString(flatTree, include_attributes) {
1282
1591
  processNode(rootNode, 0, result2);
1283
1592
  return result2.join("\n");
1284
1593
  }
1594
+ __name2(flatTreeToString, "flatTreeToString");
1285
1595
  __name(flatTreeToString, "flatTreeToString");
1286
1596
  const getAllTextTillNextClickableElement = /* @__PURE__ */ __name((node, maxDepth = -1) => {
1287
1597
  const textParts = [];
@@ -1314,6 +1624,7 @@ function getSelectorMap(flatTree) {
1314
1624
  }
1315
1625
  return selectorMap;
1316
1626
  }
1627
+ __name2(getSelectorMap, "getSelectorMap");
1317
1628
  __name(getSelectorMap, "getSelectorMap");
1318
1629
  function getElementTextMap(simplifiedHTML) {
1319
1630
  const lines = simplifiedHTML.split("\n").map((line) => line.trim()).filter((line) => line.length > 0);
@@ -1328,6 +1639,7 @@ function getElementTextMap(simplifiedHTML) {
1328
1639
  }
1329
1640
  return elementTextMap;
1330
1641
  }
1642
+ __name2(getElementTextMap, "getElementTextMap");
1331
1643
  __name(getElementTextMap, "getElementTextMap");
1332
1644
  function cleanUpHighlights() {
1333
1645
  const cleanupFunctions = window._highlightCleanupFunctions || [];
@@ -1338,6 +1650,7 @@ function cleanUpHighlights() {
1338
1650
  }
1339
1651
  window._highlightCleanupFunctions = [];
1340
1652
  }
1653
+ __name2(cleanUpHighlights, "cleanUpHighlights");
1341
1654
  __name(cleanUpHighlights, "cleanUpHighlights");
1342
1655
  window.addEventListener("popstate", () => {
1343
1656
  cleanUpHighlights();
@@ -1354,47 +1667,304 @@ if (navigation && typeof navigation.addEventListener === "function") {
1354
1667
  cleanUpHighlights();
1355
1668
  });
1356
1669
  } else {
1357
- let currentUrl2 = window.location.href;
1670
+ let currentUrl = window.location.href;
1358
1671
  setInterval(() => {
1359
- if (window.location.href !== currentUrl2) {
1360
- currentUrl2 = window.location.href;
1672
+ if (window.location.href !== currentUrl) {
1673
+ currentUrl = window.location.href;
1361
1674
  cleanUpHighlights();
1362
1675
  }
1363
- }, 500);
1364
- }
1365
- function getPageInfo() {
1366
- const viewport_width = window.innerWidth;
1367
- const viewport_height = window.innerHeight;
1368
- const page_width = Math.max(document.documentElement.scrollWidth, document.body.scrollWidth || 0);
1369
- const page_height = Math.max(
1370
- document.documentElement.scrollHeight,
1371
- document.body.scrollHeight || 0
1372
- );
1373
- const scroll_x = window.scrollX || window.pageXOffset || document.documentElement.scrollLeft || 0;
1374
- const scroll_y = window.scrollY || window.pageYOffset || document.documentElement.scrollTop || 0;
1375
- const pixels_below = Math.max(0, page_height - (window.innerHeight + scroll_y));
1376
- const pixels_right = Math.max(0, page_width - (window.innerWidth + scroll_x));
1377
- return {
1378
- // Current viewport dimensions
1379
- viewport_width,
1380
- viewport_height,
1381
- // Total page dimensions
1382
- page_width,
1383
- page_height,
1384
- // Current scroll position
1385
- scroll_x,
1386
- scroll_y,
1387
- pixels_above: scroll_y,
1388
- pixels_below,
1389
- pages_above: viewport_height > 0 ? scroll_y / viewport_height : 0,
1390
- pages_below: viewport_height > 0 ? pixels_below / viewport_height : 0,
1391
- total_pages: viewport_height > 0 ? page_height / viewport_height : 0,
1392
- current_page_position: scroll_y / Math.max(1, page_height - viewport_height),
1393
- pixels_left: scroll_x,
1394
- pixels_right
1395
- };
1396
- }
1397
- __name(getPageInfo, "getPageInfo");
1676
+ }, 500);
1677
+ }
/**
 * Snapshot the viewport size, full page size and scroll position.
 *
 * Page dimensions are the larger of the documentElement and body scroll
 * sizes. The `pages_*` figures express pixel distances in units of the
 * viewport height, and are 0 when the viewport height is not positive.
 *
 * @returns {{
 *   viewport_width: number, viewport_height: number,
 *   page_width: number, page_height: number,
 *   scroll_x: number, scroll_y: number,
 *   pixels_above: number, pixels_below: number,
 *   pages_above: number, pages_below: number, total_pages: number,
 *   current_page_position: number,
 *   pixels_left: number, pixels_right: number
 * }}
 */
function getPageInfo() {
  const root = document.documentElement;
  const viewport_width = window.innerWidth;
  const viewport_height = window.innerHeight;

  const page_width = Math.max(root.scrollWidth, document.body.scrollWidth || 0);
  const page_height = Math.max(root.scrollHeight, document.body.scrollHeight || 0);

  const scroll_x = window.scrollX || window.pageXOffset || root.scrollLeft || 0;
  const scroll_y = window.scrollY || window.pageYOffset || root.scrollTop || 0;

  const pixels_below = Math.max(0, page_height - (viewport_height + scroll_y));
  const pixels_right = Math.max(0, page_width - (viewport_width + scroll_x));

  // Convert a pixel distance into "number of viewports"; 0 if the viewport has no height.
  const inViewports = (pixels) => (viewport_height > 0 ? pixels / viewport_height : 0);

  return {
    // Current viewport dimensions
    viewport_width,
    viewport_height,
    // Total page dimensions
    page_width,
    page_height,
    // Current scroll position
    scroll_x,
    scroll_y,
    pixels_above: scroll_y,
    pixels_below,
    pages_above: inViewports(scroll_y),
    pages_below: inViewports(pixels_below),
    total_pages: inViewports(page_height),
    // Fraction of the scrollable height already scrolled (denominator is at least 1).
    current_page_position: scroll_y / Math.max(1, page_height - viewport_height),
    pixels_left: scroll_x,
    pixels_right,
  };
}
__name2(getPageInfo, "getPageInfo");
__name(getPageInfo, "getPageInfo");
/**
 * Tag likely application root containers with
 * `data-page-agent-not-interactive="true"`.
 *
 * The selector covers legacy React root markers, `#root` / `#app`, ids that
 * start with `root-` / `app-`, and `#adex-wrapper` / `#adex-root`.
 *
 * @param {unknown} [pageController] - Not read by this function.
 */
function patchReact(pageController) {
  const ROOT_SELECTOR =
    '[data-reactroot], [data-reactid], [data-react-checksum], #root, #app, [id^="root-"], [id^="app-"], #adex-wrapper, #adex-root';
  document.querySelectorAll(ROOT_SELECTOR).forEach((rootEl) => {
    rootEl.setAttribute("data-page-agent-not-interactive", "true");
  });
}
__name2(patchReact, "patchReact");
__name(patchReact, "patchReact");
1722
/**
 * Controller that owns the indexed snapshot of the current page and performs
 * element-level actions (click, text input, option selection, scrolling,
 * script execution) against it.
 *
 * It extends `EventTarget` and dispatches two plain `Event`s around every
 * tree refresh: "beforeUpdate" and "afterUpdate". Action methods never throw;
 * they resolve to `{ success: boolean, message: string }`.
 */
const _PageController = (__PageController = class extends EventTarget {
  /** Options object given to the constructor; spread into `getFlatTree`. */
  config;
  /** Corresponds to eval_page in browser-use */
  flatTree = null;
  /**
   * All highlighted index-mapped interactive elements
   * Corresponds to DOMState.selector_map in browser-use
   */
  selectorMap = /* @__PURE__ */ new Map();
  /** Index -> element text description mapping */
  elementTextMap = /* @__PURE__ */ new Map();
  /**
   * Simplified HTML for LLM consumption.
   * Corresponds to clickable_elements_to_string in browser-use
   */
  simplifiedHTML = "<EMPTY>";
  /** Timestamp (`Date.now()`) of the most recent `updateTree` call; 0 before the first one. */
  lastTimeUpdate = 0;

  /**
   * @param {object} [config] Controller options. Fields read by this class:
   *   `viewportExpansion`, `interactiveBlacklist`, `include_attributes`.
   */
  constructor(config = {}) {
    super();
    this.config = config;
    patchReact();
  }

  // ======= State Queries =======

  /**
   * Get current page URL
   * @returns {Promise<string>}
   */
  async getCurrentUrl() {
    return window.location.href;
  }

  /**
   * Get current page title
   * @returns {Promise<string>}
   */
  async getPageTitle() {
    return document.title;
  }

  /**
   * Get page scroll and size info
   * @returns {Promise<object>} Whatever the module-level `getPageInfo()` returns.
   */
  async getPageInfo() {
    return getPageInfo();
  }

  /**
   * Get the simplified HTML representation of the page, as produced by the
   * last `updateTree` call ("<EMPTY>" before the first one).
   * @returns {Promise<string>}
   */
  async getSimplifiedHTML() {
    return this.simplifiedHTML;
  }

  /**
   * Get text description for an element by index
   * @param {number} index
   * @returns {Promise<string|undefined>} `undefined` when the index is unknown.
   */
  async getElementText(index) {
    return this.elementTextMap.get(index);
  }

  /**
   * Get total number of indexed interactive elements
   * @returns {Promise<number>}
   */
  async getElementCount() {
    return this.selectorMap.size;
  }

  /**
   * Get last tree update timestamp
   * @returns {Promise<number>}
   */
  async getLastUpdateTime() {
    return this.lastTimeUpdate;
  }

  /**
   * Get the viewport expansion setting, falling back to the module default
   * when the config leaves it null/undefined.
   * @returns {Promise<number>}
   */
  async getViewportExpansion() {
    return this.config.viewportExpansion ?? VIEWPORT_EXPANSION;
  }

  // ======= DOM Tree Operations =======

  /**
   * Update DOM tree, returns simplified HTML for LLM.
   * This is the main method to refresh the page state.
   *
   * "beforeUpdate" is dispatched first and "afterUpdate" is always dispatched
   * afterwards, including when tree extraction throws, so listeners that pair
   * the two events are never left waiting. The original error still propagates.
   *
   * @returns {Promise<string>} The refreshed simplified HTML.
   */
  async updateTree() {
    this.dispatchEvent(new Event("beforeUpdate"));
    try {
      this.lastTimeUpdate = Date.now();
      cleanUpHighlights();
      // Blacklist = caller-supplied entries + every element currently tagged
      // with the opt-out attribute.
      const blacklist = [
        ...this.config.interactiveBlacklist || [],
        ...document.querySelectorAll("[data-page-agent-not-interactive]").values()
      ];
      this.flatTree = getFlatTree({
        ...this.config,
        interactiveBlacklist: blacklist
      });
      this.simplifiedHTML = flatTreeToString(this.flatTree, this.config.include_attributes);
      this.selectorMap.clear();
      this.selectorMap = getSelectorMap(this.flatTree);
      this.elementTextMap.clear();
      this.elementTextMap = getElementTextMap(this.simplifiedHTML);
    } finally {
      this.dispatchEvent(new Event("afterUpdate"));
    }
    return this.simplifiedHTML;
  }

  /**
   * Clean up all element highlights
   * @returns {Promise<void>}
   */
  async cleanUpHighlights() {
    cleanUpHighlights();
  }

  // ======= Element Actions =======

  /**
   * Click element by index
   * @param {number} index Key into the current selector map.
   * @returns {Promise<{success: boolean, message: string}>}
   */
  async clickElement(index) {
    try {
      const element = getElementByIndex(this.selectorMap, index);
      const elemText = this.elementTextMap.get(index);
      await clickElement(element);
      if (element instanceof HTMLAnchorElement && element.target === "_blank") {
        return {
          success: true,
          message: `✅ Clicked element (${elemText ?? index}). ⚠️ Link opens in a new tab. You are not capable of reading new tabs.`
        };
      }
      return {
        success: true,
        message: `✅ Clicked element (${elemText ?? index}).`
      };
    } catch (error2) {
      return {
        success: false,
        message: `❌ Failed to click element: ${error2}`
      };
    }
  }

  /**
   * Input text into element by index
   * @param {number} index Key into the current selector map.
   * @param {string} text Text to enter.
   * @returns {Promise<{success: boolean, message: string}>}
   */
  async inputText(index, text) {
    try {
      const element = getElementByIndex(this.selectorMap, index);
      const elemText = this.elementTextMap.get(index);
      await inputTextElement(element, text);
      return {
        success: true,
        message: `✅ Input text (${text}) into element (${elemText ?? index}).`
      };
    } catch (error2) {
      return {
        success: false,
        message: `❌ Failed to input text: ${error2}`
      };
    }
  }

  /**
   * Select dropdown option by index and option text
   * @param {number} index Key into the current selector map.
   * @param {string} optionText Text of the option to select.
   * @returns {Promise<{success: boolean, message: string}>}
   */
  async selectOption(index, optionText) {
    try {
      const element = getElementByIndex(this.selectorMap, index);
      const elemText = this.elementTextMap.get(index);
      await selectOptionElement(element, optionText);
      return {
        success: true,
        message: `✅ Selected option (${optionText}) in element (${elemText ?? index}).`
      };
    } catch (error2) {
      return {
        success: false,
        message: `❌ Failed to select option: ${error2}`
      };
    }
  }

  /**
   * Scroll vertically
   * @param {{down: boolean, numPages?: number, pixels?: number, index?: number}} options
   *   When `pixels` is non-nullish it is used as the scroll amount as-is;
   *   otherwise the amount is `numPages` viewport heights, negated when
   *   `down` is falsy. `index` optionally targets an indexed element.
   * @returns {Promise<{success: boolean, message: string}>}
   */
  async scroll(options) {
    try {
      const { down, numPages, pixels, index } = options;
      const scrollAmount = pixels ?? numPages * (down ? 1 : -1) * window.innerHeight;
      const element = index !== void 0 ? getElementByIndex(this.selectorMap, index) : null;
      const message = await scrollVertically(down, scrollAmount, element);
      return {
        success: true,
        message
      };
    } catch (error2) {
      return {
        success: false,
        message: `❌ Failed to scroll: ${error2}`
      };
    }
  }

  /**
   * Scroll horizontally
   * @param {{right: boolean, pixels: number, index?: number}} options
   *   The amount passed on is `pixels`, negated when `right` is falsy.
   * @returns {Promise<{success: boolean, message: string}>}
   */
  async scrollHorizontally(options) {
    try {
      const { right, pixels, index } = options;
      const scrollAmount = pixels * (right ? 1 : -1);
      const element = index !== void 0 ? getElementByIndex(this.selectorMap, index) : null;
      const message = await scrollHorizontally(right, scrollAmount, element);
      return {
        success: true,
        message
      };
    } catch (error2) {
      return {
        success: false,
        message: `❌ Failed to scroll horizontally: ${error2}`
      };
    }
  }

  /**
   * Execute arbitrary JavaScript on the page
   *
   * The script text becomes the body of an async arrow function, so `return`
   * and `await` are usable inside it.
   *
   * @param {string} script Function-body source to run.
   * @returns {Promise<{success: boolean, message: string}>}
   */
  async executeJavascript(script) {
    try {
      // SECURITY(review): direct eval of caller-provided source. It runs with
      // the page's full privileges and can see this method's scope. Only pass
      // trusted input; this is flagged rather than replaced because evaluating
      // the script is the purpose of the method.
      const asyncFunction = eval(`(async () => { ${script} })`);
      const result = await asyncFunction();
      return {
        success: true,
        message: `✅ Executed JavaScript. Result: ${result}`
      };
    } catch (error2) {
      return {
        success: false,
        message: `❌ Error executing JavaScript: ${error2}`
      };
    }
  }

  /**
   * Dispose and clean up resources: removes highlights and resets the cached
   * tree, both index maps and the simplified HTML.
   * @returns {void}
   */
  dispose() {
    cleanUpHighlights();
    this.flatTree = null;
    this.selectorMap.clear();
    this.elementTextMap.clear();
    this.simplifiedHTML = "<EMPTY>";
  }
}, __name2(__PageController, "_PageController"), __PageController);
__name(_PageController, "PageController");
let PageController = _PageController;
1961
// ---- Built-in defaults ------------------------------------------------------
// NOTE(review): the names suggest these are fallbacks applied when the caller's
// config omits the matching field; only LLM_MAX_RETRIES is visibly used that
// way nearby (`config.maxRetries ?? LLM_MAX_RETRIES`). Confirm the rest against
// their call sites.

/** Placeholder model identifier. */
const DEFAULT_MODEL_NAME = "PAGE-AGENT-FREE-TESTING-RANDOM";

/** Placeholder API key (same literal as the model name; not a real secret). */
const DEFAULT_API_KEY = "PAGE-AGENT-FREE-TESTING-RANDOM";

/** Endpoint URL paired with the placeholder credentials above. */
const DEFAULT_BASE_URL = "https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy";

/** Fallback for `config.maxRetries`. */
const LLM_MAX_RETRIES = 2;

/** Step limit constant — presumably the cap on agent loop iterations; verify at the call site. */
const MAX_STEPS = 20;

/** Sampling temperature constant. */
const DEFAULT_TEMPERATURE = 0.7;

/** Token limit constant. */
const DEFAULT_MAX_TOKENS = 4096;
1398
1968
  const enUS = {
1399
1969
  ui: {
1400
1970
  panel: {
@@ -1526,7 +2096,7 @@ const _I18n = class _I18n {
1526
2096
  return this.language;
1527
2097
  }
1528
2098
  };
1529
- __name(_I18n, "I18n");
2099
+ __name2(_I18n, "I18n");
1530
2100
  let I18n = _I18n;
1531
2101
  function parseLLMConfig(config) {
1532
2102
  return {
@@ -1538,13 +2108,13 @@ function parseLLMConfig(config) {
1538
2108
  maxRetries: config.maxRetries ?? LLM_MAX_RETRIES
1539
2109
  };
1540
2110
  }
1541
- __name(parseLLMConfig, "parseLLMConfig");
2111
+ __name2(parseLLMConfig, "parseLLMConfig");
1542
2112
  const _EventBus = class _EventBus extends EventTarget {
1543
2113
  /**
1544
2114
  * Listen to built-in events
1545
2115
  */
1546
2116
  on(event, handler) {
1547
- const wrappedHandler = /* @__PURE__ */ __name((e) => {
2117
+ const wrappedHandler = /* @__PURE__ */ __name2((e) => {
1548
2118
  const customEvent = e;
1549
2119
  const params = customEvent.detail?.[0];
1550
2120
  return handler(params);
@@ -1555,7 +2125,7 @@ const _EventBus = class _EventBus extends EventTarget {
1555
2125
  * Listen to built-in events (one-time)
1556
2126
  */
1557
2127
  once(event, handler) {
1558
- const wrappedHandler = /* @__PURE__ */ __name((e) => {
2128
+ const wrappedHandler = /* @__PURE__ */ __name2((e) => {
1559
2129
  const customEvent = e;
1560
2130
  const params = customEvent.detail?.[0];
1561
2131
  return handler(params);
@@ -1571,7 +2141,7 @@ const _EventBus = class _EventBus extends EventTarget {
1571
2141
  return;
1572
2142
  }
1573
2143
  };
1574
- __name(_EventBus, "EventBus");
2144
+ __name2(_EventBus, "EventBus");
1575
2145
  let EventBus = _EventBus;
1576
2146
  const buses = /* @__PURE__ */ new Map();
1577
2147
  function getEventBus(channel) {
@@ -1582,7 +2152,7 @@ function getEventBus(channel) {
1582
2152
  buses.set(channel, bus);
1583
2153
  return bus;
1584
2154
  }
1585
- __name(getEventBus, "getEventBus");
2155
+ __name2(getEventBus, "getEventBus");
1586
2156
  const InvokeErrorType = {
1587
2157
  // Retryable
1588
2158
  NETWORK_ERROR: "network_error",
@@ -1631,7 +2201,7 @@ const _InvokeError = class _InvokeError extends Error {
1631
2201
  return retryableTypes.includes(type);
1632
2202
  }
1633
2203
  };
1634
- __name(_InvokeError, "InvokeError");
2204
+ __name2(_InvokeError, "InvokeError");
1635
2205
  let InvokeError = _InvokeError;
1636
2206
  function zodToOpenAITool(name, tool2) {
1637
2207
  return {
@@ -1643,7 +2213,7 @@ function zodToOpenAITool(name, tool2) {
1643
2213
  }
1644
2214
  };
1645
2215
  }
1646
- __name(zodToOpenAITool, "zodToOpenAITool");
2216
+ __name2(zodToOpenAITool, "zodToOpenAITool");
1647
2217
  function lenientParseMacroToolCall(responseData, inputSchema) {
1648
2218
  const choice = responseData.choices?.[0];
1649
2219
  if (!choice) {
@@ -1764,7 +2334,7 @@ function lenientParseMacroToolCall(responseData, inputSchema) {
1764
2334
  );
1765
2335
  }
1766
2336
  }
1767
- __name(lenientParseMacroToolCall, "lenientParseMacroToolCall");
2337
+ __name2(lenientParseMacroToolCall, "lenientParseMacroToolCall");
1768
2338
  function modelPatch(body) {
1769
2339
  const model = body.model || "";
1770
2340
  if (model.toLowerCase().startsWith("claude")) {
@@ -1780,7 +2350,7 @@ function modelPatch(body) {
1780
2350
  }
1781
2351
  return body;
1782
2352
  }
1783
- __name(modelPatch, "modelPatch");
2353
+ __name2(modelPatch, "modelPatch");
1784
2354
  const _OpenAIClient = class _OpenAIClient {
1785
2355
  config;
1786
2356
  constructor(config) {
@@ -1877,7 +2447,7 @@ const _OpenAIClient = class _OpenAIClient {
1877
2447
  };
1878
2448
  }
1879
2449
  };
1880
- __name(_OpenAIClient, "OpenAIClient");
2450
+ __name2(_OpenAIClient, "OpenAIClient");
1881
2451
  let OpenAIClient = _OpenAIClient;
1882
2452
  const _LLM = class _LLM {
1883
2453
  constructor(config, id) {
@@ -1910,383 +2480,111 @@ const _LLM = class _LLM {
1910
2480
  // retry settings
1911
2481
  {
1912
2482
  maxRetries: this.config.maxRetries,
1913
- onRetry: /* @__PURE__ */ __name((retries) => {
2483
+ onRetry: /* @__PURE__ */ __name2((retries) => {
1914
2484
  __privateGet(this, _bus).emit("panel:update", {
1915
2485
  type: "retry",
1916
2486
  displayText: `retry-ing (${retries} / ${this.config.maxRetries})`
1917
2487
  });
1918
2488
  }, "onRetry"),
1919
- onError: /* @__PURE__ */ __name((error2, withRetry2) => {
2489
+ onError: /* @__PURE__ */ __name2((error2, withRetry2) => {
1920
2490
  __privateGet(this, _bus).emit("panel:update", {
1921
2491
  type: "error",
1922
2492
  displayText: `step failed: ${error2.message}`
1923
- });
1924
- }, "onError")
1925
- }
1926
- );
1927
- }
1928
- };
1929
- _bus = new WeakMap();
1930
- __name(_LLM, "LLM");
1931
- let LLM = _LLM;
1932
- async function withRetry(fn, settings) {
1933
- let retries = 0;
1934
- let lastError = null;
1935
- while (retries <= settings.maxRetries) {
1936
- if (retries > 0) {
1937
- settings.onRetry(retries);
1938
- await new Promise((resolve) => setTimeout(resolve, 100));
1939
- }
1940
- try {
1941
- return await fn();
1942
- } catch (error2) {
1943
- console.error(error2);
1944
- settings.onError(error2, retries < settings.maxRetries);
1945
- if (error2?.name === "AbortError") throw error2;
1946
- if (error2 instanceof InvokeError && !error2.retryable) throw error2;
1947
- lastError = error2;
1948
- retries++;
1949
- await new Promise((resolve) => setTimeout(resolve, 100));
1950
- }
1951
- }
1952
- throw lastError;
1953
- }
1954
- __name(withRetry, "withRetry");
1955
- function patchReact(pageAgent) {
1956
- const reactRootElements = document.querySelectorAll(
1957
- '[data-reactroot], [data-reactid], [data-react-checksum], #root, #app, [id^="root-"], [id^="app-"], #adex-wrapper, #adex-root'
1958
- );
1959
- for (const element of reactRootElements) {
1960
- element.setAttribute("data-page-agent-not-interactive", "true");
1961
- }
1962
- }
1963
- __name(patchReact, "patchReact");
1964
- const SYSTEM_PROMPT = 'You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.\n\n<intro>\nYou excel at following tasks:\n1. Navigating complex websites and extracting precise information\n2. Automating form submissions and interactive web actions\n3. Gathering and saving information \n4. Operate effectively in an agent loop\n5. Efficiently performing diverse web tasks\n</intro>\n\n<language_settings>\n- Default working language: **中文**\n- Use the language that user is using. Return in user\'s language.\n</language_settings>\n\n<input>\nAt every step, your input will consist of: \n1. <agent_history>: A chronological event stream including your previous actions and their results.\n2. <agent_state>: Current <user_request> and <step_info>.\n3. <browser_state>: Current URL, interactive elements indexed for actions, and visible page content.\n</input>\n\n<agent_history>\nAgent history will be given as a list of step information as follows:\n\n<step_{step_number}>:\nEvaluation of Previous Step: Assessment of last action\nMemory: Your memory of this step\nNext Goal: Your goal for this step\nAction Results: Your actions and their results\n</step_{step_number}>\n\nand system messages wrapped in <sys> tag.\n</agent_history>\n\n<user_request>\nUSER REQUEST: This is your ultimate objective and always remains visible.\n- This has the highest priority. Make the user happy.\n- If the user request is very specific - then carefully follow each step and dont skip or hallucinate steps.\n- If the task is open ended you can plan yourself how to get it done.\n</user_request>\n\n<browser_state>\n1. 
Browser State will be given as:\n\nCurrent URL: URL of the page you are currently viewing.\nInteractive Elements: All interactive elements will be provided in format as [index]<type>text</type> where\n- index: Numeric identifier for interaction\n- type: HTML element type (button, input, etc.)\n- text: Element description\n\nExamples:\n[33]<div>User form</div>\n\\t*[35]<button aria-label=\'Submit form\'>Submit</button>\n\nNote that:\n- Only elements with numeric indexes in [] are interactive\n- (stacked) indentation (with \\t) is important and means that the element is a (html) child of the element above (with a lower index)\n- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.\n- Pure text elements without [] are not interactive.\n</browser_state>\n\n<browser_rules>\nStrictly follow these rules while using the browser and navigating the web:\n- Only interact with elements that have a numeric [index] assigned.\n- Only use indexes that are explicitly provided.\n- If the page changes after, for example, an input text action, analyze if you need to interact with new elements, e.g. selecting the right option from the list.\n- By default, only elements in the visible viewport are listed. Use scrolling actions if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.\n- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).\n- All the elements that are scrollable are marked with `data-scrollable` attribute. Including the scrollable distance in every directions. You can scroll *the element* in case some area are overflowed.\n- If a captcha appears, tell user you can not solve captcha. 
finished the task and ask user to solve it.\n- If expected elements are missing, try scrolling, or navigating back.\n- If the page is not fully loaded, use the `wait` action.\n- Do not repeat one action for more than 3 times unless some conditions changed.\n- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.\n- If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.\n- The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.\n- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.\n- Don\'t login into a page if you don\'t have to. Don\'t login if you don\'t have the credentials. \n- There are 2 types of tasks always first think which type of request you are dealing with:\n1. Very specific step by step instructions:\n- Follow them as very precise and don\'t skip steps. Try to complete everything as requested.\n2. Open ended tasks. Plan yourself, be creative in achieving them.\n- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.\n</browser_rules>\n\n<capability>\n- You can only handle single page app. Do not jump out of current page.\n- Do not click on link if it will open in a new page (etc. <a target="_blank">)\n- It is ok to fail the task.\n - User can be wrong. If the request of user is not achievable, inappropriate or you do not have enough information or tools to achieve it. Tell user to make a better request.\n - Webpage can be broken. All webpages or apps have bugs. Some bug will make it hard for your job. 
It\'s encouraged to tell user the problem of current page. Your feedbacks (including failing) are valuable for user.\n - Trying to hard can be harmful. Repeating some action back and forth or pushing for a complex procedure with little knowledge can cause unwanted result and harmful side-effects. User would rather you to complete the task with a fail.\n- If you are not clear about the request or steps. `ask_user` to clarify it.\n- If you do not have knowledge for the current webpage or task. You must require user to give specific instructions and detailed steps.\n</capability>\n\n<task_completion_rules>\nYou must call the `done` action in one of three cases:\n- When you have fully completed the USER REQUEST.\n- When you reach the final allowed step (`max_steps`), even if the task is incomplete.\n- When you feel stuck or unable to solve user request. Or user request is not clear or contains inappropriate content.\n- If it is ABSOLUTELY IMPOSSIBLE to continue.\n\nThe `done` action is your opportunity to terminate and share your findings with the user.\n- Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.\n- If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.\n- You can use the `text` field of the `done` action to communicate your findings and to provide a coherent reply to the user and fulfill the USER REQUEST.\n- You are ONLY ALLOWED to call `done` as a single action. Don\'t call it together with other actions.\n- If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.\n- If the user asks for a structured output, your `done` action\'s schema may be modified. 
Take this schema into account when solving the task!\n</task_completion_rules>\n\n<reasoning_rules>\nExhibit the following reasoning patterns to successfully achieve the <user_request>:\n\n- Reason about <agent_history> to track progress and context toward <user_request>.\n- Analyze the most recent "Next Goal" and "Action Result" in <agent_history> and clearly state what you previously tried to achieve.\n- Analyze all relevant items in <agent_history> and <browser_state> to understand your state.\n- Explicitly judge success/failure/uncertainty of the last action. Never assume an action succeeded just because it appears to be executed in your last step in <agent_history>. If the expected change is missing, mark the last action as failed (or uncertain) and plan a recovery.\n- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches e.g. scrolling for more context or ask user for help.\n- `ask_user` for help if you have any difficulty. Users want to be kept in the loop.\n- If you see information relevant to <user_request>, plan saving the information to memory.\n- Always reason about the <user_request>. Make sure to carefully analyze the specific steps and information required. E.g. specific filters, specific form fields, specific information to search. Make sure to always compare the current trajectory with the user request and think carefully if thats how the user requested it.\n</reasoning_rules>\n\n<examples>\nHere are examples of good output patterns. Use them as reference but never copy them directly.\n\n<evaluation_examples>\n- Positive Examples:\n"evaluation_previous_goal": "Successfully navigated to the product page and found the target information. Verdict: Success"\n"evaluation_previous_goal": "Clicked the login button and user authentication form appeared. 
Verdict: Success"\n</evaluation_examples>\n\n<memory_examples>\n"memory": "Found many pending reports that need to be analyzed in the main page. Successfully processed the first 2 reports on quarterly sales data and moving on to inventory analysis and customer feedback reports."\n</memory_examples>\n\n<next_goal_examples>\n"next_goal": "Click on the \'Add to Cart\' button to proceed with the purchase flow."\n"next_goal": "Extract details from the first item on the page."\n</next_goal_examples>\n</examples>\n\n<output>\nYou must ALWAYS respond with a valid JSON in this exact format:\n\n{\n "evaluation_previous_goal": "Concise one-sentence analysis of your last action. Clearly state success, failure, or uncertain.",\n "memory": "1-3 concise sentences of specific memory of this step and overall progress. You should put here everything that will help you track progress in future steps. Like counting pages visited, items found, etc.",\n "next_goal": "State the next immediate goal and action to achieve it, in one clear sentence."\n "action":{"one_action_name": {// action-specific parameter}}\n}\n</output>\n';
1965
- async function waitFor(seconds) {
1966
- await new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
1967
- }
1968
- __name(waitFor, "waitFor");
1969
- let currentUrl = window.location.href;
1970
- async function getSystemInfo() {
1971
- if (currentUrl === window.location.href) return "";
1972
- await waitFor(0.3);
1973
- currentUrl = window.location.href;
1974
- return `
1975
- <sys> Current URL changed to: ${currentUrl} </sys>`;
1976
- }
1977
- __name(getSystemInfo, "getSystemInfo");
1978
- async function movePointerToElement(element) {
1979
- const rect = element.getBoundingClientRect();
1980
- const x = rect.left + rect.width / 2;
1981
- const y = rect.top + rect.height / 2;
1982
- window.dispatchEvent(new CustomEvent("PageAgent::MovePointerTo", { detail: { x, y } }));
1983
- await waitFor(0.3);
1984
- }
1985
- __name(movePointerToElement, "movePointerToElement");
1986
- function getElementByIndex(pageAgent, index) {
1987
- const interactiveNode = pageAgent.selectorMap.get(index);
1988
- if (!interactiveNode) {
1989
- throw new Error(`No interactive element found at index ${index}`);
1990
- }
1991
- const element = interactiveNode.ref;
1992
- if (!element) {
1993
- throw new Error(`Element at index ${index} does not have a reference`);
1994
- }
1995
- if (!(element instanceof HTMLElement)) {
1996
- throw new Error(`Element at index ${index} is not an HTMLElement`);
1997
- }
1998
- return element;
1999
- }
2000
- __name(getElementByIndex, "getElementByIndex");
2001
- let lastClickedElement = null;
2002
- function blurLastClickedElement() {
2003
- if (lastClickedElement) {
2004
- lastClickedElement.blur();
2005
- lastClickedElement.dispatchEvent(
2006
- new MouseEvent("mouseout", { bubbles: true, cancelable: true })
2007
- );
2008
- lastClickedElement = null;
2009
- }
2010
- }
2011
- __name(blurLastClickedElement, "blurLastClickedElement");
2012
- async function clickElement(element) {
2013
- blurLastClickedElement();
2014
- lastClickedElement = element;
2015
- await scrollIntoViewIfNeeded(element);
2016
- await movePointerToElement(element);
2017
- window.dispatchEvent(new CustomEvent("PageAgent::ClickPointer"));
2018
- await waitFor(0.1);
2019
- element.dispatchEvent(new MouseEvent("mouseenter", { bubbles: true, cancelable: true }));
2020
- element.dispatchEvent(new MouseEvent("mouseover", { bubbles: true, cancelable: true }));
2021
- element.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, cancelable: true }));
2022
- element.focus();
2023
- element.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, cancelable: true }));
2024
- element.dispatchEvent(new MouseEvent("click", { bubbles: true, cancelable: true }));
2025
- await waitFor(0.1);
2026
- }
2027
- __name(clickElement, "clickElement");
2028
- const nativeInputValueSetter = Object.getOwnPropertyDescriptor(
2029
- window.HTMLInputElement.prototype,
2030
- "value"
2031
- ).set;
2032
- const nativeTextAreaValueSetter = Object.getOwnPropertyDescriptor(
2033
- window.HTMLTextAreaElement.prototype,
2034
- "value"
2035
- ).set;
2036
- async function createSyntheticInputEvent(elem, key) {
2037
- elem.dispatchEvent(new KeyboardEvent("keydown", { bubbles: true, cancelable: true, key }));
2038
- await waitFor(0.01);
2039
- if (elem instanceof HTMLInputElement || elem instanceof HTMLTextAreaElement) {
2040
- elem.dispatchEvent(new Event("beforeinput", { bubbles: true }));
2041
- await waitFor(0.01);
2042
- elem.dispatchEvent(new Event("input", { bubbles: true }));
2043
- await waitFor(0.01);
2044
- }
2045
- elem.dispatchEvent(new KeyboardEvent("keyup", { bubbles: true, cancelable: true, key }));
2046
- }
2047
- __name(createSyntheticInputEvent, "createSyntheticInputEvent");
2048
- async function inputTextElement(element, text) {
2049
- if (!(element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement)) {
2050
- throw new Error("Element is not an input or textarea");
2051
- }
2052
- await clickElement(element);
2053
- if (element instanceof HTMLTextAreaElement) {
2054
- nativeTextAreaValueSetter.call(element, text);
2055
- } else {
2056
- nativeInputValueSetter.call(element, text);
2057
- }
2058
- const inputEvent = new Event("input", { bubbles: true });
2059
- element.dispatchEvent(inputEvent);
2060
- await waitFor(0.1);
2061
- blurLastClickedElement();
2062
- }
2063
- __name(inputTextElement, "inputTextElement");
2064
- async function selectOptionElement(selectElement, optionText) {
2065
- if (!(selectElement instanceof HTMLSelectElement)) {
2066
- throw new Error("Element is not a select element");
2067
- }
2068
- const options = Array.from(selectElement.options);
2069
- const option = options.find((opt) => opt.textContent?.trim() === optionText.trim());
2070
- if (!option) {
2071
- throw new Error(`Option with text "${optionText}" not found in select element`);
2072
- }
2073
- selectElement.value = option.value;
2074
- selectElement.dispatchEvent(new Event("change", { bubbles: true }));
2075
- await waitFor(0.1);
2076
- }
2077
- __name(selectOptionElement, "selectOptionElement");
2078
- async function scrollIntoViewIfNeeded(element) {
2079
- const el = element;
2080
- if (el.scrollIntoViewIfNeeded) {
2081
- el.scrollIntoViewIfNeeded();
2082
- } else {
2083
- el.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
2084
- }
2085
- }
2086
- __name(scrollIntoViewIfNeeded, "scrollIntoViewIfNeeded");
2087
- async function scrollVertically(down, scroll_amount, element) {
2088
- if (element) {
2089
- const targetElement = element;
2090
- console.log(
2091
- "[SCROLL DEBUG] Starting direct container scroll for element:",
2092
- targetElement.tagName
2093
- );
2094
- let currentElement = targetElement;
2095
- let scrollSuccess = false;
2096
- let scrolledElement = null;
2097
- let scrollDelta = 0;
2098
- let attempts = 0;
2099
- const dy2 = scroll_amount;
2100
- while (currentElement && attempts < 10) {
2101
- const computedStyle = window.getComputedStyle(currentElement);
2102
- const hasScrollableY = /(auto|scroll|overlay)/.test(computedStyle.overflowY);
2103
- const canScrollVertically = currentElement.scrollHeight > currentElement.clientHeight;
2104
- console.log(
2105
- "[SCROLL DEBUG] Checking element:",
2106
- currentElement.tagName,
2107
- "hasScrollableY:",
2108
- hasScrollableY,
2109
- "canScrollVertically:",
2110
- canScrollVertically,
2111
- "scrollHeight:",
2112
- currentElement.scrollHeight,
2113
- "clientHeight:",
2114
- currentElement.clientHeight
2115
- );
2116
- if (hasScrollableY && canScrollVertically) {
2117
- const beforeScroll = currentElement.scrollTop;
2118
- const maxScroll = currentElement.scrollHeight - currentElement.clientHeight;
2119
- let scrollAmount = dy2 / 3;
2120
- if (scrollAmount > 0) {
2121
- scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll);
2122
- } else {
2123
- scrollAmount = Math.max(scrollAmount, -beforeScroll);
2124
- }
2125
- currentElement.scrollTop = beforeScroll + scrollAmount;
2126
- const afterScroll = currentElement.scrollTop;
2127
- const actualScrollDelta = afterScroll - beforeScroll;
2128
- console.log(
2129
- "[SCROLL DEBUG] Scroll attempt:",
2130
- currentElement.tagName,
2131
- "before:",
2132
- beforeScroll,
2133
- "after:",
2134
- afterScroll,
2135
- "delta:",
2136
- actualScrollDelta
2137
- );
2138
- if (Math.abs(actualScrollDelta) > 0.5) {
2139
- scrollSuccess = true;
2140
- scrolledElement = currentElement;
2141
- scrollDelta = actualScrollDelta;
2142
- console.log(
2143
- "[SCROLL DEBUG] Successfully scrolled container:",
2144
- currentElement.tagName,
2145
- "delta:",
2146
- actualScrollDelta
2147
- );
2148
- break;
2149
- }
2150
- }
2151
- if (currentElement === document.body || currentElement === document.documentElement) {
2152
- break;
2493
+ });
2494
+ }, "onError")
2153
2495
  }
2154
- currentElement = currentElement.parentElement;
2155
- attempts++;
2496
+ );
2497
+ }
2498
+ };
2499
+ _bus = new WeakMap();
2500
+ __name2(_LLM, "LLM");
2501
+ let LLM = _LLM;
2502
/**
 * Runs `fn` and retries it when it rejects.
 *
 * `fn` is attempted once and then up to `settings.maxRetries` more times.
 * Exactly one pause of `retryDelayMs` separates consecutive attempts; there is
 * no pause after the final failure. Every rejection is logged with
 * `console.error` and reported through `onError`.
 *
 * Errors that are never retried and are rethrown immediately:
 *   - any error whose `name` is "AbortError";
 *   - an `InvokeError` whose `retryable` flag is falsy.
 *
 * @template T
 * @param {() => Promise<T>} fn Operation to run.
 * @param {object} settings
 * @param {number} [settings.maxRetries] Extra attempts after the first;
 *   missing, negative or non-numeric values are treated as 0.
 * @param {(retries: number) => void} [settings.onRetry] Called before each
 *   retry with the 1-based retry number.
 * @param {(error: unknown, willRetry: boolean) => void} [settings.onError]
 *   Called after each failed attempt; `willRetry` is true only when another
 *   attempt will actually follow.
 * @param {number} [settings.retryDelayMs=100] Pause between attempts, in ms.
 * @returns {Promise<T>} The first fulfilled result of `fn`.
 * @throws The non-retryable error, or the error from the last attempt.
 */
async function withRetry(fn, settings) {
  const { onRetry, onError, retryDelayMs = 100 } = settings;
  // Guard against undefined/NaN/negative so `fn` always runs at least once
  // instead of falling through to `throw null`.
  const maxRetries = Math.max(0, Number(settings.maxRetries) || 0);
  let lastError = null;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    if (attempt > 0) {
      onRetry?.(attempt);
      await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
    }
    try {
      return await fn();
    } catch (error2) {
      console.error(error2);
      const isFatal =
        error2?.name === "AbortError" ||
        (error2 instanceof InvokeError && !error2.retryable);
      onError?.(error2, !isFatal && attempt < maxRetries);
      if (isFatal) throw error2;
      lastError = error2;
    }
  }
  throw lastError;
}
2178
- __name(scrollVertically, "scrollVertically");
2179
- async function scrollHorizontally(right, scroll_amount, element) {
2180
- if (element) {
2181
- const targetElement = element;
2182
- console.log(
2183
- "[SCROLL DEBUG] Starting direct container scroll for element:",
2184
- targetElement.tagName
2185
- );
2186
- let currentElement = targetElement;
2187
- let scrollSuccess = false;
2188
- let scrolledElement = null;
2189
- let scrollDelta = 0;
2190
- let attempts = 0;
2191
- const dx2 = right ? scroll_amount : -scroll_amount;
2192
- while (currentElement && attempts < 10) {
2193
- const computedStyle = window.getComputedStyle(currentElement);
2194
- const hasScrollableX = /(auto|scroll|overlay)/.test(computedStyle.overflowX);
2195
- const canScrollHorizontally = currentElement.scrollWidth > currentElement.clientWidth;
2196
- console.log(
2197
- "[SCROLL DEBUG] Checking element:",
2198
- currentElement.tagName,
2199
- "hasScrollableX:",
2200
- hasScrollableX,
2201
- "canScrollHorizontally:",
2202
- canScrollHorizontally,
2203
- "scrollWidth:",
2204
- currentElement.scrollWidth,
2205
- "clientWidth:",
2206
- currentElement.clientWidth
2207
- );
2208
- if (hasScrollableX && canScrollHorizontally) {
2209
- const beforeScroll = currentElement.scrollLeft;
2210
- const maxScroll = currentElement.scrollWidth - currentElement.clientWidth;
2211
- let scrollAmount = dx2 / 3;
2212
- if (scrollAmount > 0) {
2213
- scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll);
2214
- } else {
2215
- scrollAmount = Math.max(scrollAmount, -beforeScroll);
2216
- }
2217
- currentElement.scrollLeft = beforeScroll + scrollAmount;
2218
- const afterScroll = currentElement.scrollLeft;
2219
- const actualScrollDelta = afterScroll - beforeScroll;
2220
- console.log(
2221
- "[SCROLL DEBUG] Scroll attempt:",
2222
- currentElement.tagName,
2223
- "before:",
2224
- beforeScroll,
2225
- "after:",
2226
- afterScroll,
2227
- "delta:",
2228
- actualScrollDelta
2229
- );
2230
- if (Math.abs(actualScrollDelta) > 0.5) {
2231
- scrollSuccess = true;
2232
- scrolledElement = currentElement;
2233
- scrollDelta = actualScrollDelta;
2234
- console.log(
2235
- "[SCROLL DEBUG] Successfully scrolled container:",
2236
- currentElement.tagName,
2237
- "delta:",
2238
- actualScrollDelta
2239
- );
2240
- break;
2241
- }
2242
- }
2243
- if (currentElement === document.body || currentElement === document.documentElement) {
2244
- break;
2524
+ __name2(withRetry, "withRetry");
2525
+ const SYSTEM_PROMPT = 'You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.\n\n<intro>\nYou excel at following tasks:\n1. Navigating complex websites and extracting precise information\n2. Automating form submissions and interactive web actions\n3. Gathering and saving information \n4. Operate effectively in an agent loop\n5. Efficiently performing diverse web tasks\n</intro>\n\n<language_settings>\n- Default working language: **中文**\n- Use the language that user is using. Return in user\'s language.\n</language_settings>\n\n<input>\nAt every step, your input will consist of: \n1. <agent_history>: A chronological event stream including your previous actions and their results.\n2. <agent_state>: Current <user_request> and <step_info>.\n3. <browser_state>: Current URL, interactive elements indexed for actions, and visible page content.\n</input>\n\n<agent_history>\nAgent history will be given as a list of step information as follows:\n\n<step_{step_number}>:\nEvaluation of Previous Step: Assessment of last action\nMemory: Your memory of this step\nNext Goal: Your goal for this step\nAction Results: Your actions and their results\n</step_{step_number}>\n\nand system messages wrapped in <sys> tag.\n</agent_history>\n\n<user_request>\nUSER REQUEST: This is your ultimate objective and always remains visible.\n- This has the highest priority. Make the user happy.\n- If the user request is very specific - then carefully follow each step and dont skip or hallucinate steps.\n- If the task is open ended you can plan yourself how to get it done.\n</user_request>\n\n<browser_state>\n1. 
Browser State will be given as:\n\nCurrent URL: URL of the page you are currently viewing.\nInteractive Elements: All interactive elements will be provided in format as [index]<type>text</type> where\n- index: Numeric identifier for interaction\n- type: HTML element type (button, input, etc.)\n- text: Element description\n\nExamples:\n[33]<div>User form</div>\n\\t*[35]<button aria-label=\'Submit form\'>Submit</button>\n\nNote that:\n- Only elements with numeric indexes in [] are interactive\n- (stacked) indentation (with \\t) is important and means that the element is a (html) child of the element above (with a lower index)\n- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.\n- Pure text elements without [] are not interactive.\n</browser_state>\n\n<browser_rules>\nStrictly follow these rules while using the browser and navigating the web:\n- Only interact with elements that have a numeric [index] assigned.\n- Only use indexes that are explicitly provided.\n- If the page changes after, for example, an input text action, analyze if you need to interact with new elements, e.g. selecting the right option from the list.\n- By default, only elements in the visible viewport are listed. Use scrolling actions if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.\n- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).\n- All the elements that are scrollable are marked with `data-scrollable` attribute. Including the scrollable distance in every directions. You can scroll *the element* in case some area are overflowed.\n- If a captcha appears, tell user you can not solve captcha. 
finished the task and ask user to solve it.\n- If expected elements are missing, try scrolling, or navigating back.\n- If the page is not fully loaded, use the `wait` action.\n- Do not repeat one action for more than 3 times unless some conditions changed.\n- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.\n- If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.\n- The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.\n- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.\n- Don\'t login into a page if you don\'t have to. Don\'t login if you don\'t have the credentials. \n- There are 2 types of tasks always first think which type of request you are dealing with:\n1. Very specific step by step instructions:\n- Follow them as very precise and don\'t skip steps. Try to complete everything as requested.\n2. Open ended tasks. Plan yourself, be creative in achieving them.\n- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.\n</browser_rules>\n\n<capability>\n- You can only handle single page app. Do not jump out of current page.\n- Do not click on link if it will open in a new page (etc. <a target="_blank">)\n- It is ok to fail the task.\n - User can be wrong. If the request of user is not achievable, inappropriate or you do not have enough information or tools to achieve it. Tell user to make a better request.\n - Webpage can be broken. All webpages or apps have bugs. Some bug will make it hard for your job. 
It\'s encouraged to tell user the problem of current page. Your feedbacks (including failing) are valuable for user.\n - Trying to hard can be harmful. Repeating some action back and forth or pushing for a complex procedure with little knowledge can cause unwanted result and harmful side-effects. User would rather you to complete the task with a fail.\n- If you are not clear about the request or steps. `ask_user` to clarify it.\n- If you do not have knowledge for the current webpage or task. You must require user to give specific instructions and detailed steps.\n</capability>\n\n<task_completion_rules>\nYou must call the `done` action in one of three cases:\n- When you have fully completed the USER REQUEST.\n- When you reach the final allowed step (`max_steps`), even if the task is incomplete.\n- When you feel stuck or unable to solve user request. Or user request is not clear or contains inappropriate content.\n- If it is ABSOLUTELY IMPOSSIBLE to continue.\n\nThe `done` action is your opportunity to terminate and share your findings with the user.\n- Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.\n- If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.\n- You can use the `text` field of the `done` action to communicate your findings and to provide a coherent reply to the user and fulfill the USER REQUEST.\n- You are ONLY ALLOWED to call `done` as a single action. Don\'t call it together with other actions.\n- If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.\n- If the user asks for a structured output, your `done` action\'s schema may be modified. 
Take this schema into account when solving the task!\n</task_completion_rules>\n\n<reasoning_rules>\nExhibit the following reasoning patterns to successfully achieve the <user_request>:\n\n- Reason about <agent_history> to track progress and context toward <user_request>.\n- Analyze the most recent "Next Goal" and "Action Result" in <agent_history> and clearly state what you previously tried to achieve.\n- Analyze all relevant items in <agent_history> and <browser_state> to understand your state.\n- Explicitly judge success/failure/uncertainty of the last action. Never assume an action succeeded just because it appears to be executed in your last step in <agent_history>. If the expected change is missing, mark the last action as failed (or uncertain) and plan a recovery.\n- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches e.g. scrolling for more context or ask user for help.\n- `ask_user` for help if you have any difficulty. Users want to be kept in the loop.\n- If you see information relevant to <user_request>, plan saving the information to memory.\n- Always reason about the <user_request>. Make sure to carefully analyze the specific steps and information required. E.g. specific filters, specific form fields, specific information to search. Make sure to always compare the current trajectory with the user request and think carefully if thats how the user requested it.\n</reasoning_rules>\n\n<examples>\nHere are examples of good output patterns. Use them as reference but never copy them directly.\n\n<evaluation_examples>\n- Positive Examples:\n"evaluation_previous_goal": "Successfully navigated to the product page and found the target information. Verdict: Success"\n"evaluation_previous_goal": "Clicked the login button and user authentication form appeared. 
Verdict: Success"\n</evaluation_examples>\n\n<memory_examples>\n"memory": "Found many pending reports that need to be analyzed in the main page. Successfully processed the first 2 reports on quarterly sales data and moving on to inventory analysis and customer feedback reports."\n</memory_examples>\n\n<next_goal_examples>\n"next_goal": "Click on the \'Add to Cart\' button to proceed with the purchase flow."\n"next_goal": "Extract details from the first item on the page."\n</next_goal_examples>\n</examples>\n\n<output>\nYou must ALWAYS respond with a valid JSON in this exact format:\n\n{\n "evaluation_previous_goal": "Concise one-sentence analysis of your last action. Clearly state success, failure, or uncertain.",\n "memory": "1-3 concise sentences of specific memory of this step and overall progress. You should put here everything that will help you track progress in future steps. Like counting pages visited, items found, etc.",\n "next_goal": "State the next immediate goal and action to achieve it, in one clear sentence."\n "action":{"one_action_name": {// action-specific parameter}}\n}\n</output>\n';
2526
// waitUntil(check, timeout): resolve with `true` once `check()` returns a truthy value.
// `check` is evaluated once synchronously; if that fails it is polled every
// 100 ms via setInterval. When more than `timeout` milliseconds have elapsed
// since polling started and `check()` is still falsy, the interval is cleared
// and the promise rejects with Error("Timeout waiting for condition to become true").
// The default timeout evaluates to 60 * 601000 = 36,060,000 ms (about 10 hours).
// NOTE(review): this looks like it may have been meant as 60 * 60 * 1000 (1 hour),
// e.g. a `60_1000` numeric-separator slip in the original source - confirm upstream.
// The `-` lines interleaved below are removed code from the previous version's
// scrollHorizontally and are not part of this function.
+ async function waitUntil(check, timeout = 60 * 601e3) {
2527
+ if (check()) return true;
2528
+ return new Promise((resolve, reject) => {
2529
+ const start = Date.now();
2530
+ const interval = setInterval(() => {
2531
+ if (check()) {
2532
+ clearInterval(interval);
2533
+ resolve(true);
2534
+ } else if (Date.now() - start > timeout) {
2535
+ clearInterval(interval);
2536
+ reject(new Error("Timeout waiting for condition to become true"));
2245
2537
  }
2246
- currentElement = currentElement.parentElement;
2247
- attempts++;
2248
- }
2249
- if (scrollSuccess) {
2250
- return `Scrolled container (${scrolledElement?.tagName}) horizontally by ${scrollDelta}px`;
2251
- } else {
2252
- return `No horizontally scrollable container found for element (${targetElement.tagName})`;
2253
- }
2538
+ }, 100);
2539
+ });
2540
+ }
2541
+ __name2(waitUntil, "waitUntil");
2542
// waitFor(seconds): resolve (with no value) after `seconds` seconds.
// The argument is in seconds and is multiplied by 1000 for setTimeout, so
// fractional values such as 0.1 are accepted.
+ async function waitFor(seconds) {
2543
+ await new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
2544
+ }
2545
+ __name2(waitFor, "waitFor");
2546
// truncate(text, maxLength): return `text` unchanged when its length is at most
// `maxLength`; otherwise return its first `maxLength` UTF-16 code units followed
// by "...". The result can therefore be up to maxLength + 3 long.
// The `-` lines interleaved below are removed code from the previous version's
// scrollHorizontally and are not part of this function.
+ function truncate(text, maxLength) {
2547
+ if (text.length > maxLength) {
2548
+ return text.substring(0, maxLength) + "...";
2254
2549
  }
2255
- const dx = right ? scroll_amount : -scroll_amount;
2256
- const bigEnough = /* @__PURE__ */ __name((el2) => el2.clientWidth >= window.innerWidth * 0.5, "bigEnough");
2257
- const canScroll = /* @__PURE__ */ __name((el2) => el2 && /(auto|scroll|overlay)/.test(getComputedStyle(el2).overflowX) && el2.scrollWidth > el2.clientWidth && bigEnough(el2), "canScroll");
2258
- let el = document.activeElement;
2259
- while (el && !canScroll(el) && el !== document.body) el = el.parentElement;
2260
- el = canScroll(el) ? el : Array.from(document.querySelectorAll("*")).find(canScroll) || document.scrollingElement || document.documentElement;
2261
- if (el === document.scrollingElement || el === document.documentElement || el === document.body) {
2262
- window.scrollBy(dx, 0);
2263
- return `✅ Scrolled page horizontally by ${dx}px`;
2264
- } else {
2265
- el.scrollBy({ left: dx, behavior: "smooth" });
2266
- await waitFor(0.1);
2267
- return `✅ Scrolled container (${el.tagName}) horizontally by ${dx}px`;
2550
+ return text;
2551
+ }
2552
+ __name2(truncate, "truncate");
2553
// trimLines(text): split `text` on "\n", strip leading and trailing whitespace
// from every line with String.prototype.trim, and join the lines back with "\n".
// The number of lines is preserved; blank lines become empty strings.
+ function trimLines(text) {
2554
+ return text.split("\n").map((line) => line.trim()).join("\n");
2555
+ }
2556
+ __name2(trimLines, "trimLines");
2557
// randomID(existingIDs): build a short id from Math.random() rendered in base 36
// (characters 2..10 of the string, so at most 9 characters). Not cryptographically secure.
// When `existingIDs` is falsy the first candidate is returned as is.
// Otherwise a new candidate is drawn while `existingIDs.includes(id)` is true;
// after more than MAX_TRY (1000) redraws an Error("randomID: too many try") is thrown.
// The returned id is not added to `existingIDs` here; that is left to the caller.
+ function randomID(existingIDs) {
2558
+ let id = Math.random().toString(36).substring(2, 11);
2559
+ if (!existingIDs) {
2560
+ return id;
2561
+ }
2562
+ const MAX_TRY = 1e3;
2563
+ let tryCount = 0;
2564
+ while (existingIDs.includes(id)) {
2565
+ id = Math.random().toString(36).substring(2, 11);
2566
+ tryCount++;
2567
+ if (tryCount > MAX_TRY) {
2568
+ throw new Error("randomID: too many try");
2569
+ }
2268
2570
  }
2571
+ return id;
2269
2572
  }
2270
- __name(scrollHorizontally, "scrollHorizontally");
2271
- const utils = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
2272
- __proto__: null,
2273
- clickElement,
2274
- createSyntheticInputEvent,
2275
- getElementByIndex,
2276
- getSystemInfo,
2277
- inputTextElement,
2278
- movePointerToElement,
2279
- scrollHorizontally,
2280
- scrollIntoViewIfNeeded,
2281
- scrollVertically,
2282
- selectOptionElement,
2283
- waitFor
2284
- }, Symbol.toStringTag, { value: "Module" }));
2285
- window.utils = utils;
2573
+ __name2(randomID, "randomID");
2574
// Registry of ids already handed out, stored on `window.__PAGE_AGENT_IDS__`.
// The array is created only if it does not exist yet, so an existing array on
// `window` is reused rather than replaced.
// uid(): draw an id from randomID that is not yet in the registry, record it
// in the registry, and return it.
+ if (!window.__PAGE_AGENT_IDS__) {
2575
+ window.__PAGE_AGENT_IDS__ = [];
2576
+ }
2577
+ const ids = window.__PAGE_AGENT_IDS__;
2578
+ function uid() {
2579
+ const id = randomID(ids);
2580
+ ids.push(id);
2581
+ return id;
2582
+ }
2583
+ __name2(uid, "uid");
2286
2584
  function tool(options) {
2287
2585
  return options;
2288
2586
  }
2289
- __name(tool, "tool");
2587
+ __name2(tool, "tool");
2290
2588
  const tools = /* @__PURE__ */ new Map();
2291
2589
  tools.set(
2292
2590
  "done",
@@ -2296,7 +2594,7 @@ tools.set(
2296
2594
  text: zod.string(),
2297
2595
  success: zod.boolean().default(true)
2298
2596
  }),
2299
- execute: /* @__PURE__ */ __name(async function(input2) {
2597
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2300
2598
  return Promise.resolve("Task completed");
2301
2599
  }, "execute")
2302
2600
  })
@@ -2308,12 +2606,12 @@ tools.set(
2308
2606
  inputSchema: zod.object({
2309
2607
  seconds: zod.number().min(1).max(10).default(1)
2310
2608
  }),
2311
- execute: /* @__PURE__ */ __name(async function(input2) {
2312
- const lastTimeUpdate = this.lastTimeUpdate;
2609
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2610
+ const lastTimeUpdate = await this.pageController.getLastUpdateTime();
2313
2611
  const actualWaitTime = Math.max(0, input2.seconds - (Date.now() - lastTimeUpdate) / 1e3);
2314
2612
  console.log(`actualWaitTime: ${actualWaitTime} seconds`);
2315
2613
  await waitFor(actualWaitTime);
2316
- return `✅ Waited for ${input2.seconds} seconds.` + await getSystemInfo();
2614
+ return `✅ Waited for ${input2.seconds} seconds.`;
2317
2615
  }, "execute")
2318
2616
  })
2319
2617
  );
@@ -2324,9 +2622,9 @@ tools.set(
2324
2622
  inputSchema: zod.object({
2325
2623
  question: zod.string()
2326
2624
  }),
2327
- execute: /* @__PURE__ */ __name(async function(input2) {
2625
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2328
2626
  const answer = await this.panel.askUser(input2.question);
2329
- return `✅ Received user answer: ${answer}` + await getSystemInfo();
2627
+ return `✅ Received user answer: ${answer}`;
2330
2628
  }, "execute")
2331
2629
  })
2332
2630
  );
@@ -2337,14 +2635,9 @@ tools.set(
2337
2635
  inputSchema: zod.object({
2338
2636
  index: zod.int().min(0)
2339
2637
  }),
2340
- execute: /* @__PURE__ */ __name(async function(input2) {
2341
- const element = getElementByIndex(this, input2.index);
2342
- const elemText = this.elementTextMap.get(input2.index);
2343
- await clickElement(element);
2344
- if (element instanceof HTMLAnchorElement && element.target === "_blank") {
2345
- return `⚠️ Clicked link that opens in a new tab (${elemText ? elemText : input2.index}). You are not capable of reading new tabs.`;
2346
- }
2347
- return `✅ Clicked element (${elemText ? elemText : input2.index}).` + await getSystemInfo();
2638
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2639
+ const result2 = await this.pageController.clickElement(input2.index);
2640
+ return result2.message;
2348
2641
  }, "execute")
2349
2642
  })
2350
2643
  );
@@ -2356,11 +2649,9 @@ tools.set(
2356
2649
  index: zod.int().min(0),
2357
2650
  text: zod.string()
2358
2651
  }),
2359
- execute: /* @__PURE__ */ __name(async function(input2) {
2360
- const element = getElementByIndex(this, input2.index);
2361
- const elemText = this.elementTextMap.get(input2.index);
2362
- await inputTextElement(element, input2.text);
2363
- return `✅ Input text (${input2.text}) into element (${elemText ? elemText : input2.index}).` + await getSystemInfo();
2652
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2653
+ const result2 = await this.pageController.inputText(input2.index, input2.text);
2654
+ return result2.message;
2364
2655
  }, "execute")
2365
2656
  })
2366
2657
  );
@@ -2372,11 +2663,9 @@ tools.set(
2372
2663
  index: zod.int().min(0),
2373
2664
  text: zod.string()
2374
2665
  }),
2375
- execute: /* @__PURE__ */ __name(async function(input2) {
2376
- const element = getElementByIndex(this, input2.index);
2377
- const elemText = this.elementTextMap.get(input2.index);
2378
- await selectOptionElement(element, input2.text);
2379
- return `✅ Selected option (${input2.text}) in element (${elemText ? elemText : input2.index}).` + await getSystemInfo();
2666
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2667
+ const result2 = await this.pageController.selectOption(input2.index, input2.text);
2668
+ return result2.message;
2380
2669
  }, "execute")
2381
2670
  })
2382
2671
  );
@@ -2390,11 +2679,12 @@ tools.set(
2390
2679
  pixels: zod.number().int().min(0).optional(),
2391
2680
  index: zod.number().int().min(0).optional()
2392
2681
  }),
2393
- execute: /* @__PURE__ */ __name(async function(input2) {
2394
- const { down, num_pages, index, pixels } = input2;
2395
- const scroll_amount = pixels ? pixels : num_pages * (down ? 1 : -1) * window.innerHeight;
2396
- const element = index !== void 0 ? getElementByIndex(this, index) : null;
2397
- return await scrollVertically(down, scroll_amount, element) + await getSystemInfo();
2682
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2683
+ const result2 = await this.pageController.scroll({
2684
+ ...input2,
2685
+ numPages: input2.num_pages
2686
+ });
2687
+ return result2.message;
2398
2688
  }, "execute")
2399
2689
  })
2400
2690
  );
@@ -2407,11 +2697,9 @@ tools.set(
2407
2697
  pixels: zod.number().int().min(0),
2408
2698
  index: zod.number().int().min(0).optional()
2409
2699
  }),
2410
- execute: /* @__PURE__ */ __name(async function(input2) {
2411
- const { right, pixels, index } = input2;
2412
- const scroll_amount = pixels * (right ? 1 : -1);
2413
- const element = index !== void 0 ? getElementByIndex(this, index) : null;
2414
- return await scrollHorizontally(right, scroll_amount, element) + await getSystemInfo();
2700
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2701
+ const result2 = await this.pageController.scrollHorizontally(input2);
2702
+ return result2.message;
2415
2703
  }, "execute")
2416
2704
  })
2417
2705
  );
@@ -2422,71 +2710,12 @@ tools.set(
2422
2710
  inputSchema: zod.object({
2423
2711
  script: zod.string()
2424
2712
  }),
2425
- execute: /* @__PURE__ */ __name(async function(input) {
2426
- try {
2427
- const asyncFunction = eval(`(async () => { ${input.script} })`);
2428
- const result = await asyncFunction();
2429
- return `✅ Executed JavaScript. Result: ${result}` + await getSystemInfo();
2430
- } catch (error2) {
2431
- return `❌ Error executing JavaScript: ${error2}` + await getSystemInfo();
2432
- }
2713
+ execute: /* @__PURE__ */ __name2(async function(input2) {
2714
+ const result2 = await this.pageController.executeJavascript(input2.script);
2715
+ return result2.message;
2433
2716
  }, "execute")
2434
2717
  })
2435
2718
  );
2436
- async function waitUntil(check, timeout = 60 * 601e3) {
2437
- if (check()) return true;
2438
- return new Promise((resolve, reject) => {
2439
- const start = Date.now();
2440
- const interval = setInterval(() => {
2441
- if (check()) {
2442
- clearInterval(interval);
2443
- resolve(true);
2444
- } else if (Date.now() - start > timeout) {
2445
- clearInterval(interval);
2446
- reject(new Error("Timeout waiting for condition to become true"));
2447
- }
2448
- }, 100);
2449
- });
2450
- }
2451
- __name(waitUntil, "waitUntil");
2452
- function truncate(text, maxLength) {
2453
- if (text.length > maxLength) {
2454
- return text.substring(0, maxLength) + "...";
2455
- }
2456
- return text;
2457
- }
2458
- __name(truncate, "truncate");
2459
- function trimLines(text) {
2460
- return text.split("\n").map((line) => line.trim()).join("\n");
2461
- }
2462
- __name(trimLines, "trimLines");
2463
- function randomID(existingIDs) {
2464
- let id = Math.random().toString(36).substring(2, 11);
2465
- if (!existingIDs) {
2466
- return id;
2467
- }
2468
- const MAX_TRY = 1e3;
2469
- let tryCount = 0;
2470
- while (existingIDs.includes(id)) {
2471
- id = Math.random().toString(36).substring(2, 11);
2472
- tryCount++;
2473
- if (tryCount > MAX_TRY) {
2474
- throw new Error("randomID: too many try");
2475
- }
2476
- }
2477
- return id;
2478
- }
2479
- __name(randomID, "randomID");
2480
- if (!window.__PAGE_AGENT_IDS__) {
2481
- window.__PAGE_AGENT_IDS__ = [];
2482
- }
2483
- const ids = window.__PAGE_AGENT_IDS__;
2484
- function uid() {
2485
- const id = randomID(ids);
2486
- ids.push(id);
2487
- return id;
2488
- }
2489
- __name(uid, "uid");
2490
2719
  const _UIState = class _UIState {
2491
2720
  steps = [];
2492
2721
  currentStep = null;
@@ -2545,7 +2774,7 @@ const _UIState = class _UIState {
2545
2774
  return `step_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
2546
2775
  }
2547
2776
  };
2548
- __name(_UIState, "UIState");
2777
+ __name2(_UIState, "UIState");
2549
2778
  let UIState = _UIState;
2550
2779
  const wrapper$1 = "_wrapper_1j0ct_1";
2551
2780
  const background = "_background_1j0ct_39";
@@ -2721,7 +2950,7 @@ _Panel_instances = new WeakSet();
2721
2950
  /**
2722
2951
  * Update status
2723
2952
  */
2724
- update_fn = /* @__PURE__ */ __name(function(stepData) {
2953
+ update_fn = /* @__PURE__ */ __name2(function(stepData) {
2725
2954
  const step = __privateGet(this, _state).addStep(stepData);
2726
2955
  const headerText = truncate(step.displayText, 20);
2727
2956
  __privateSet(this, _pendingHeaderText, headerText);
@@ -2741,7 +2970,7 @@ update_fn = /* @__PURE__ */ __name(function(stepData) {
2741
2970
  /**
2742
2971
  * Show panel
2743
2972
  */
2744
- show_fn = /* @__PURE__ */ __name(function() {
2973
+ show_fn = /* @__PURE__ */ __name2(function() {
2745
2974
  this.wrapper.style.display = "block";
2746
2975
  void this.wrapper.offsetHeight;
2747
2976
  this.wrapper.style.opacity = "1";
@@ -2750,7 +2979,7 @@ show_fn = /* @__PURE__ */ __name(function() {
2750
2979
  /**
2751
2980
  * Hide panel
2752
2981
  */
2753
- hide_fn = /* @__PURE__ */ __name(function() {
2982
+ hide_fn = /* @__PURE__ */ __name2(function() {
2754
2983
  this.wrapper.style.opacity = "0";
2755
2984
  this.wrapper.style.transform = "translateX(-50%) translateY(20px)";
2756
2985
  this.wrapper.style.display = "none";
@@ -2758,7 +2987,7 @@ hide_fn = /* @__PURE__ */ __name(function() {
2758
2987
  /**
2759
2988
  * Reset state
2760
2989
  */
2761
- reset_fn = /* @__PURE__ */ __name(function() {
2990
+ reset_fn = /* @__PURE__ */ __name2(function() {
2762
2991
  __privateGet(this, _state).reset();
2763
2992
  __privateGet(this, _statusText).textContent = __privateGet(this, _pageAgent).i18n.t("ui.panel.ready");
2764
2993
  __privateMethod(this, _Panel_instances, updateStatusIndicator_fn).call(this, "thinking");
@@ -2773,7 +3002,7 @@ reset_fn = /* @__PURE__ */ __name(function() {
2773
3002
  /**
2774
3003
  * Toggle pause state
2775
3004
  */
2776
- togglePause_fn = /* @__PURE__ */ __name(function() {
3005
+ togglePause_fn = /* @__PURE__ */ __name2(function() {
2777
3006
  __privateGet(this, _pageAgent).paused = !__privateGet(this, _pageAgent).paused;
2778
3007
  __privateMethod(this, _Panel_instances, updatePauseButton_fn).call(this);
2779
3008
  if (__privateGet(this, _pageAgent).paused) {
@@ -2787,7 +3016,7 @@ togglePause_fn = /* @__PURE__ */ __name(function() {
2787
3016
  /**
2788
3017
  * Update pause button state
2789
3018
  */
2790
- updatePauseButton_fn = /* @__PURE__ */ __name(function() {
3019
+ updatePauseButton_fn = /* @__PURE__ */ __name2(function() {
2791
3020
  if (__privateGet(this, _pageAgent).paused) {
2792
3021
  __privateGet(this, _pauseButton).textContent = "▶";
2793
3022
  __privateGet(this, _pauseButton).title = __privateGet(this, _pageAgent).i18n.t("ui.panel.continue");
@@ -2801,7 +3030,7 @@ updatePauseButton_fn = /* @__PURE__ */ __name(function() {
2801
3030
  /**
2802
3031
  * Stop Agent
2803
3032
  */
2804
- stopAgent_fn = /* @__PURE__ */ __name(function() {
3033
+ stopAgent_fn = /* @__PURE__ */ __name2(function() {
2805
3034
  __privateMethod(this, _Panel_instances, update_fn).call(this, {
2806
3035
  type: "error",
2807
3036
  displayText: __privateGet(this, _pageAgent).i18n.t("ui.panel.taskTerminated")
@@ -2811,7 +3040,7 @@ stopAgent_fn = /* @__PURE__ */ __name(function() {
2811
3040
  /**
2812
3041
  * Submit task
2813
3042
  */
2814
- submitTask_fn = /* @__PURE__ */ __name(function() {
3043
+ submitTask_fn = /* @__PURE__ */ __name2(function() {
2815
3044
  const input2 = __privateGet(this, _taskInput).value.trim();
2816
3045
  if (!input2) return;
2817
3046
  __privateMethod(this, _Panel_instances, hideInputArea_fn).call(this);
@@ -2824,7 +3053,7 @@ submitTask_fn = /* @__PURE__ */ __name(function() {
2824
3053
  /**
2825
3054
  * Handle user answer
2826
3055
  */
2827
- handleUserAnswer_fn = /* @__PURE__ */ __name(function(input2) {
3056
+ handleUserAnswer_fn = /* @__PURE__ */ __name2(function(input2) {
2828
3057
  __privateMethod(this, _Panel_instances, update_fn).call(this, {
2829
3058
  type: "input",
2830
3059
  displayText: __privateGet(this, _pageAgent).i18n.t("ui.panel.userAnswer", { input: input2 })
@@ -2838,7 +3067,7 @@ handleUserAnswer_fn = /* @__PURE__ */ __name(function(input2) {
2838
3067
  /**
2839
3068
  * Show input area
2840
3069
  */
2841
- showInputArea_fn = /* @__PURE__ */ __name(function(placeholder) {
3070
+ showInputArea_fn = /* @__PURE__ */ __name2(function(placeholder) {
2842
3071
  __privateGet(this, _taskInput).value = "";
2843
3072
  __privateGet(this, _taskInput).placeholder = placeholder || __privateGet(this, _pageAgent).i18n.t("ui.panel.taskInput");
2844
3073
  __privateGet(this, _inputSection).classList.remove(styles$1.hidden);
@@ -2849,13 +3078,13 @@ showInputArea_fn = /* @__PURE__ */ __name(function(placeholder) {
2849
3078
  /**
2850
3079
  * Hide input area
2851
3080
  */
2852
- hideInputArea_fn = /* @__PURE__ */ __name(function() {
3081
+ hideInputArea_fn = /* @__PURE__ */ __name2(function() {
2853
3082
  __privateGet(this, _inputSection).classList.add(styles$1.hidden);
2854
3083
  }, "#hideInputArea");
2855
3084
  /**
2856
3085
  * Check if input area should be shown
2857
3086
  */
2858
- shouldShowInputArea_fn = /* @__PURE__ */ __name(function() {
3087
+ shouldShowInputArea_fn = /* @__PURE__ */ __name2(function() {
2859
3088
  if (__privateGet(this, _isWaitingForUserAnswer)) return true;
2860
3089
  const steps = __privateGet(this, _state).getAllSteps();
2861
3090
  if (steps.length === 0) {
@@ -2864,7 +3093,7 @@ shouldShowInputArea_fn = /* @__PURE__ */ __name(function() {
2864
3093
  const lastStep = steps[steps.length - 1];
2865
3094
  return lastStep.type === "completed" || lastStep.type === "error";
2866
3095
  }, "#shouldShowInputArea");
2867
- createWrapper_fn = /* @__PURE__ */ __name(function() {
3096
+ createWrapper_fn = /* @__PURE__ */ __name2(function() {
2868
3097
  const wrapper2 = document.createElement("div");
2869
3098
  wrapper2.id = "page-agent-runtime_agent-panel";
2870
3099
  wrapper2.className = `${styles$1.wrapper} ${styles$1.collapsed}`;
@@ -2912,7 +3141,7 @@ createWrapper_fn = /* @__PURE__ */ __name(function() {
2912
3141
  document.body.appendChild(wrapper2);
2913
3142
  return wrapper2;
2914
3143
  }, "#createWrapper");
2915
- setupEventListeners_fn = /* @__PURE__ */ __name(function() {
3144
+ setupEventListeners_fn = /* @__PURE__ */ __name2(function() {
2916
3145
  const header2 = this.wrapper.querySelector(`.${styles$1.header}`);
2917
3146
  header2.addEventListener("click", (e) => {
2918
3147
  if (e.target.closest(`.${styles$1.controlButton}`)) {
@@ -2943,20 +3172,20 @@ setupEventListeners_fn = /* @__PURE__ */ __name(function() {
2943
3172
  e.stopPropagation();
2944
3173
  });
2945
3174
  }, "#setupEventListeners");
2946
- toggle_fn = /* @__PURE__ */ __name(function() {
3175
+ toggle_fn = /* @__PURE__ */ __name2(function() {
2947
3176
  if (__privateGet(this, _isExpanded)) {
2948
3177
  __privateMethod(this, _Panel_instances, collapse_fn).call(this);
2949
3178
  } else {
2950
3179
  __privateMethod(this, _Panel_instances, expand_fn).call(this);
2951
3180
  }
2952
3181
  }, "#toggle");
2953
- expand_fn = /* @__PURE__ */ __name(function() {
3182
+ expand_fn = /* @__PURE__ */ __name2(function() {
2954
3183
  __privateSet(this, _isExpanded, true);
2955
3184
  this.wrapper.classList.remove(styles$1.collapsed);
2956
3185
  this.wrapper.classList.add(styles$1.expanded);
2957
3186
  __privateGet(this, _expandButton).textContent = "▲";
2958
3187
  }, "#expand");
2959
- collapse_fn = /* @__PURE__ */ __name(function() {
3188
+ collapse_fn = /* @__PURE__ */ __name2(function() {
2960
3189
  __privateSet(this, _isExpanded, false);
2961
3190
  this.wrapper.classList.remove(styles$1.expanded);
2962
3191
  this.wrapper.classList.add(styles$1.collapsed);
@@ -2965,7 +3194,7 @@ collapse_fn = /* @__PURE__ */ __name(function() {
2965
3194
  /**
2966
3195
  * Start periodic header update loop
2967
3196
  */
2968
- startHeaderUpdateLoop_fn = /* @__PURE__ */ __name(function() {
3197
+ startHeaderUpdateLoop_fn = /* @__PURE__ */ __name2(function() {
2969
3198
  __privateSet(this, _headerUpdateTimer, setInterval(() => {
2970
3199
  __privateMethod(this, _Panel_instances, checkAndUpdateHeader_fn).call(this);
2971
3200
  }, 450));
@@ -2973,7 +3202,7 @@ startHeaderUpdateLoop_fn = /* @__PURE__ */ __name(function() {
2973
3202
  /**
2974
3203
  * Stop periodic header update loop
2975
3204
  */
2976
- stopHeaderUpdateLoop_fn = /* @__PURE__ */ __name(function() {
3205
+ stopHeaderUpdateLoop_fn = /* @__PURE__ */ __name2(function() {
2977
3206
  if (__privateGet(this, _headerUpdateTimer)) {
2978
3207
  clearInterval(__privateGet(this, _headerUpdateTimer));
2979
3208
  __privateSet(this, _headerUpdateTimer, null);
@@ -2982,7 +3211,7 @@ stopHeaderUpdateLoop_fn = /* @__PURE__ */ __name(function() {
2982
3211
  /**
2983
3212
  * Check if header needs update and trigger animation if not currently animating
2984
3213
  */
2985
- checkAndUpdateHeader_fn = /* @__PURE__ */ __name(function() {
3214
+ checkAndUpdateHeader_fn = /* @__PURE__ */ __name2(function() {
2986
3215
  if (!__privateGet(this, _pendingHeaderText) || __privateGet(this, _isAnimating)) {
2987
3216
  return;
2988
3217
  }
@@ -2997,7 +3226,7 @@ checkAndUpdateHeader_fn = /* @__PURE__ */ __name(function() {
2997
3226
  /**
2998
3227
  * Animate text change with fade out/in effect
2999
3228
  */
3000
- animateTextChange_fn = /* @__PURE__ */ __name(function(newText) {
3229
+ animateTextChange_fn = /* @__PURE__ */ __name2(function(newText) {
3001
3230
  __privateSet(this, _isAnimating, true);
3002
3231
  __privateGet(this, _statusText).classList.add(styles$1.fadeOut);
3003
3232
  setTimeout(() => {
@@ -3010,21 +3239,21 @@ animateTextChange_fn = /* @__PURE__ */ __name(function(newText) {
3010
3239
  }, 300);
3011
3240
  }, 150);
3012
3241
  }, "#animateTextChange");
3013
- updateStatusIndicator_fn = /* @__PURE__ */ __name(function(type) {
3242
+ updateStatusIndicator_fn = /* @__PURE__ */ __name2(function(type) {
3014
3243
  __privateGet(this, _indicator).className = styles$1.indicator;
3015
3244
  __privateGet(this, _indicator).classList.add(styles$1[type]);
3016
3245
  }, "#updateStatusIndicator");
3017
- updateHistory_fn = /* @__PURE__ */ __name(function() {
3246
+ updateHistory_fn = /* @__PURE__ */ __name2(function() {
3018
3247
  const steps = __privateGet(this, _state).getAllSteps();
3019
3248
  __privateGet(this, _historySection).innerHTML = steps.map((step) => __privateMethod(this, _Panel_instances, createHistoryItem_fn).call(this, step)).join("");
3020
3249
  __privateMethod(this, _Panel_instances, scrollToBottom_fn).call(this);
3021
3250
  }, "#updateHistory");
3022
- scrollToBottom_fn = /* @__PURE__ */ __name(function() {
3251
+ scrollToBottom_fn = /* @__PURE__ */ __name2(function() {
3023
3252
  setTimeout(() => {
3024
3253
  __privateGet(this, _historySection).scrollTop = __privateGet(this, _historySection).scrollHeight;
3025
3254
  }, 0);
3026
3255
  }, "#scrollToBottom");
3027
- createHistoryItem_fn = /* @__PURE__ */ __name(function(step) {
3256
+ createHistoryItem_fn = /* @__PURE__ */ __name2(function(step) {
3028
3257
  const time = step.timestamp.toLocaleTimeString("zh-CN", {
3029
3258
  hour12: false,
3030
3259
  hour: "2-digit",
@@ -3080,7 +3309,7 @@ createHistoryItem_fn = /* @__PURE__ */ __name(function(step) {
3080
3309
  </div>
3081
3310
  `;
3082
3311
  }, "#createHistoryItem");
3083
- __name(_Panel, "Panel");
3312
+ __name2(_Panel, "Panel");
3084
3313
  let Panel = _Panel;
3085
3314
  function getToolExecutingText(toolName, args, i18n) {
3086
3315
  switch (toolName) {
@@ -3100,7 +3329,7 @@ function getToolExecutingText(toolName, args, i18n) {
3100
3329
  return i18n.t("ui.tools.executing", { toolName });
3101
3330
  }
3102
3331
  }
3103
- __name(getToolExecutingText, "getToolExecutingText");
3332
+ __name2(getToolExecutingText, "getToolExecutingText");
3104
3333
  function getToolCompletedText(toolName, args, i18n) {
3105
3334
  switch (toolName) {
3106
3335
  case "click_element_by_index":
@@ -3119,7 +3348,7 @@ function getToolCompletedText(toolName, args, i18n) {
3119
3348
  return null;
3120
3349
  }
3121
3350
  }
3122
- __name(getToolCompletedText, "getToolCompletedText");
3351
+ __name2(getToolCompletedText, "getToolCompletedText");
3123
3352
  function hasDarkModeClass() {
3124
3353
  const DFEAULT_DARK_MODE_CLASSES = ["dark", "dark-mode", "theme-dark", "night", "night-mode"];
3125
3354
  const htmlElement = document.documentElement;
@@ -3135,7 +3364,7 @@ function hasDarkModeClass() {
3135
3364
  }
3136
3365
  return false;
3137
3366
  }
3138
- __name(hasDarkModeClass, "hasDarkModeClass");
3367
+ __name2(hasDarkModeClass, "hasDarkModeClass");
3139
3368
  function parseRgbColor(colorString) {
3140
3369
  const rgbMatch = /rgba?\((\d+),\s*(\d+),\s*(\d+)/.exec(colorString);
3141
3370
  if (!rgbMatch) {
@@ -3147,7 +3376,7 @@ function parseRgbColor(colorString) {
3147
3376
  b: parseInt(rgbMatch[3])
3148
3377
  };
3149
3378
  }
3150
- __name(parseRgbColor, "parseRgbColor");
3379
+ __name2(parseRgbColor, "parseRgbColor");
3151
3380
  function isColorDark(colorString, threshold = 128) {
3152
3381
  if (!colorString || colorString === "transparent" || colorString.startsWith("rgba(0, 0, 0, 0)")) {
3153
3382
  return false;
@@ -3159,7 +3388,7 @@ function isColorDark(colorString, threshold = 128) {
3159
3388
  const luminance = 0.299 * rgb.r + 0.587 * rgb.g + 0.114 * rgb.b;
3160
3389
  return luminance < threshold;
3161
3390
  }
3162
- __name(isColorDark, "isColorDark");
3391
+ __name2(isColorDark, "isColorDark");
3163
3392
  function isBackgroundDark() {
3164
3393
  const htmlStyle = window.getComputedStyle(document.documentElement);
3165
3394
  const bodyStyle = window.getComputedStyle(document.body);
@@ -3172,7 +3401,7 @@ function isBackgroundDark() {
3172
3401
  }
3173
3402
  return false;
3174
3403
  }
3175
- __name(isBackgroundDark, "isBackgroundDark");
3404
+ __name2(isBackgroundDark, "isBackgroundDark");
3176
3405
  function isPageDark() {
3177
3406
  if (hasDarkModeClass()) {
3178
3407
  return true;
@@ -3182,7 +3411,7 @@ function isPageDark() {
3182
3411
  }
3183
3412
  return false;
3184
3413
  }
3185
- __name(isPageDark, "isPageDark");
3414
+ __name2(isPageDark, "isPageDark");
3186
3415
  const wrapper = "_wrapper_1oy2s_1";
3187
3416
  const styles = {
3188
3417
  wrapper
@@ -3298,7 +3527,7 @@ _currentCursorY = new WeakMap();
3298
3527
  _targetCursorX = new WeakMap();
3299
3528
  _targetCursorY = new WeakMap();
3300
3529
  _SimulatorMask_instances = new WeakSet();
3301
- createCursor_fn = /* @__PURE__ */ __name(function() {
3530
+ createCursor_fn = /* @__PURE__ */ __name2(function() {
3302
3531
  __privateGet(this, _cursor).className = cursorStyles.cursor;
3303
3532
  const rippleContainer = document.createElement("div");
3304
3533
  rippleContainer.className = cursorStyles.cursorRipple;
@@ -3311,7 +3540,7 @@ createCursor_fn = /* @__PURE__ */ __name(function() {
3311
3540
  __privateGet(this, _cursor).appendChild(borderLayer);
3312
3541
  this.wrapper.appendChild(__privateGet(this, _cursor));
3313
3542
  }, "#createCursor");
3314
- moveCursorToTarget_fn = /* @__PURE__ */ __name(function() {
3543
+ moveCursorToTarget_fn = /* @__PURE__ */ __name2(function() {
3315
3544
  const newX = __privateGet(this, _currentCursorX) + (__privateGet(this, _targetCursorX) - __privateGet(this, _currentCursorX)) * 0.2;
3316
3545
  const newY = __privateGet(this, _currentCursorY) + (__privateGet(this, _targetCursorY) - __privateGet(this, _currentCursorY)) * 0.2;
3317
3546
  const xDistance = Math.abs(newX - __privateGet(this, _targetCursorX));
@@ -3334,7 +3563,7 @@ moveCursorToTarget_fn = /* @__PURE__ */ __name(function() {
3334
3563
  }
3335
3564
  requestAnimationFrame(() => __privateMethod(this, _SimulatorMask_instances, moveCursorToTarget_fn).call(this));
3336
3565
  }, "#moveCursorToTarget");
3337
- __name(_SimulatorMask, "SimulatorMask");
3566
+ __name2(_SimulatorMask, "SimulatorMask");
3338
3567
  let SimulatorMask = _SimulatorMask;
3339
3568
  function assert(condition, message, silent) {
3340
3569
  if (!condition) {
@@ -3343,7 +3572,7 @@ function assert(condition, message, silent) {
3343
3572
  throw new Error(errorMessage);
3344
3573
  }
3345
3574
  }
3346
- __name(assert, "assert");
3575
+ __name2(assert, "assert");
3347
3576
  const _PageAgent = class _PageAgent extends EventTarget {
3348
3577
  constructor(config = {}) {
3349
3578
  super();
@@ -3361,19 +3590,8 @@ const _PageAgent = class _PageAgent extends EventTarget {
3361
3590
  __privateAdd(this, _llm);
3362
3591
  __privateAdd(this, _totalWaitTime, 0);
3363
3592
  __privateAdd(this, _abortController, new AbortController());
3364
- /** Corresponds to eval_page in browser-use */
3365
- __publicField(this, "flatTree", null);
3366
- /**
3367
- * All highlighted index-mapped interactive elements
3368
- * Corresponds to DOMState.selector_map in browser-use
3369
- */
3370
- __publicField(this, "selectorMap", /* @__PURE__ */ new Map());
3371
- /** highlight index -> element text */
3372
- __publicField(this, "elementTextMap", /* @__PURE__ */ new Map());
3373
- /** Corresponds to clickable_elements_to_string in browser-use */
3374
- __publicField(this, "simplifiedHTML", "<EMPTY>");
3375
- /** last time the tree was updated */
3376
- __publicField(this, "lastTimeUpdate", 0);
3593
+ /** PageController for DOM operations */
3594
+ __publicField(this, "pageController");
3377
3595
  /** Fullscreen mask */
3378
3596
  __publicField(this, "mask", new SimulatorMask());
3379
3597
  /** History records */
@@ -3383,6 +3601,7 @@ const _PageAgent = class _PageAgent extends EventTarget {
3383
3601
  this.i18n = new I18n(this.config.language);
3384
3602
  this.panel = new Panel(this);
3385
3603
  this.tools = new Map(tools);
3604
+ this.pageController = new PageController(this.config);
3386
3605
  if (this.config.customTools) {
3387
3606
  for (const [name, tool2] of Object.entries(this.config.customTools)) {
3388
3607
  if (tool2 === null) {
@@ -3395,7 +3614,6 @@ const _PageAgent = class _PageAgent extends EventTarget {
3395
3614
  if (!this.config.experimentalScriptExecutionTool) {
3396
3615
  this.tools.delete("execute_javascript");
3397
3616
  }
3398
- patchReact();
3399
3617
  window.addEventListener("beforeunload", (e) => {
3400
3618
  if (!this.disposed) this.dispose("PAGE_UNLOADING");
3401
3619
  });
@@ -3428,7 +3646,7 @@ const _PageAgent = class _PageAgent extends EventTarget {
3428
3646
  let step = 0;
3429
3647
  while (true) {
3430
3648
  await onBeforeStep.call(this, step);
3431
- console.group(`step: ${step + 1}`);
3649
+ console.group(`step: ${step}`);
3432
3650
  if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
3433
3651
  await waitUntil(() => !this.paused);
3434
3652
  console.log(chalk.blue("Thinking..."));
@@ -3444,7 +3662,7 @@ const _PageAgent = class _PageAgent extends EventTarget {
3444
3662
  },
3445
3663
  {
3446
3664
  role: "user",
3447
- content: __privateMethod(this, _PageAgent_instances, assembleUserPrompt_fn).call(this)
3665
+ content: await __privateMethod(this, _PageAgent_instances, assembleUserPrompt_fn).call(this)
3448
3666
  }
3449
3667
  ],
3450
3668
  { AgentOutput: __privateMethod(this, _PageAgent_instances, packMacroTool_fn).call(this) },
@@ -3512,10 +3730,7 @@ const _PageAgent = class _PageAgent extends EventTarget {
3512
3730
  dispose(reason) {
3513
3731
  console.log("Disposing PageAgent...");
3514
3732
  this.disposed = true;
3515
- cleanUpHighlights();
3516
- this.flatTree = null;
3517
- this.selectorMap.clear();
3518
- this.elementTextMap.clear();
3733
+ this.pageController.dispose();
3519
3734
  this.panel.dispose();
3520
3735
  this.mask.dispose();
3521
3736
  this.history = [];
@@ -3536,7 +3751,7 @@ _PageAgent_instances = new WeakSet();
3536
3751
  * - action: { toolName: toolInput }
3537
3752
  * where action must be selected from tools defined in this.tools
3538
3753
  */
3539
- packMacroTool_fn = /* @__PURE__ */ __name(function() {
3754
+ packMacroTool_fn = /* @__PURE__ */ __name2(function() {
3540
3755
  const tools2 = this.tools;
3541
3756
  const actionSchemas = Array.from(tools2.entries()).map(([toolName, tool2]) => {
3542
3757
  return zod.object({
@@ -3555,7 +3770,7 @@ packMacroTool_fn = /* @__PURE__ */ __name(function() {
3555
3770
  });
3556
3771
  return {
3557
3772
  inputSchema: macroToolSchema,
3558
- execute: /* @__PURE__ */ __name(async (input2) => {
3773
+ execute: /* @__PURE__ */ __name2(async (input2) => {
3559
3774
  if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
3560
3775
  await waitUntil(() => !this.paused);
3561
3776
  console.log(chalk.blue.bold("MacroTool execute"), input2);
@@ -3615,7 +3830,7 @@ packMacroTool_fn = /* @__PURE__ */ __name(function() {
3615
3830
  /**
3616
3831
  * Get system prompt, dynamically replace language settings based on configured language
3617
3832
  */
3618
- getSystemPrompt_fn = /* @__PURE__ */ __name(function() {
3833
+ getSystemPrompt_fn = /* @__PURE__ */ __name2(function() {
3619
3834
  let systemPrompt = SYSTEM_PROMPT;
3620
3835
  const targetLanguage = this.config.language === "zh-CN" ? "中文" : "English";
3621
3836
  systemPrompt = systemPrompt.replace(
@@ -3624,7 +3839,7 @@ getSystemPrompt_fn = /* @__PURE__ */ __name(function() {
3624
3839
  );
3625
3840
  return systemPrompt;
3626
3841
  }, "#getSystemPrompt");
3627
- assembleUserPrompt_fn = /* @__PURE__ */ __name(function() {
3842
+ assembleUserPrompt_fn = /* @__PURE__ */ __name2(async function() {
3628
3843
  let prompt = "";
3629
3844
  prompt += "<agent_history>\n";
3630
3845
  this.history.forEach((history, index) => {
@@ -3647,11 +3862,11 @@ assembleUserPrompt_fn = /* @__PURE__ */ __name(function() {
3647
3862
  </step_info>
3648
3863
  </agent_state>
3649
3864
  `;
3650
- prompt += __privateMethod(this, _PageAgent_instances, getBrowserState_fn).call(this);
3865
+ prompt += await __privateMethod(this, _PageAgent_instances, getBrowserState_fn).call(this);
3651
3866
  return trimLines(prompt);
3652
3867
  }, "#assembleUserPrompt");
3653
- onDone_fn = /* @__PURE__ */ __name(function(text, success = true) {
3654
- cleanUpHighlights();
3868
+ onDone_fn = /* @__PURE__ */ __name2(function(text, success = true) {
3869
+ this.pageController.cleanUpHighlights();
3655
3870
  this.bus.emit("panel:update", {
3656
3871
  type: success ? "output" : "error",
3657
3872
  displayText: text
@@ -3663,27 +3878,39 @@ onDone_fn = /* @__PURE__ */ __name(function(text, success = true) {
3663
3878
  this.mask.hide();
3664
3879
  __privateGet(this, _abortController).abort();
3665
3880
  }, "#onDone");
3666
- getBrowserState_fn = /* @__PURE__ */ __name(function() {
3667
- const pageUrl = window.location.href;
3668
- const pageTitle = document.title;
3669
- const pi = getPageInfo();
3670
- __privateMethod(this, _PageAgent_instances, updateTree_fn).call(this);
3881
+ getBrowserState_fn = /* @__PURE__ */ __name2(async function() {
3882
+ const pageUrl = await this.pageController.getCurrentUrl();
3883
+ const pageTitle = await this.pageController.getPageTitle();
3884
+ const pi = await this.pageController.getPageInfo();
3885
+ const viewportExpansion = await this.pageController.getViewportExpansion();
3886
+ this.mask.wrapper.style.pointerEvents = "none";
3887
+ await this.pageController.updateTree();
3888
+ this.mask.wrapper.style.pointerEvents = "auto";
3889
+ const simplifiedHTML = await this.pageController.getSimplifiedHTML();
3671
3890
  let prompt = trimLines(`<browser_state>
3672
3891
  Current Page: [${pageTitle}](${pageUrl})
3673
3892
 
3674
3893
  Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page
3675
3894
 
3676
- ${"Interactive elements from top layer of the current page (full page):"}
3895
+ ${viewportExpansion === -1 ? "Interactive elements from top layer of the current page (full page):" : "Interactive elements from top layer of the current page inside the viewport:"}
3677
3896
 
3678
3897
  `);
3679
- {
3898
+ const has_content_above = pi.pixels_above > 4;
3899
+ if (has_content_above && viewportExpansion !== -1) {
3900
+ prompt += `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...
3901
+ `;
3902
+ } else {
3680
3903
  prompt += `[Start of page]
3681
3904
  `;
3682
3905
  }
3683
- prompt += this.simplifiedHTML;
3906
+ prompt += simplifiedHTML;
3684
3907
  prompt += `
3685
3908
  `;
3686
- {
3909
+ const has_content_below = pi.pixels_below > 4;
3910
+ if (has_content_below && viewportExpansion !== -1) {
3911
+ prompt += `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...
3912
+ `;
3913
+ } else {
3687
3914
  prompt += `[End of page]
3688
3915
  `;
3689
3916
  }
@@ -3691,30 +3918,7 @@ getBrowserState_fn = /* @__PURE__ */ __name(function() {
3691
3918
  `;
3692
3919
  return prompt;
3693
3920
  }, "#getBrowserState");
3694
- /**
3695
- * Update document tree
3696
- */
3697
- updateTree_fn = /* @__PURE__ */ __name(function() {
3698
- this.dispatchEvent(new Event("beforeUpdate"));
3699
- this.lastTimeUpdate = Date.now();
3700
- cleanUpHighlights();
3701
- this.mask.wrapper.style.pointerEvents = "none";
3702
- this.flatTree = getFlatTree({
3703
- ...this.config,
3704
- interactiveBlacklist: [
3705
- ...this.config.interactiveBlacklist || [],
3706
- ...document.querySelectorAll("[data-page-agent-not-interactive]").values()
3707
- ]
3708
- });
3709
- this.mask.wrapper.style.pointerEvents = "auto";
3710
- this.simplifiedHTML = flatTreeToString(this.flatTree, this.config.include_attributes);
3711
- this.selectorMap.clear();
3712
- this.selectorMap = getSelectorMap(this.flatTree);
3713
- this.elementTextMap.clear();
3714
- this.elementTextMap = getElementTextMap(this.simplifiedHTML);
3715
- this.dispatchEvent(new Event("afterUpdate"));
3716
- }, "#updateTree");
3717
- __name(_PageAgent, "PageAgent");
3921
+ __name2(_PageAgent, "PageAgent");
3718
3922
  let PageAgent = _PageAgent;
3719
3923
  export {
3720
3924
  PageAgent,