page-agent 0.0.13 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -73,6 +73,7 @@ PageAgent adopts a simplified monorepo structure:
73
73
  ```
74
74
  packages/
75
75
  ├── page-agent/ # AI agent (npm: page-agent)
76
+ ├── llms/ # LLM client (npm: @page-agent/llms)
76
77
  ├── page-controller/ # DOM operations (npm: @page-agent/page-controller)
77
78
  ├── ui/ # Panel & Mask & Mouse Animation (npm: @page-agent/ui)
78
79
  └── website/ # Demo & Documentation site
@@ -1,3 +1,7 @@
1
+ import { AgentBrain } from '@page-agent/llms';
2
+ import { LLMConfig } from '@page-agent/llms';
3
+ import { MacroToolInput } from '@page-agent/llms';
4
+ import { MacroToolResult } from '@page-agent/llms';
1
5
  import { PageController } from '@page-agent/page-controller';
2
6
  import { PageControllerConfig } from '@page-agent/page-controller';
3
7
  import { Panel } from '@page-agent/ui';
@@ -5,11 +9,7 @@ import { SimulatorMask } from '@page-agent/ui';
5
9
  import { SupportedLanguage } from '@page-agent/ui';
6
10
  import { z } from 'zod';
7
11
 
8
- export declare interface AgentBrain {
9
- evaluation_previous_goal: string;
10
- memory: string;
11
- next_goal: string;
12
- }
12
+ export { AgentBrain }
13
13
 
14
14
  declare interface AgentConfig {
15
15
  language?: SupportedLanguage;
@@ -95,32 +95,9 @@ export declare interface ExecutionResult {
95
95
  history: AgentHistory[];
96
96
  }
97
97
 
98
- declare interface LLMConfig {
99
- baseURL?: string;
100
- apiKey?: string;
101
- model?: string;
102
- temperature?: number;
103
- maxTokens?: number;
104
- maxRetries?: number;
105
- }
98
+ export { MacroToolInput }
106
99
 
107
- /**
108
- * MacroTool input structure
109
- */
110
- export declare interface MacroToolInput {
111
- evaluation_previous_goal?: string;
112
- memory?: string;
113
- next_goal?: string;
114
- action: Record<string, any>;
115
- }
116
-
117
- /**
118
- * MacroTool output structure
119
- */
120
- export declare interface MacroToolResult {
121
- input: MacroToolInput;
122
- output: string;
123
- }
100
+ export { MacroToolResult }
124
101
 
125
102
  export declare class PageAgent extends EventTarget {
126
103
  #private;
@@ -11,389 +11,12 @@ var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot
11
11
  var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
12
12
  var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
13
13
  var _llm, _totalWaitTime, _abortController, _llmRetryListener, _llmErrorListener, _beforeUnloadListener, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn;
14
+ import { LLM } from "@page-agent/llms";
14
15
  import { PageController } from "@page-agent/page-controller";
15
16
  import { SimulatorMask, Panel } from "@page-agent/ui";
16
17
  import chalk from "chalk";
17
- import zod, { z } from "zod";
18
- const DEFAULT_MODEL_NAME = "PAGE-AGENT-FREE-TESTING-RANDOM";
19
- const DEFAULT_API_KEY = "PAGE-AGENT-FREE-TESTING-RANDOM";
20
- const DEFAULT_BASE_URL = "https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy";
21
- const LLM_MAX_RETRIES = 2;
18
+ import zod from "zod";
22
19
  const MAX_STEPS = 20;
23
- const DEFAULT_TEMPERATURE = 0.7;
24
- const DEFAULT_MAX_TOKENS = 4096;
25
- function parseLLMConfig(config) {
26
- return {
27
- baseURL: config.baseURL ?? DEFAULT_BASE_URL,
28
- apiKey: config.apiKey ?? DEFAULT_API_KEY,
29
- model: config.model ?? DEFAULT_MODEL_NAME,
30
- temperature: config.temperature ?? DEFAULT_TEMPERATURE,
31
- maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
32
- maxRetries: config.maxRetries ?? LLM_MAX_RETRIES
33
- };
34
- }
35
- __name(parseLLMConfig, "parseLLMConfig");
36
- const InvokeErrorType = {
37
- // Retryable
38
- NETWORK_ERROR: "network_error",
39
- // Network error, retry
40
- RATE_LIMIT: "rate_limit",
41
- // Rate limit, retry
42
- SERVER_ERROR: "server_error",
43
- // 5xx, retry
44
- NO_TOOL_CALL: "no_tool_call",
45
- // Model did not call tool
46
- INVALID_TOOL_ARGS: "invalid_tool_args",
47
- // Tool args don't match schema
48
- TOOL_EXECUTION_ERROR: "tool_execution_error",
49
- // Tool execution error
50
- UNKNOWN: "unknown",
51
- // Non-retryable
52
- AUTH_ERROR: "auth_error",
53
- // Authentication failed
54
- CONTEXT_LENGTH: "context_length",
55
- // Prompt too long
56
- CONTENT_FILTER: "content_filter"
57
- // Content filtered
58
- };
59
- const _InvokeError = class _InvokeError extends Error {
60
- type;
61
- retryable;
62
- statusCode;
63
- rawError;
64
- constructor(type, message, rawError) {
65
- super(message);
66
- this.name = "InvokeError";
67
- this.type = type;
68
- this.retryable = this.isRetryable(type);
69
- this.rawError = rawError;
70
- }
71
- isRetryable(type) {
72
- const retryableTypes = [
73
- InvokeErrorType.NETWORK_ERROR,
74
- InvokeErrorType.RATE_LIMIT,
75
- InvokeErrorType.SERVER_ERROR,
76
- InvokeErrorType.NO_TOOL_CALL,
77
- InvokeErrorType.INVALID_TOOL_ARGS,
78
- InvokeErrorType.TOOL_EXECUTION_ERROR,
79
- InvokeErrorType.UNKNOWN
80
- ];
81
- return retryableTypes.includes(type);
82
- }
83
- };
84
- __name(_InvokeError, "InvokeError");
85
- let InvokeError = _InvokeError;
86
- function zodToOpenAITool(name, tool2) {
87
- return {
88
- type: "function",
89
- function: {
90
- name,
91
- description: tool2.description,
92
- parameters: z.toJSONSchema(tool2.inputSchema, { target: "openapi-3.0" })
93
- }
94
- };
95
- }
96
- __name(zodToOpenAITool, "zodToOpenAITool");
97
- function lenientParseMacroToolCall(responseData, inputSchema) {
98
- const choice = responseData.choices?.[0];
99
- if (!choice) {
100
- throw new InvokeError(InvokeErrorType.UNKNOWN, "No choices in response", responseData);
101
- }
102
- switch (choice.finish_reason) {
103
- case "tool_calls":
104
- case "function_call":
105
- // gemini
106
- case "stop":
107
- break;
108
- case "length":
109
- throw new InvokeError(
110
- InvokeErrorType.CONTEXT_LENGTH,
111
- "Response truncated: max tokens reached"
112
- );
113
- case "content_filter":
114
- throw new InvokeError(InvokeErrorType.CONTENT_FILTER, "Content filtered by safety system");
115
- default:
116
- throw new InvokeError(
117
- InvokeErrorType.UNKNOWN,
118
- `Unexpected finish_reason: ${choice.finish_reason}`
119
- );
120
- }
121
- const actionSchema = inputSchema.shape.action;
122
- if (!actionSchema) {
123
- throw new Error('inputSchema must have an "action" field');
124
- }
125
- let arg = null;
126
- const toolCall = choice.message?.tool_calls?.[0]?.function;
127
- arg = toolCall?.arguments ?? null;
128
- if (arg && toolCall.name !== "AgentOutput") {
129
- console.log(chalk.yellow("lenientParseMacroToolCall: #1 fixing incorrect tool call"));
130
- let tmpArg;
131
- try {
132
- tmpArg = JSON.parse(arg);
133
- } catch (error) {
134
- throw new InvokeError(
135
- InvokeErrorType.INVALID_TOOL_ARGS,
136
- "Failed to parse tool arguments as JSON",
137
- error
138
- );
139
- }
140
- arg = JSON.stringify({ action: { [toolCall.name]: tmpArg } });
141
- }
142
- if (!arg) {
143
- arg = choice.message?.content.trim() || null;
144
- }
145
- if (!arg) {
146
- throw new InvokeError(
147
- InvokeErrorType.NO_TOOL_CALL,
148
- "No tool call or content found in response",
149
- responseData
150
- );
151
- }
152
- let parsedArgs;
153
- try {
154
- parsedArgs = JSON.parse(arg);
155
- } catch (error) {
156
- throw new InvokeError(
157
- InvokeErrorType.INVALID_TOOL_ARGS,
158
- "Failed to parse tool arguments as JSON",
159
- error
160
- );
161
- }
162
- if (parsedArgs.action || parsedArgs.evaluation_previous_goal || parsedArgs.next_goal) {
163
- if (!parsedArgs.action) {
164
- console.log(chalk.yellow("lenientParseMacroToolCall: #2 fixing incorrect tool call"));
165
- parsedArgs.action = {
166
- wait: { seconds: 1 }
167
- };
168
- }
169
- } else if (parsedArgs.type && parsedArgs.function) {
170
- if (parsedArgs.function.name !== "AgentOutput")
171
- throw new InvokeError(
172
- InvokeErrorType.INVALID_TOOL_ARGS,
173
- `Expected function name "AgentOutput", got "${parsedArgs.function.name}"`,
174
- null
175
- );
176
- console.log(chalk.yellow("lenientParseMacroToolCall: #3 fixing incorrect tool call"));
177
- parsedArgs = parsedArgs.function.arguments;
178
- } else if (parsedArgs.name && parsedArgs.arguments) {
179
- if (parsedArgs.name !== "AgentOutput")
180
- throw new InvokeError(
181
- InvokeErrorType.INVALID_TOOL_ARGS,
182
- `Expected function name "AgentOutput", got "${parsedArgs.name}"`,
183
- null
184
- );
185
- console.log(chalk.yellow("lenientParseMacroToolCall: #4 fixing incorrect tool call"));
186
- parsedArgs = parsedArgs.arguments;
187
- } else {
188
- console.log(chalk.yellow("lenientParseMacroToolCall: #5 fixing incorrect tool call"));
189
- parsedArgs = { action: parsedArgs };
190
- }
191
- if (typeof parsedArgs === "string") {
192
- console.log(chalk.yellow("lenientParseMacroToolCall: #6 fixing incorrect tool call"));
193
- try {
194
- parsedArgs = JSON.parse(parsedArgs);
195
- } catch (error) {
196
- throw new InvokeError(
197
- InvokeErrorType.INVALID_TOOL_ARGS,
198
- "Failed to parse nested tool arguments as JSON",
199
- error
200
- );
201
- }
202
- }
203
- const validation = inputSchema.safeParse(parsedArgs);
204
- if (validation.success) {
205
- return validation.data;
206
- } else {
207
- const action = parsedArgs.action ?? {};
208
- const actionName = Object.keys(action)[0] || "unknown";
209
- const actionArgs = JSON.stringify(action[actionName] || "unknown");
210
- throw new InvokeError(
211
- InvokeErrorType.INVALID_TOOL_ARGS,
212
- `Tool arguments validation failed: action "${actionName}" with args ${actionArgs}`,
213
- validation.error
214
- );
215
- }
216
- }
217
- __name(lenientParseMacroToolCall, "lenientParseMacroToolCall");
218
- function modelPatch(body) {
219
- const model = body.model || "";
220
- if (model.toLowerCase().startsWith("claude")) {
221
- body.tool_choice = { type: "tool", name: "AgentOutput" };
222
- body.thinking = { type: "disabled" };
223
- }
224
- if (model.toLowerCase().includes("grok")) {
225
- console.log("Applying Grok patch: removing tool_choice");
226
- delete body.tool_choice;
227
- console.log("Applying Grok patch: disable reasoning and thinking");
228
- body.thinking = { type: "disabled", effort: "minimal" };
229
- body.reasoning = { enabled: false, effort: "low" };
230
- }
231
- return body;
232
- }
233
- __name(modelPatch, "modelPatch");
234
- const _OpenAIClient = class _OpenAIClient {
235
- config;
236
- constructor(config) {
237
- this.config = config;
238
- }
239
- async invoke(messages, tools2, abortSignal) {
240
- const openaiTools = Object.entries(tools2).map(([name, tool22]) => zodToOpenAITool(name, tool22));
241
- let response;
242
- try {
243
- response = await fetch(`${this.config.baseURL}/chat/completions`, {
244
- method: "POST",
245
- headers: {
246
- "Content-Type": "application/json",
247
- Authorization: `Bearer ${this.config.apiKey}`
248
- },
249
- body: JSON.stringify(
250
- modelPatch({
251
- model: this.config.model,
252
- temperature: this.config.temperature,
253
- max_tokens: this.config.maxTokens,
254
- messages,
255
- tools: openaiTools,
256
- // tool_choice: 'required',
257
- tool_choice: { type: "function", function: { name: "AgentOutput" } },
258
- // model specific params
259
- // reasoning_effort: 'minimal',
260
- // verbosity: 'low',
261
- parallel_tool_calls: false
262
- })
263
- ),
264
- signal: abortSignal
265
- });
266
- } catch (error) {
267
- throw new InvokeError(InvokeErrorType.NETWORK_ERROR, "Network request failed", error);
268
- }
269
- if (!response.ok) {
270
- const errorData = await response.json().catch();
271
- const errorMessage = errorData.error?.message || response.statusText;
272
- if (response.status === 401 || response.status === 403) {
273
- throw new InvokeError(
274
- InvokeErrorType.AUTH_ERROR,
275
- `Authentication failed: ${errorMessage}`,
276
- errorData
277
- );
278
- }
279
- if (response.status === 429) {
280
- throw new InvokeError(
281
- InvokeErrorType.RATE_LIMIT,
282
- `Rate limit exceeded: ${errorMessage}`,
283
- errorData
284
- );
285
- }
286
- if (response.status >= 500) {
287
- throw new InvokeError(
288
- InvokeErrorType.SERVER_ERROR,
289
- `Server error: ${errorMessage}`,
290
- errorData
291
- );
292
- }
293
- throw new InvokeError(
294
- InvokeErrorType.UNKNOWN,
295
- `HTTP ${response.status}: ${errorMessage}`,
296
- errorData
297
- );
298
- }
299
- const data = await response.json();
300
- const tool2 = tools2.AgentOutput;
301
- const macroToolInput = lenientParseMacroToolCall(data, tool2.inputSchema);
302
- let toolResult;
303
- try {
304
- toolResult = await tool2.execute(macroToolInput);
305
- } catch (e) {
306
- throw new InvokeError(
307
- InvokeErrorType.TOOL_EXECUTION_ERROR,
308
- `Tool execution failed: ${e.message}`,
309
- e
310
- );
311
- }
312
- return {
313
- toolCall: {
314
- // id: toolCall.id,
315
- name: "AgentOutput",
316
- args: macroToolInput
317
- },
318
- toolResult,
319
- usage: {
320
- promptTokens: data.usage?.prompt_tokens ?? 0,
321
- completionTokens: data.usage?.completion_tokens ?? 0,
322
- totalTokens: data.usage?.total_tokens ?? 0,
323
- cachedTokens: data.usage?.prompt_tokens_details?.cached_tokens,
324
- reasoningTokens: data.usage?.completion_tokens_details?.reasoning_tokens
325
- },
326
- rawResponse: data
327
- };
328
- }
329
- };
330
- __name(_OpenAIClient, "OpenAIClient");
331
- let OpenAIClient = _OpenAIClient;
332
- const _LLM = class _LLM extends EventTarget {
333
- config;
334
- client;
335
- constructor(config) {
336
- super();
337
- this.config = parseLLMConfig(config);
338
- this.client = new OpenAIClient({
339
- model: this.config.model,
340
- apiKey: this.config.apiKey,
341
- baseURL: this.config.baseURL,
342
- temperature: this.config.temperature,
343
- maxTokens: this.config.maxTokens
344
- });
345
- }
346
- /**
347
- * - call llm api *once*
348
- * - invoke tool call *once*
349
- * - return the result of the tool
350
- */
351
- async invoke(messages, tools2, abortSignal) {
352
- return await withRetry(
353
- async () => {
354
- const result = await this.client.invoke(messages, tools2, abortSignal);
355
- return result;
356
- },
357
- // retry settings
358
- {
359
- maxRetries: this.config.maxRetries,
360
- onRetry: /* @__PURE__ */ __name((current) => {
361
- this.dispatchEvent(
362
- new CustomEvent("retry", { detail: { current, max: this.config.maxRetries } })
363
- );
364
- }, "onRetry"),
365
- onError: /* @__PURE__ */ __name((error) => {
366
- this.dispatchEvent(new CustomEvent("error", { detail: { error } }));
367
- }, "onError")
368
- }
369
- );
370
- }
371
- };
372
- __name(_LLM, "LLM");
373
- let LLM = _LLM;
374
- async function withRetry(fn, settings) {
375
- let retries = 0;
376
- let lastError = null;
377
- while (retries <= settings.maxRetries) {
378
- if (retries > 0) {
379
- settings.onRetry(retries);
380
- await new Promise((resolve) => setTimeout(resolve, 100));
381
- }
382
- try {
383
- return await fn();
384
- } catch (error) {
385
- console.error(error);
386
- settings.onError(error);
387
- if (error?.name === "AbortError") throw error;
388
- if (error instanceof InvokeError && !error.retryable) throw error;
389
- lastError = error;
390
- retries++;
391
- await new Promise((resolve) => setTimeout(resolve, 100));
392
- }
393
- }
394
- throw lastError;
395
- }
396
- __name(withRetry, "withRetry");
397
20
  const SYSTEM_PROMPT = 'You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.\n\n<intro>\nYou excel at following tasks:\n1. Navigating complex websites and extracting precise information\n2. Automating form submissions and interactive web actions\n3. Gathering and saving information \n4. Operate effectively in an agent loop\n5. Efficiently performing diverse web tasks\n</intro>\n\n<language_settings>\n- Default working language: **中文**\n- Use the language that user is using. Return in user\'s language.\n</language_settings>\n\n<input>\nAt every step, your input will consist of: \n1. <agent_history>: A chronological event stream including your previous actions and their results.\n2. <agent_state>: Current <user_request> and <step_info>.\n3. <browser_state>: Current URL, interactive elements indexed for actions, and visible page content.\n</input>\n\n<agent_history>\nAgent history will be given as a list of step information as follows:\n\n<step_{step_number}>:\nEvaluation of Previous Step: Assessment of last action\nMemory: Your memory of this step\nNext Goal: Your goal for this step\nAction Results: Your actions and their results\n</step_{step_number}>\n\nand system messages wrapped in <sys> tag.\n</agent_history>\n\n<user_request>\nUSER REQUEST: This is your ultimate objective and always remains visible.\n- This has the highest priority. Make the user happy.\n- If the user request is very specific - then carefully follow each step and dont skip or hallucinate steps.\n- If the task is open ended you can plan yourself how to get it done.\n</user_request>\n\n<browser_state>\n1. Browser State will be given as:\n\nCurrent URL: URL of the page you are currently viewing.\nInteractive Elements: All interactive elements will be provided in format as [index]<type>text</type> where\n- index: Numeric identifier for interaction\n- type: HTML element type (button, input, etc.)\n- text: Element description\n\nExamples:\n[33]<div>User form</div>\n\\t*[35]<button aria-label=\'Submit form\'>Submit</button>\n\nNote that:\n- Only elements with numeric indexes in [] are interactive\n- (stacked) indentation (with \\t) is important and means that the element is a (html) child of the element above (with a lower index)\n- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.\n- Pure text elements without [] are not interactive.\n</browser_state>\n\n<browser_rules>\nStrictly follow these rules while using the browser and navigating the web:\n- Only interact with elements that have a numeric [index] assigned.\n- Only use indexes that are explicitly provided.\n- If the page changes after, for example, an input text action, analyze if you need to interact with new elements, e.g. selecting the right option from the list.\n- By default, only elements in the visible viewport are listed. Use scrolling actions if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.\n- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).\n- All the elements that are scrollable are marked with `data-scrollable` attribute. Including the scrollable distance in every directions. You can scroll *the element* in case some area are overflowed.\n- If a captcha appears, tell user you can not solve captcha. finished the task and ask user to solve it.\n- If expected elements are missing, try scrolling, or navigating back.\n- If the page is not fully loaded, use the `wait` action.\n- Do not repeat one action for more than 3 times unless some conditions changed.\n- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.\n- If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.\n- The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.\n- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.\n- Don\'t login into a page if you don\'t have to. Don\'t login if you don\'t have the credentials. \n- There are 2 types of tasks always first think which type of request you are dealing with:\n1. Very specific step by step instructions:\n- Follow them as very precise and don\'t skip steps. Try to complete everything as requested.\n2. Open ended tasks. Plan yourself, be creative in achieving them.\n- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.\n</browser_rules>\n\n<capability>\n- You can only handle single page app. Do not jump out of current page.\n- Do not click on link if it will open in a new page (etc. <a target="_blank">)\n- It is ok to fail the task.\n - User can be wrong. If the request of user is not achievable, inappropriate or you do not have enough information or tools to achieve it. Tell user to make a better request.\n - Webpage can be broken. All webpages or apps have bugs. Some bug will make it hard for your job. It\'s encouraged to tell user the problem of current page. Your feedbacks (including failing) are valuable for user.\n - Trying to hard can be harmful. Repeating some action back and forth or pushing for a complex procedure with little knowledge can cause unwanted result and harmful side-effects. User would rather you to complete the task with a fail.\n- If you are not clear about the request or steps. `ask_user` to clarify it.\n- If you do not have knowledge for the current webpage or task. You must require user to give specific instructions and detailed steps.\n</capability>\n\n<task_completion_rules>\nYou must call the `done` action in one of three cases:\n- When you have fully completed the USER REQUEST.\n- When you reach the final allowed step (`max_steps`), even if the task is incomplete.\n- When you feel stuck or unable to solve user request. Or user request is not clear or contains inappropriate content.\n- If it is ABSOLUTELY IMPOSSIBLE to continue.\n\nThe `done` action is your opportunity to terminate and share your findings with the user.\n- Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.\n- If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.\n- You can use the `text` field of the `done` action to communicate your findings and to provide a coherent reply to the user and fulfill the USER REQUEST.\n- You are ONLY ALLOWED to call `done` as a single action. Don\'t call it together with other actions.\n- If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.\n- If the user asks for a structured output, your `done` action\'s schema may be modified. Take this schema into account when solving the task!\n</task_completion_rules>\n\n<reasoning_rules>\nExhibit the following reasoning patterns to successfully achieve the <user_request>:\n\n- Reason about <agent_history> to track progress and context toward <user_request>.\n- Analyze the most recent "Next Goal" and "Action Result" in <agent_history> and clearly state what you previously tried to achieve.\n- Analyze all relevant items in <agent_history> and <browser_state> to understand your state.\n- Explicitly judge success/failure/uncertainty of the last action. Never assume an action succeeded just because it appears to be executed in your last step in <agent_history>. If the expected change is missing, mark the last action as failed (or uncertain) and plan a recovery.\n- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches e.g. scrolling for more context or ask user for help.\n- `ask_user` for help if you have any difficulty. Users want to be kept in the loop.\n- If you see information relevant to <user_request>, plan saving the information to memory.\n- Always reason about the <user_request>. Make sure to carefully analyze the specific steps and information required. E.g. specific filters, specific form fields, specific information to search. Make sure to always compare the current trajectory with the user request and think carefully if thats how the user requested it.\n</reasoning_rules>\n\n<examples>\nHere are examples of good output patterns. Use them as reference but never copy them directly.\n\n<evaluation_examples>\n- Positive Examples:\n"evaluation_previous_goal": "Successfully navigated to the product page and found the target information. Verdict: Success"\n"evaluation_previous_goal": "Clicked the login button and user authentication form appeared. Verdict: Success"\n</evaluation_examples>\n\n<memory_examples>\n"memory": "Found many pending reports that need to be analyzed in the main page. Successfully processed the first 2 reports on quarterly sales data and moving on to inventory analysis and customer feedback reports."\n</memory_examples>\n\n<next_goal_examples>\n"next_goal": "Click on the \'Add to Cart\' button to proceed with the purchase flow."\n"next_goal": "Extract details from the first item on the page."\n</next_goal_examples>\n</examples>\n\n<output>\nYou must ALWAYS respond with a valid JSON in this exact format:\n\n{\n "evaluation_previous_goal": "Concise one-sentence analysis of your last action. Clearly state success, failure, or uncertain.",\n "memory": "1-3 concise sentences of specific memory of this step and overall progress. You should put here everything that will help you track progress in future steps. Like counting pages visited, items found, etc.",\n "next_goal": "State the next immediate goal and action to achieve it, in one clear sentence."\n "action":{"one_action_name": {// action-specific parameter}}\n}\n</output>\n';
398
21
  async function waitUntil(check, timeout = 60 * 601e3) {
399
22
  if (check()) return true;