page-agent 0.2.4 → 0.3.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,710 +1,24 @@
1
1
  var __defProp = Object.defineProperty;
2
- var __typeError = (msg) => {
3
- throw TypeError(msg);
4
- };
5
- var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
6
2
  var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
7
- var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
8
- var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
9
- var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), getter ? getter.call(obj) : member.get(obj));
10
- var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
11
- var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
12
- var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
13
- var _llm, _abortController, _llmRetryListener, _llmErrorListener, _beforeUnloadListener, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, getInstructions_fn, generateObservations_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn;
14
- import { LLM } from "@page-agent/llms";
3
+ import { PageAgentCore } from "@page-agent/core";
15
4
  import { PageController } from "@page-agent/page-controller";
16
5
  import { Panel } from "@page-agent/ui";
17
- import chalk from "chalk";
18
- import zod from "zod";
19
- const MAX_STEPS = 20;
20
- const SYSTEM_PROMPT = 'You are an AI agent designed to operate in an iterative loop to automate browser tasks. Your ultimate goal is accomplishing the task provided in <user_request>.\n\n<intro>\nYou excel at following tasks:\n1. Navigating complex websites and extracting precise information\n2. Automating form submissions and interactive web actions\n3. Gathering and saving information \n4. Operate effectively in an agent loop\n5. Efficiently performing diverse web tasks\n</intro>\n\n<language_settings>\n- Default working language: **中文**\n- Use the language that user is using. Return in user\'s language.\n</language_settings>\n\n<input>\nAt every step, your input will consist of: \n1. <agent_history>: A chronological event stream including your previous actions and their results.\n2. <agent_state>: Current <user_request> and <step_info>.\n3. <browser_state>: Current URL, interactive elements indexed for actions, and visible page content.\n</input>\n\n<agent_history>\nAgent history will be given as a list of step information as follows:\n\n<step_{step_number}>:\nEvaluation of Previous Step: Assessment of last action\nMemory: Your memory of this step\nNext Goal: Your goal for this step\nAction Results: Your actions and their results\n</step_{step_number}>\n\nand system messages wrapped in <sys> tag.\n</agent_history>\n\n<user_request>\nUSER REQUEST: This is your ultimate objective and always remains visible.\n- This has the highest priority. Make the user happy.\n- If the user request is very specific - then carefully follow each step and dont skip or hallucinate steps.\n- If the task is open ended you can plan yourself how to get it done.\n</user_request>\n\n<browser_state>\n1. Browser State will be given as:\n\nCurrent URL: URL of the page you are currently viewing.\nInteractive Elements: All interactive elements will be provided in format as [index]<type>text</type> where\n- index: Numeric identifier for interaction\n- type: HTML element type (button, input, etc.)\n- text: Element description\n\nExamples:\n[33]<div>User form</div>\n\\t*[35]<button aria-label=\'Submit form\'>Submit</button>\n\nNote that:\n- Only elements with numeric indexes in [] are interactive\n- (stacked) indentation (with \\t) is important and means that the element is a (html) child of the element above (with a lower index)\n- Elements tagged with `*[` are the new clickable elements that appeared on the website since the last step - if url has not changed.\n- Pure text elements without [] are not interactive.\n</browser_state>\n\n<browser_rules>\nStrictly follow these rules while using the browser and navigating the web:\n- Only interact with elements that have a numeric [index] assigned.\n- Only use indexes that are explicitly provided.\n- If the page changes after, for example, an input text action, analyze if you need to interact with new elements, e.g. selecting the right option from the list.\n- By default, only elements in the visible viewport are listed. Use scrolling actions if you suspect relevant content is offscreen which you need to interact with. Scroll ONLY if there are more pixels below or above the page.\n- You can scroll by a specific number of pages using the num_pages parameter (e.g., 0.5 for half page, 2.0 for two pages).\n- All the elements that are scrollable are marked with `data-scrollable` attribute. Including the scrollable distance in every directions. You can scroll *the element* in case some area are overflowed.\n- If a captcha appears, tell user you can not solve captcha. finished the task and ask user to solve it.\n- If expected elements are missing, try scrolling, or navigating back.\n- If the page is not fully loaded, use the `wait` action.\n- Do not repeat one action for more than 3 times unless some conditions changed.\n- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.\n- If the <user_request> includes specific page information such as product type, rating, price, location, etc., try to apply filters to be more efficient.\n- The <user_request> is the ultimate goal. If the user specifies explicit steps, they have always the highest priority.\n- If you input_text into a field, you might need to press enter, click the search button, or select from dropdown for completion.\n- Don\'t login into a page if you don\'t have to. Don\'t login if you don\'t have the credentials. \n- There are 2 types of tasks always first think which type of request you are dealing with:\n1. Very specific step by step instructions:\n- Follow them as very precise and don\'t skip steps. Try to complete everything as requested.\n2. Open ended tasks. Plan yourself, be creative in achieving them.\n- If you get stuck e.g. with logins or captcha in open-ended tasks you can re-evaluate the task and try alternative ways, e.g. sometimes accidentally login pops up, even though there some part of the page is accessible or you get some information via web search.\n</browser_rules>\n\n<capability>\n- You can only handle single page app. Do not jump out of current page.\n- Do not click on link if it will open in a new page (etc. <a target="_blank">)\n- It is ok to fail the task.\n - User can be wrong. If the request of user is not achievable, inappropriate or you do not have enough information or tools to achieve it. Tell user to make a better request.\n - Webpage can be broken. All webpages or apps have bugs. Some bug will make it hard for your job. It\'s encouraged to tell user the problem of current page. Your feedbacks (including failing) are valuable for user.\n - Trying to hard can be harmful. Repeating some action back and forth or pushing for a complex procedure with little knowledge can cause unwanted result and harmful side-effects. User would rather you to complete the task with a fail.\n- If you are not clear about the request or steps. `ask_user` to clarify it.\n- If you do not have knowledge for the current webpage or task. You must require user to give specific instructions and detailed steps.\n</capability>\n\n<task_completion_rules>\nYou must call the `done` action in one of three cases:\n- When you have fully completed the USER REQUEST.\n- When you reach the final allowed step (`max_steps`), even if the task is incomplete.\n- When you feel stuck or unable to solve user request. Or user request is not clear or contains inappropriate content.\n- If it is ABSOLUTELY IMPOSSIBLE to continue.\n\nThe `done` action is your opportunity to terminate and share your findings with the user.\n- Set `success` to `true` only if the full USER REQUEST has been completed with no missing components.\n- If any part of the request is missing, incomplete, or uncertain, set `success` to `false`.\n- You can use the `text` field of the `done` action to communicate your findings and to provide a coherent reply to the user and fulfill the USER REQUEST.\n- You are ONLY ALLOWED to call `done` as a single action. Don\'t call it together with other actions.\n- If the user asks for specified format, such as "return JSON with following structure", "return a list of format...", MAKE sure to use the right format in your answer.\n- If the user asks for a structured output, your `done` action\'s schema may be modified. Take this schema into account when solving the task!\n</task_completion_rules>\n\n<reasoning_rules>\nExhibit the following reasoning patterns to successfully achieve the <user_request>:\n\n- Reason about <agent_history> to track progress and context toward <user_request>.\n- Analyze the most recent "Next Goal" and "Action Result" in <agent_history> and clearly state what you previously tried to achieve.\n- Analyze all relevant items in <agent_history> and <browser_state> to understand your state.\n- Explicitly judge success/failure/uncertainty of the last action. Never assume an action succeeded just because it appears to be executed in your last step in <agent_history>. If the expected change is missing, mark the last action as failed (or uncertain) and plan a recovery.\n- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches e.g. scrolling for more context or ask user for help.\n- `ask_user` for help if you have any difficulty. Users want to be kept in the loop.\n- If you see information relevant to <user_request>, plan saving the information to memory.\n- Always reason about the <user_request>. Make sure to carefully analyze the specific steps and information required. E.g. specific filters, specific form fields, specific information to search. Make sure to always compare the current trajectory with the user request and think carefully if thats how the user requested it.\n</reasoning_rules>\n\n<examples>\nHere are examples of good output patterns. Use them as reference but never copy them directly.\n\n<evaluation_examples>\n- Positive Examples:\n"evaluation_previous_goal": "Successfully navigated to the product page and found the target information. Verdict: Success"\n"evaluation_previous_goal": "Clicked the login button and user authentication form appeared. Verdict: Success"\n</evaluation_examples>\n\n<memory_examples>\n"memory": "Found many pending reports that need to be analyzed in the main page. Successfully processed the first 2 reports on quarterly sales data and moving on to inventory analysis and customer feedback reports."\n</memory_examples>\n\n<next_goal_examples>\n"next_goal": "Click on the \'Add to Cart\' button to proceed with the purchase flow."\n"next_goal": "Extract details from the first item on the page."\n</next_goal_examples>\n</examples>\n\n<output>\nYou must ALWAYS respond with a valid JSON in this exact format:\n\n{\n "evaluation_previous_goal": "Concise one-sentence analysis of your last action. Clearly state success, failure, or uncertain.",\n "memory": "1-3 concise sentences of specific memory of this step and overall progress. You should put here everything that will help you track progress in future steps. Like counting pages visited, items found, etc.",\n "next_goal": "State the next immediate goal and action to achieve it, in one clear sentence."\n "action":{"one_action_name": {// action-specific parameter}}\n}\n</output>\n';
21
- function normalizeResponse(response) {
22
- let resolvedArguments = null;
23
- const choice = response.choices?.[0];
24
- if (!choice) throw new Error("No choices in response");
25
- const message = choice.message;
26
- if (!message) throw new Error("No message in choice");
27
- const toolCall = message.tool_calls?.[0];
28
- if (toolCall?.function?.arguments) {
29
- resolvedArguments = safeJsonParse(toolCall.function.arguments);
30
- if (toolCall.function.name && toolCall.function.name !== "AgentOutput") {
31
- console.log(chalk.yellow(`[normalizeResponse] #1: fixing tool_call`));
32
- resolvedArguments = { action: safeJsonParse(resolvedArguments) };
33
- }
34
- } else {
35
- if (message.content) {
36
- const content = message.content.trim();
37
- const jsonInContent = retrieveJsonFromString(content);
38
- if (jsonInContent) {
39
- resolvedArguments = safeJsonParse(jsonInContent);
40
- if (resolvedArguments?.name === "AgentOutput") {
41
- console.log(chalk.yellow(`[normalizeResponse] #2: fixing tool_call`));
42
- resolvedArguments = safeJsonParse(resolvedArguments.arguments);
43
- }
44
- if (resolvedArguments?.type === "function") {
45
- console.log(chalk.yellow(`[normalizeResponse] #3: fixing tool_call`));
46
- resolvedArguments = safeJsonParse(resolvedArguments.function.arguments);
47
- }
48
- if (!resolvedArguments?.action && !resolvedArguments?.evaluation_previous_goal && !resolvedArguments?.memory && !resolvedArguments?.next_goal && !resolvedArguments?.thinking) {
49
- console.log(chalk.yellow(`[normalizeResponse] #4: fixing tool_call`));
50
- resolvedArguments = { action: safeJsonParse(resolvedArguments) };
51
- }
52
- } else {
53
- throw new Error("No tool_call and the message content does not contain valid JSON");
54
- }
55
- } else {
56
- throw new Error("No tool_call nor message content is present");
57
- }
58
- }
59
- resolvedArguments = safeJsonParse(resolvedArguments);
60
- if (resolvedArguments.action) {
61
- resolvedArguments.action = safeJsonParse(resolvedArguments.action);
62
- }
63
- if (!resolvedArguments.action) {
64
- console.log(chalk.yellow(`[normalizeResponse] #5: fixing tool_call`));
65
- resolvedArguments.action = { name: "wait", input: { seconds: 1 } };
66
- }
67
- return {
68
- ...response,
69
- choices: [
70
- {
71
- ...choice,
72
- message: {
73
- ...message,
74
- tool_calls: [
75
- {
76
- ...toolCall || {},
77
- function: {
78
- ...toolCall?.function || {},
79
- name: "AgentOutput",
80
- arguments: JSON.stringify(resolvedArguments)
81
- }
82
- }
83
- ]
84
- }
85
- }
86
- ]
87
- };
88
- }
89
- __name(normalizeResponse, "normalizeResponse");
90
- function safeJsonParse(input) {
91
- if (typeof input === "string") {
92
- try {
93
- return JSON.parse(input.trim());
94
- } catch {
95
- return input;
96
- }
97
- }
98
- return input;
99
- }
100
- __name(safeJsonParse, "safeJsonParse");
101
- function retrieveJsonFromString(str) {
102
- try {
103
- const json = /({[\s\S]*})/.exec(str) ?? [];
104
- if (json.length === 0) {
105
- return null;
106
- }
107
- return JSON.parse(json[0]);
108
- } catch {
109
- return null;
110
- }
111
- }
112
- __name(retrieveJsonFromString, "retrieveJsonFromString");
113
- async function waitFor(seconds) {
114
- await new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
115
- }
116
- __name(waitFor, "waitFor");
117
- function trimLines(text) {
118
- return text.split("\n").map((line) => line.trim()).join("\n");
119
- }
120
- __name(trimLines, "trimLines");
121
- function randomID(existingIDs) {
122
- let id = Math.random().toString(36).substring(2, 11);
123
- if (!existingIDs) {
124
- return id;
125
- }
126
- const MAX_TRY = 1e3;
127
- let tryCount = 0;
128
- while (existingIDs.includes(id)) {
129
- id = Math.random().toString(36).substring(2, 11);
130
- tryCount++;
131
- if (tryCount > MAX_TRY) {
132
- throw new Error("randomID: too many try");
133
- }
134
- }
135
- return id;
136
- }
137
- __name(randomID, "randomID");
138
- if (!window.__PAGE_AGENT_IDS__) {
139
- window.__PAGE_AGENT_IDS__ = [];
140
- }
141
- const ids = window.__PAGE_AGENT_IDS__;
142
- function uid() {
143
- const id = randomID(ids);
144
- ids.push(id);
145
- return id;
146
- }
147
- __name(uid, "uid");
148
- function tool(options) {
149
- return options;
150
- }
151
- __name(tool, "tool");
152
- const tools = /* @__PURE__ */ new Map();
153
- tools.set(
154
- "done",
155
- tool({
156
- description: "Complete task - provide a summary of results for the user. Set success=True if task completed successfully, false otherwise. Text should be your response to the user summarizing results.",
157
- inputSchema: zod.object({
158
- text: zod.string(),
159
- success: zod.boolean().default(true)
160
- }),
161
- execute: /* @__PURE__ */ __name(async function(input) {
162
- return Promise.resolve("Task completed");
163
- }, "execute")
164
- })
165
- );
166
- tools.set(
167
- "wait",
168
- tool({
169
- description: "Wait for x seconds. default 1s (max 10 seconds, min 1 second). This can be used to wait until the page or data is fully loaded.",
170
- inputSchema: zod.object({
171
- seconds: zod.number().min(1).max(10).default(1)
172
- }),
173
- execute: /* @__PURE__ */ __name(async function(input) {
174
- const lastTimeUpdate = await this.pageController.getLastUpdateTime();
175
- const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1e3);
176
- console.log(`actualWaitTime: ${actualWaitTime} seconds`);
177
- await waitFor(actualWaitTime);
178
- this.states.totalWaitTime += input.seconds;
179
- if (this.states.totalWaitTime >= 3) {
180
- this.pushObservation(
181
- `You have waited ${this.states.totalWaitTime} seconds accumulatively. Do NOT wait any longer unless you have a good reason.`
182
- );
183
- }
184
- return `✅ Waited for ${input.seconds} seconds.`;
185
- }, "execute")
186
- })
187
- );
188
- tools.set(
189
- "ask_user",
190
- tool({
191
- description: "Ask the user a question and wait for their answer. Use this if you need more information or clarification.",
192
- inputSchema: zod.object({
193
- question: zod.string()
194
- }),
195
- execute: /* @__PURE__ */ __name(async function(input) {
196
- const answer = await this.panel.askUser(input.question);
197
- return `✅ Received user answer: ${answer}`;
198
- }, "execute")
199
- })
200
- );
201
- tools.set(
202
- "click_element_by_index",
203
- tool({
204
- description: "Click element by index",
205
- inputSchema: zod.object({
206
- index: zod.int().min(0)
207
- }),
208
- execute: /* @__PURE__ */ __name(async function(input) {
209
- const result = await this.pageController.clickElement(input.index);
210
- return result.message;
211
- }, "execute")
212
- })
213
- );
214
- tools.set(
215
- "input_text",
216
- tool({
217
- description: "Click and input text into a input interactive element",
218
- inputSchema: zod.object({
219
- index: zod.int().min(0),
220
- text: zod.string()
221
- }),
222
- execute: /* @__PURE__ */ __name(async function(input) {
223
- const result = await this.pageController.inputText(input.index, input.text);
224
- return result.message;
225
- }, "execute")
226
- })
227
- );
228
- tools.set(
229
- "select_dropdown_option",
230
- tool({
231
- description: "Select dropdown option for interactive element index by the text of the option you want to select",
232
- inputSchema: zod.object({
233
- index: zod.int().min(0),
234
- text: zod.string()
235
- }),
236
- execute: /* @__PURE__ */ __name(async function(input) {
237
- const result = await this.pageController.selectOption(input.index, input.text);
238
- return result.message;
239
- }, "execute")
240
- })
241
- );
242
- tools.set(
243
- "scroll",
244
- tool({
245
- description: "Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 1.0 for one page, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components). Optional pixels parameter to scroll by a specific number of pixels instead of pages.",
246
- inputSchema: zod.object({
247
- down: zod.boolean().default(true),
248
- num_pages: zod.number().min(0).max(10).optional().default(0.1),
249
- pixels: zod.number().int().min(0).optional(),
250
- index: zod.number().int().min(0).optional()
251
- }),
252
- execute: /* @__PURE__ */ __name(async function(input) {
253
- const result = await this.pageController.scroll({
254
- ...input,
255
- numPages: input.num_pages
256
- });
257
- return result.message;
258
- }, "execute")
259
- })
260
- );
261
- tools.set(
262
- "scroll_horizontally",
263
- tool({
264
- description: "Scroll the page or element horizontally (set right=True to scroll right, right=False to scroll left, pixels=number of pixels to scroll). Optional index parameter to scroll within a specific element or its scroll container (works well for wide tables).",
265
- inputSchema: zod.object({
266
- right: zod.boolean().default(true),
267
- pixels: zod.number().int().min(0),
268
- index: zod.number().int().min(0).optional()
269
- }),
270
- execute: /* @__PURE__ */ __name(async function(input) {
271
- const result = await this.pageController.scrollHorizontally(input);
272
- return result.message;
273
- }, "execute")
274
- })
275
- );
276
- tools.set(
277
- "execute_javascript",
278
- tool({
279
- description: "Execute JavaScript code on the current page. Supports async/await syntax. Use with caution!",
280
- inputSchema: zod.object({
281
- script: zod.string()
282
- }),
283
- execute: /* @__PURE__ */ __name(async function(input) {
284
- const result = await this.pageController.executeJavascript(input.script);
285
- return result.message;
286
- }, "execute")
287
- })
288
- );
289
- function assert(condition, message, silent) {
290
- if (!condition) {
291
- const errorMessage = message ?? "Assertion failed";
292
- console.error(chalk.red(`❌ assert: ${errorMessage}`));
293
- throw new Error(errorMessage);
294
- }
295
- }
296
- __name(assert, "assert");
297
- const _PageAgent = class _PageAgent extends EventTarget {
6
+ const _PageAgent = class _PageAgent extends PageAgentCore {
7
+ panel;
298
8
  constructor(config) {
299
- super();
300
- __privateAdd(this, _PageAgent_instances);
301
- __publicField(this, "config");
302
- __publicField(this, "id", uid());
303
- __publicField(this, "panel");
304
- __publicField(this, "tools");
305
- __publicField(this, "disposed", false);
306
- __publicField(this, "task", "");
307
- __publicField(this, "taskId", "");
308
- __privateAdd(this, _llm);
309
- __privateAdd(this, _abortController, new AbortController());
310
- __privateAdd(this, _llmRetryListener, null);
311
- __privateAdd(this, _llmErrorListener, null);
312
- __privateAdd(this, _beforeUnloadListener, null);
313
- /** PageController for DOM operations */
314
- __publicField(this, "pageController");
315
- /** Runtime states for tracking across steps */
316
- __publicField(this, "states", {
317
- /** Accumulated wait time in seconds, used by wait tool */
318
- totalWaitTime: 0,
319
- /** Last known URL for detecting navigation */
320
- lastURL: ""
321
- });
322
- /** History event stream */
323
- __publicField(this, "history", []);
324
- this.config = config;
325
- __privateSet(this, _llm, new LLM(this.config));
326
- this.panel = new Panel({
327
- language: this.config.language,
328
- onExecuteTask: /* @__PURE__ */ __name((task) => this.execute(task), "onExecuteTask"),
329
- onStop: /* @__PURE__ */ __name(() => this.dispose(), "onStop")
330
- });
331
- this.tools = new Map(tools);
332
- this.pageController = new PageController({
333
- ...this.config,
334
- enableMask: this.config.enableMask ?? true
335
- });
336
- __privateSet(this, _llmRetryListener, (e) => {
337
- const { current, max } = e.detail;
338
- this.panel.update({ type: "retry", current, max });
9
+ const pageController = new PageController({
10
+ ...config,
11
+ enableMask: config.enableMask ?? true
339
12
  });
340
- __privateSet(this, _llmErrorListener, (e) => {
341
- const { error } = e.detail;
342
- this.panel.update({ type: "error", message: `step failed: ${error.message}` });
13
+ super({ ...config, pageController });
14
+ this.panel = new Panel(this, {
15
+ language: config.language
343
16
  });
344
- __privateGet(this, _llm).addEventListener("retry", __privateGet(this, _llmRetryListener));
345
- __privateGet(this, _llm).addEventListener("error", __privateGet(this, _llmErrorListener));
346
- if (this.config.customTools) {
347
- for (const [name, tool2] of Object.entries(this.config.customTools)) {
348
- if (tool2 === null) {
349
- this.tools.delete(name);
350
- continue;
351
- }
352
- this.tools.set(name, tool2);
353
- }
354
- }
355
- if (!this.config.experimentalScriptExecutionTool) {
356
- this.tools.delete("execute_javascript");
357
- }
358
- __privateSet(this, _beforeUnloadListener, (e) => {
359
- if (!this.disposed) this.dispose("PAGE_UNLOADING");
360
- });
361
- window.addEventListener("beforeunload", __privateGet(this, _beforeUnloadListener));
362
- }
363
- /**
364
- * Push a persistent observation to the history event stream.
365
- * This will be visible in <agent_history> and remain in memory across steps.
366
- */
367
- pushObservation(content) {
368
- this.history.push({ type: "observation", content });
369
- this.panel.update({ type: "observation", content });
370
- }
371
- async execute(task) {
372
- if (!task) throw new Error("Task is required");
373
- this.task = task;
374
- this.taskId = uid();
375
- const onBeforeStep = this.config.onBeforeStep || (() => void 0);
376
- const onAfterStep = this.config.onAfterStep || (() => void 0);
377
- const onBeforeTask = this.config.onBeforeTask || (() => void 0);
378
- const onAfterTask = this.config.onAfterTask || (() => void 0);
379
- await onBeforeTask.call(this);
380
- this.pageController.showMask();
381
- this.panel.show();
382
- this.panel.reset();
383
- this.panel.update({ type: "input", task: this.task });
384
- if (__privateGet(this, _abortController)) {
385
- __privateGet(this, _abortController).abort();
386
- __privateSet(this, _abortController, new AbortController());
387
- }
388
- this.history = [];
389
- this.states = {
390
- totalWaitTime: 0,
391
- lastURL: ""
392
- };
393
- try {
394
- let step = 0;
395
- while (true) {
396
- await __privateMethod(this, _PageAgent_instances, generateObservations_fn).call(this, step);
397
- await onBeforeStep.call(this, step);
398
- console.group(`step: ${step}`);
399
- if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
400
- console.log(chalk.blue("Thinking..."));
401
- this.panel.update({ type: "thinking" });
402
- const result = await __privateGet(this, _llm).invoke(
403
- [
404
- {
405
- role: "system",
406
- content: __privateMethod(this, _PageAgent_instances, getSystemPrompt_fn).call(this)
407
- },
408
- {
409
- role: "user",
410
- content: await __privateMethod(this, _PageAgent_instances, assembleUserPrompt_fn).call(this)
411
- }
412
- ],
413
- { AgentOutput: __privateMethod(this, _PageAgent_instances, packMacroTool_fn).call(this) },
414
- __privateGet(this, _abortController).signal,
415
- {
416
- toolChoiceName: "AgentOutput",
417
- normalizeResponse
418
- }
419
- );
420
- const macroResult = result.toolResult;
421
- const input = macroResult.input;
422
- const output = macroResult.output;
423
- const reflection = {
424
- evaluation_previous_goal: input.evaluation_previous_goal,
425
- memory: input.memory,
426
- next_goal: input.next_goal
427
- };
428
- const actionName = Object.keys(input.action)[0];
429
- const action = {
430
- name: actionName,
431
- input: input.action[actionName],
432
- output
433
- };
434
- this.history.push({
435
- type: "step",
436
- reflection,
437
- action,
438
- usage: result.usage
439
- });
440
- console.log(chalk.green("Step finished:"), actionName);
441
- console.groupEnd();
442
- await onAfterStep.call(this, step, this.history);
443
- step++;
444
- if (step > MAX_STEPS) {
445
- __privateMethod(this, _PageAgent_instances, onDone_fn).call(this, "Step count exceeded maximum limit", false);
446
- const result2 = {
447
- success: false,
448
- data: "Step count exceeded maximum limit",
449
- history: this.history
450
- };
451
- await onAfterTask.call(this, result2);
452
- return result2;
453
- }
454
- if (actionName === "done") {
455
- const success = action.input?.success ?? false;
456
- const text = action.input?.text || "no text provided";
457
- console.log(chalk.green.bold("Task completed"), success, text);
458
- __privateMethod(this, _PageAgent_instances, onDone_fn).call(this, text, success);
459
- const result2 = {
460
- success,
461
- data: text,
462
- history: this.history
463
- };
464
- await onAfterTask.call(this, result2);
465
- return result2;
466
- }
467
- }
468
- } catch (error) {
469
- console.error("Task failed", error);
470
- __privateMethod(this, _PageAgent_instances, onDone_fn).call(this, String(error), false);
471
- const result = {
472
- success: false,
473
- data: String(error),
474
- history: this.history
475
- };
476
- await onAfterTask.call(this, result);
477
- return result;
478
- }
479
- }
480
- dispose(reason) {
481
- console.log("Disposing PageAgent...");
482
- this.disposed = true;
483
- this.pageController.dispose();
484
- this.panel.dispose();
485
- this.history = [];
486
- __privateGet(this, _abortController).abort(reason ?? "PageAgent disposed");
487
- if (__privateGet(this, _llmRetryListener)) {
488
- __privateGet(this, _llm).removeEventListener("retry", __privateGet(this, _llmRetryListener));
489
- __privateSet(this, _llmRetryListener, null);
490
- }
491
- if (__privateGet(this, _llmErrorListener)) {
492
- __privateGet(this, _llm).removeEventListener("error", __privateGet(this, _llmErrorListener));
493
- __privateSet(this, _llmErrorListener, null);
494
- }
495
- if (__privateGet(this, _beforeUnloadListener)) {
496
- window.removeEventListener("beforeunload", __privateGet(this, _beforeUnloadListener));
497
- __privateSet(this, _beforeUnloadListener, null);
498
- }
499
- this.config.onDispose?.call(this, reason);
500
17
  }
501
18
  };
502
- _llm = new WeakMap();
503
- _abortController = new WeakMap();
504
- _llmRetryListener = new WeakMap();
505
- _llmErrorListener = new WeakMap();
506
- _beforeUnloadListener = new WeakMap();
507
- _PageAgent_instances = new WeakSet();
508
- /**
509
- * Merge all tools into a single MacroTool with the following input:
510
- * - thinking: string
511
- * - evaluation_previous_goal: string
512
- * - memory: string
513
- * - next_goal: string
514
- * - action: { toolName: toolInput }
515
- * where action must be selected from tools defined in this.tools
516
- */
517
- packMacroTool_fn = /* @__PURE__ */ __name(function() {
518
- const tools2 = this.tools;
519
- const actionSchemas = Array.from(tools2.entries()).map(([toolName, tool2]) => {
520
- return zod.object({ [toolName]: tool2.inputSchema }).describe(tool2.description);
521
- });
522
- const actionSchema = zod.union(
523
- actionSchemas
524
- );
525
- const macroToolSchema = zod.object({
526
- // thinking: zod.string().optional(),
527
- evaluation_previous_goal: zod.string().optional(),
528
- memory: zod.string().optional(),
529
- next_goal: zod.string().optional(),
530
- action: actionSchema
531
- });
532
- return {
533
- description: "You MUST call this tool every step. Outputs your reflections and next action.",
534
- inputSchema: macroToolSchema,
535
- execute: /* @__PURE__ */ __name(async (input) => {
536
- if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
537
- console.log(chalk.blue.bold("MacroTool execute"), input);
538
- const action = input.action;
539
- const toolName = Object.keys(action)[0];
540
- const toolInput = action[toolName];
541
- const reflectionLines = [];
542
- if (input.evaluation_previous_goal)
543
- reflectionLines.push(`✅: ${input.evaluation_previous_goal}`);
544
- if (input.memory) reflectionLines.push(`💾: ${input.memory}`);
545
- if (input.next_goal) reflectionLines.push(`🎯: ${input.next_goal}`);
546
- const reflectionText = reflectionLines.length > 0 ? reflectionLines.join("\n") : "";
547
- if (reflectionText) {
548
- console.log(reflectionText);
549
- this.panel.update({ type: "thinking", text: reflectionText });
550
- }
551
- const tool2 = tools2.get(toolName);
552
- assert(tool2, `Tool ${toolName} not found. (@note should have been caught before this!!!)`);
553
- console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput);
554
- this.panel.update({ type: "toolExecuting", toolName, args: toolInput });
555
- const startTime = Date.now();
556
- const result = await tool2.execute.bind(this)(toolInput);
557
- const duration = Date.now() - startTime;
558
- console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result);
559
- if (toolName !== "wait") {
560
- this.states.totalWaitTime = 0;
561
- }
562
- this.panel.update({
563
- type: "toolCompleted",
564
- toolName,
565
- args: toolInput,
566
- result,
567
- duration
568
- });
569
- await new Promise((resolve) => setTimeout(resolve, 100));
570
- return {
571
- input,
572
- output: result
573
- };
574
- }, "execute")
575
- };
576
- }, "#packMacroTool");
577
- /**
578
- * Get system prompt, dynamically replace language settings based on configured language
579
- */
580
- getSystemPrompt_fn = /* @__PURE__ */ __name(function() {
581
- let systemPrompt = SYSTEM_PROMPT;
582
- const targetLanguage = this.config.language === "zh-CN" ? "中文" : "English";
583
- systemPrompt = systemPrompt.replace(
584
- /Default working language: \*\*.*?\*\*/,
585
- `Default working language: **${targetLanguage}**`
586
- );
587
- return systemPrompt;
588
- }, "#getSystemPrompt");
589
- getInstructions_fn = /* @__PURE__ */ __name(async function() {
590
- const { instructions } = this.config;
591
- if (!instructions) return "";
592
- const systemInstructions = instructions.system?.trim();
593
- const url = await this.pageController.getCurrentUrl();
594
- let pageInstructions;
595
- if (instructions.getPageInstructions) {
596
- try {
597
- pageInstructions = instructions.getPageInstructions(url)?.trim();
598
- } catch (error) {
599
- console.error(
600
- chalk.red("[PageAgent] Failed to execute getPageInstructions callback:"),
601
- error
602
- );
603
- }
604
- }
605
- if (!systemInstructions && !pageInstructions) return "";
606
- let result = "<instructions>\n";
607
- if (systemInstructions) {
608
- result += `<system_instructions>
609
- ${systemInstructions}
610
- </system_instructions>
611
- `;
612
- }
613
- if (pageInstructions) {
614
- result += `<page_instructions>
615
- ${pageInstructions}
616
- </page_instructions>
617
- `;
618
- }
619
- result += "</instructions>\n\n";
620
- return result;
621
- }, "#getInstructions");
622
- generateObservations_fn = /* @__PURE__ */ __name(async function(stepCount) {
623
- const currentURL = await this.pageController.getCurrentUrl();
624
- if (currentURL !== this.states.lastURL) {
625
- this.pushObservation(`Page navigated to → ${currentURL}`);
626
- this.states.lastURL = currentURL;
627
- }
628
- const remaining = MAX_STEPS - stepCount;
629
- if (remaining === 5) {
630
- this.pushObservation(
631
- `⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
632
- );
633
- } else if (remaining === 2) {
634
- this.pushObservation(
635
- `⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
636
- );
637
- }
638
- }, "#generateObservations");
639
- assembleUserPrompt_fn = /* @__PURE__ */ __name(async function() {
640
- let prompt = "";
641
- prompt += await __privateMethod(this, _PageAgent_instances, getInstructions_fn).call(this);
642
- const stepCount = this.history.filter((e) => e.type === "step").length;
643
- prompt += `<agent_state>
644
- <user_request>
645
- ${this.task}
646
- </user_request>
647
- <step_info>
648
- Step ${stepCount + 1} of ${MAX_STEPS} max possible steps
649
- Current date and time: ${(/* @__PURE__ */ new Date()).toISOString()}
650
- </step_info>
651
- </agent_state>
652
- `;
653
- prompt += "\n<agent_history>\n";
654
- let stepIndex = 0;
655
- for (const event of this.history) {
656
- if (event.type === "step") {
657
- stepIndex++;
658
- prompt += `<step_${stepIndex}>
659
- Evaluation of Previous Step: ${event.reflection.evaluation_previous_goal}
660
- Memory: ${event.reflection.memory}
661
- Next Goal: ${event.reflection.next_goal}
662
- Action Results: ${event.action.output}
663
- </step_${stepIndex}>
664
- `;
665
- } else if (event.type === "observation") {
666
- prompt += `<sys>${event.content}</sys>
667
- `;
668
- } else if (event.type === "user_takeover") {
669
- prompt += `<sys>User took over control and made changes to the page.</sys>
670
- `;
671
- }
672
- }
673
- prompt += "</agent_history>\n\n";
674
- prompt += await __privateMethod(this, _PageAgent_instances, getBrowserState_fn).call(this);
675
- return trimLines(prompt);
676
- }, "#assembleUserPrompt");
677
- onDone_fn = /* @__PURE__ */ __name(function(text, success = true) {
678
- this.pageController.cleanUpHighlights();
679
- if (success) {
680
- this.panel.update({ type: "output", text });
681
- } else {
682
- this.panel.update({ type: "error", message: text });
683
- }
684
- this.panel.update({ type: "completed" });
685
- this.pageController.hideMask();
686
- __privateGet(this, _abortController).abort();
687
- }, "#onDone");
688
- getBrowserState_fn = /* @__PURE__ */ __name(async function() {
689
- const state = await this.pageController.getBrowserState();
690
- let content = state.content;
691
- if (this.config.transformPageContent) {
692
- content = await this.config.transformPageContent(content);
693
- }
694
- return trimLines(`<browser_state>
695
- Current Page: [${state.title}](${state.url})
696
-
697
- ${state.header}
698
- ${content}
699
- ${state.footer}
700
-
701
- </browser_state>
702
- `);
703
- }, "#getBrowserState");
704
19
  __name(_PageAgent, "PageAgent");
705
20
  let PageAgent = _PageAgent;
706
21
  export {
707
- PageAgent,
708
- tool
22
+ PageAgent
709
23
  };
710
24
  //# sourceMappingURL=page-agent.js.map