page-agent 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,7 +54,7 @@ declare interface AgentConfig {
54
54
  getPageInstructions?: (url: string) => string | undefined | null;
55
55
  };
56
56
  onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void;
57
- onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void;
57
+ onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void;
58
58
  onBeforeTask?: (this: PageAgent) => Promise<void> | void;
59
59
  onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void;
60
60
  /**
@@ -100,24 +100,11 @@ declare interface AgentConfig {
100
100
  experimentalPreventNewPage?: boolean;
101
101
  }
102
102
 
103
- export declare interface AgentHistory {
104
- brain: Partial<AgentReflection>;
105
- action: {
106
- name: string;
107
- input: any;
108
- output: string;
109
- };
110
- usage: {
111
- promptTokens: number;
112
- completionTokens: number;
113
- totalTokens: number;
114
- cachedTokens?: number;
115
- reasoningTokens?: number;
116
- };
117
- }
103
+ /** @deprecated Use AgentStep instead */
104
+ export declare type AgentHistory = AgentStep;
118
105
 
119
106
  /**
120
- * Agent brain state - the reflection-before-action model
107
+ * Agent reflection state - the reflection-before-action model
121
108
  *
122
109
  * Every tool call must first reflect on:
123
110
  * - evaluation_previous_goal: How well did the previous action achieve its goal?
@@ -130,12 +117,37 @@ export declare interface AgentReflection {
130
117
  next_goal: string;
131
118
  }
132
119
 
120
+ /**
121
+ * A single agent step with reflection and action
122
+ */
123
+ export declare interface AgentStep {
124
+ type: 'step';
125
+ reflection: Partial<AgentReflection>;
126
+ action: {
127
+ name: string;
128
+ input: any;
129
+ output: string;
130
+ };
131
+ usage: {
132
+ promptTokens: number;
133
+ completionTokens: number;
134
+ totalTokens: number;
135
+ cachedTokens?: number;
136
+ reasoningTokens?: number;
137
+ };
138
+ }
139
+
133
140
  export declare interface ExecutionResult {
134
141
  success: boolean;
135
142
  data: string;
136
- history: AgentHistory[];
143
+ history: HistoryEvent[];
137
144
  }
138
145
 
146
+ /**
147
+ * Union type for all history events
148
+ */
149
+ export declare type HistoryEvent = AgentStep | ObservationEvent | UserTakeoverEvent;
150
+
139
151
  /**
140
152
  * MacroTool input structure
141
153
  *
@@ -154,21 +166,40 @@ export declare interface MacroToolResult {
154
166
  output: string;
155
167
  }
156
168
 
169
+ /**
170
+ * Persistent observation event (stays in memory)
171
+ */
172
+ export declare interface ObservationEvent {
173
+ type: 'observation';
174
+ content: string;
175
+ }
176
+
157
177
  export declare class PageAgent extends EventTarget {
158
178
  #private;
159
179
  config: PageAgentConfig;
160
180
  id: string;
161
181
  panel: Panel;
162
182
  tools: typeof tools;
163
- paused: boolean;
164
183
  disposed: boolean;
165
184
  task: string;
166
185
  taskId: string;
167
186
  /** PageController for DOM operations */
168
187
  pageController: PageController;
169
- /** History records */
170
- history: AgentHistory[];
188
+ /** Runtime states for tracking across steps */
189
+ states: {
190
+ /** Accumulated wait time in seconds, used by wait tool */
191
+ totalWaitTime: number;
192
+ /** Last known URL for detecting navigation */
193
+ lastURL: string;
194
+ };
195
+ /** History event stream */
196
+ history: HistoryEvent[];
171
197
  constructor(config: PageAgentConfig);
198
+ /**
199
+ * Push a persistent observation to the history event stream.
200
+ * This will be visible in <agent_history> and remain in memory across steps.
201
+ */
202
+ pushObservation(content: string): void;
172
203
  execute(task: string): Promise<ExecutionResult>;
173
204
  dispose(reason?: string): void;
174
205
  }
@@ -192,4 +223,11 @@ export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgen
192
223
  */
193
224
  declare const tools: Map<string, PageAgentTool<any>>;
194
225
 
226
+ /**
227
+ * User takeover event
228
+ */
229
+ export declare interface UserTakeoverEvent {
230
+ type: 'user_takeover';
231
+ }
232
+
195
233
  export { }
@@ -10,7 +10,7 @@ var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read fr
10
10
  var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
11
11
  var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
12
12
  var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
13
- var _llm, _totalWaitTime, _abortController, _llmRetryListener, _llmErrorListener, _beforeUnloadListener, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, getInstructions_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn;
13
+ var _llm, _abortController, _llmRetryListener, _llmErrorListener, _beforeUnloadListener, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, getInstructions_fn, generateObservations_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn;
14
14
  import { LLM } from "@page-agent/llms";
15
15
  import { PageController } from "@page-agent/page-controller";
16
16
  import { Panel } from "@page-agent/ui";
@@ -110,22 +110,6 @@ function retrieveJsonFromString(str) {
110
110
  }
111
111
  }
112
112
  __name(retrieveJsonFromString, "retrieveJsonFromString");
113
- async function waitUntil(check, timeout = 60 * 601e3) {
114
- if (check()) return true;
115
- return new Promise((resolve, reject) => {
116
- const start = Date.now();
117
- const interval = setInterval(() => {
118
- if (check()) {
119
- clearInterval(interval);
120
- resolve(true);
121
- } else if (Date.now() - start > timeout) {
122
- clearInterval(interval);
123
- reject(new Error("Timeout waiting for condition to become true"));
124
- }
125
- }, 100);
126
- });
127
- }
128
- __name(waitUntil, "waitUntil");
129
113
  async function waitFor(seconds) {
130
114
  await new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
131
115
  }
@@ -191,6 +175,12 @@ tools.set(
191
175
  const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1e3);
192
176
  console.log(`actualWaitTime: ${actualWaitTime} seconds`);
193
177
  await waitFor(actualWaitTime);
178
+ this.states.totalWaitTime += input.seconds;
179
+ if (this.states.totalWaitTime >= 3) {
180
+ this.pushObservation(
181
+ `You have waited ${this.states.totalWaitTime} seconds accumulatively. Do NOT wait any longer unless you have a good reason.`
182
+ );
183
+ }
194
184
  return `✅ Waited for ${input.seconds} seconds.`;
195
185
  }, "execute")
196
186
  })
@@ -312,31 +302,31 @@ const _PageAgent = class _PageAgent extends EventTarget {
312
302
  __publicField(this, "id", uid());
313
303
  __publicField(this, "panel");
314
304
  __publicField(this, "tools");
315
- __publicField(this, "paused", false);
316
305
  __publicField(this, "disposed", false);
317
306
  __publicField(this, "task", "");
318
307
  __publicField(this, "taskId", "");
319
308
  __privateAdd(this, _llm);
320
- __privateAdd(this, _totalWaitTime, 0);
321
309
  __privateAdd(this, _abortController, new AbortController());
322
310
  __privateAdd(this, _llmRetryListener, null);
323
311
  __privateAdd(this, _llmErrorListener, null);
324
312
  __privateAdd(this, _beforeUnloadListener, null);
325
313
  /** PageController for DOM operations */
326
314
  __publicField(this, "pageController");
327
- /** History records */
315
+ /** Runtime states for tracking across steps */
316
+ __publicField(this, "states", {
317
+ /** Accumulated wait time in seconds, used by wait tool */
318
+ totalWaitTime: 0,
319
+ /** Last known URL for detecting navigation */
320
+ lastURL: ""
321
+ });
322
+ /** History event stream */
328
323
  __publicField(this, "history", []);
329
324
  this.config = config;
330
325
  __privateSet(this, _llm, new LLM(this.config));
331
326
  this.panel = new Panel({
332
327
  language: this.config.language,
333
328
  onExecuteTask: /* @__PURE__ */ __name((task) => this.execute(task), "onExecuteTask"),
334
- onStop: /* @__PURE__ */ __name(() => this.dispose(), "onStop"),
335
- onPauseToggle: /* @__PURE__ */ __name(() => {
336
- this.paused = !this.paused;
337
- return this.paused;
338
- }, "onPauseToggle"),
339
- getPaused: /* @__PURE__ */ __name(() => this.paused, "getPaused")
329
+ onStop: /* @__PURE__ */ __name(() => this.dispose(), "onStop")
340
330
  });
341
331
  this.tools = new Map(tools);
342
332
  this.pageController = new PageController({
@@ -370,6 +360,14 @@ const _PageAgent = class _PageAgent extends EventTarget {
370
360
  });
371
361
  window.addEventListener("beforeunload", __privateGet(this, _beforeUnloadListener));
372
362
  }
363
+ /**
364
+ * Push a persistent observation to the history event stream.
365
+ * This will be visible in <agent_history> and remain in memory across steps.
366
+ */
367
+ pushObservation(content) {
368
+ this.history.push({ type: "observation", content });
369
+ this.panel.update({ type: "observation", content });
370
+ }
373
371
  async execute(task) {
374
372
  if (!task) throw new Error("Task is required");
375
373
  this.task = task;
@@ -388,13 +386,17 @@ const _PageAgent = class _PageAgent extends EventTarget {
388
386
  __privateSet(this, _abortController, new AbortController());
389
387
  }
390
388
  this.history = [];
389
+ this.states = {
390
+ totalWaitTime: 0,
391
+ lastURL: ""
392
+ };
391
393
  try {
392
394
  let step = 0;
393
395
  while (true) {
396
+ await __privateMethod(this, _PageAgent_instances, generateObservations_fn).call(this, step);
394
397
  await onBeforeStep.call(this, step);
395
398
  console.group(`step: ${step}`);
396
399
  if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
397
- await waitUntil(() => !this.paused);
398
400
  console.log(chalk.blue("Thinking..."));
399
401
  this.panel.update({ type: "thinking" });
400
402
  const result = await __privateGet(this, _llm).invoke(
@@ -418,10 +420,10 @@ const _PageAgent = class _PageAgent extends EventTarget {
418
420
  const macroResult = result.toolResult;
419
421
  const input = macroResult.input;
420
422
  const output = macroResult.output;
421
- const brain = {
422
- evaluation_previous_goal: input.evaluation_previous_goal || "",
423
- memory: input.memory || "",
424
- next_goal: input.next_goal || ""
423
+ const reflection = {
424
+ evaluation_previous_goal: input.evaluation_previous_goal,
425
+ memory: input.memory,
426
+ next_goal: input.next_goal
425
427
  };
426
428
  const actionName = Object.keys(input.action)[0];
427
429
  const action = {
@@ -430,7 +432,8 @@ const _PageAgent = class _PageAgent extends EventTarget {
430
432
  output
431
433
  };
432
434
  this.history.push({
433
- brain,
435
+ type: "step",
436
+ reflection,
434
437
  action,
435
438
  usage: result.usage
436
439
  });
@@ -497,7 +500,6 @@ const _PageAgent = class _PageAgent extends EventTarget {
497
500
  }
498
501
  };
499
502
  _llm = new WeakMap();
500
- _totalWaitTime = new WeakMap();
501
503
  _abortController = new WeakMap();
502
504
  _llmRetryListener = new WeakMap();
503
505
  _llmErrorListener = new WeakMap();
@@ -532,34 +534,30 @@ packMacroTool_fn = /* @__PURE__ */ __name(function() {
532
534
  inputSchema: macroToolSchema,
533
535
  execute: /* @__PURE__ */ __name(async (input) => {
534
536
  if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
535
- await waitUntil(() => !this.paused);
536
537
  console.log(chalk.blue.bold("MacroTool execute"), input);
537
538
  const action = input.action;
538
539
  const toolName = Object.keys(action)[0];
539
540
  const toolInput = action[toolName];
540
- const brain = trimLines(`✅: ${input.evaluation_previous_goal}
541
- 💾: ${input.memory}
542
- 🎯: ${input.next_goal}
543
- `);
544
- console.log(brain);
545
- this.panel.update({ type: "thinking", text: brain });
541
+ const reflectionLines = [];
542
+ if (input.evaluation_previous_goal)
543
+ reflectionLines.push(`✅: ${input.evaluation_previous_goal}`);
544
+ if (input.memory) reflectionLines.push(`💾: ${input.memory}`);
545
+ if (input.next_goal) reflectionLines.push(`🎯: ${input.next_goal}`);
546
+ const reflectionText = reflectionLines.length > 0 ? reflectionLines.join("\n") : "";
547
+ if (reflectionText) {
548
+ console.log(reflectionText);
549
+ this.panel.update({ type: "thinking", text: reflectionText });
550
+ }
546
551
  const tool2 = tools2.get(toolName);
547
552
  assert(tool2, `Tool ${toolName} not found. (@note should have been caught before this!!!)`);
548
553
  console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput);
549
554
  this.panel.update({ type: "toolExecuting", toolName, args: toolInput });
550
555
  const startTime = Date.now();
551
- let result = await tool2.execute.bind(this)(toolInput);
556
+ const result = await tool2.execute.bind(this)(toolInput);
552
557
  const duration = Date.now() - startTime;
553
558
  console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result);
554
- if (toolName === "wait") {
555
- __privateSet(this, _totalWaitTime, __privateGet(this, _totalWaitTime) + Math.round(toolInput.seconds + duration / 1e3));
556
- result += `
557
- <sys> You have waited ${__privateGet(this, _totalWaitTime)} seconds accumulatively.`;
558
- if (__privateGet(this, _totalWaitTime) >= 3)
559
- result += "\nDo NOT wait any longer unless you have a good reason.\n";
560
- result += "</sys>";
561
- } else {
562
- __privateSet(this, _totalWaitTime, 0);
559
+ if (toolName !== "wait") {
560
+ this.states.totalWaitTime = 0;
563
561
  }
564
562
  this.panel.update({
565
563
  type: "toolCompleted",
@@ -621,29 +619,57 @@ ${pageInstructions}
621
619
  result += "</instructions>\n\n";
622
620
  return result;
623
621
  }, "#getInstructions");
622
+ generateObservations_fn = /* @__PURE__ */ __name(async function(stepCount) {
623
+ const currentURL = await this.pageController.getCurrentUrl();
624
+ if (currentURL !== this.states.lastURL) {
625
+ this.pushObservation(`Page navigated to → ${currentURL}`);
626
+ this.states.lastURL = currentURL;
627
+ }
628
+ const remaining = MAX_STEPS - stepCount;
629
+ if (remaining === 5) {
630
+ this.pushObservation(
631
+ `⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
632
+ );
633
+ } else if (remaining === 2) {
634
+ this.pushObservation(
635
+ `⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
636
+ );
637
+ }
638
+ }, "#generateObservations");
624
639
  assembleUserPrompt_fn = /* @__PURE__ */ __name(async function() {
625
640
  let prompt = "";
626
641
  prompt += await __privateMethod(this, _PageAgent_instances, getInstructions_fn).call(this);
642
+ const stepCount = this.history.filter((e) => e.type === "step").length;
627
643
  prompt += `<agent_state>
628
644
  <user_request>
629
645
  ${this.task}
630
646
  </user_request>
631
647
  <step_info>
632
- Step ${this.history.length + 1} of ${MAX_STEPS} max possible steps
648
+ Step ${stepCount + 1} of ${MAX_STEPS} max possible steps
633
649
  Current date and time: ${(/* @__PURE__ */ new Date()).toISOString()}
634
650
  </step_info>
635
651
  </agent_state>
636
652
  `;
637
653
  prompt += "\n<agent_history>\n";
638
- this.history.forEach((history, index) => {
639
- prompt += `<step_${index + 1}>
640
- Evaluation of Previous Step: ${history.brain.evaluation_previous_goal}
641
- Memory: ${history.brain.memory}
642
- Next Goal: ${history.brain.next_goal}
643
- Action Results: ${history.action.output}
644
- </step_${index + 1}>
654
+ let stepIndex = 0;
655
+ for (const event of this.history) {
656
+ if (event.type === "step") {
657
+ stepIndex++;
658
+ prompt += `<step_${stepIndex}>
659
+ Evaluation of Previous Step: ${event.reflection.evaluation_previous_goal}
660
+ Memory: ${event.reflection.memory}
661
+ Next Goal: ${event.reflection.next_goal}
662
+ Action Results: ${event.action.output}
663
+ </step_${stepIndex}>
645
664
  `;
646
- });
665
+ } else if (event.type === "observation") {
666
+ prompt += `<sys>${event.content}</sys>
667
+ `;
668
+ } else if (event.type === "user_takeover") {
669
+ prompt += `<sys>User took over control and made changes to the page.</sys>
670
+ `;
671
+ }
672
+ }
647
673
  prompt += "</agent_history>\n\n";
648
674
  prompt += await __privateMethod(this, _PageAgent_instances, getBrowserState_fn).call(this);
649
675
  return trimLines(prompt);
@@ -660,45 +686,20 @@ onDone_fn = /* @__PURE__ */ __name(function(text, success = true) {
660
686
  __privateGet(this, _abortController).abort();
661
687
  }, "#onDone");
662
688
  getBrowserState_fn = /* @__PURE__ */ __name(async function() {
663
- const pageUrl = await this.pageController.getCurrentUrl();
664
- const pageTitle = await this.pageController.getPageTitle();
665
- const pi = await this.pageController.getPageInfo();
666
- const viewportExpansion = await this.pageController.getViewportExpansion();
667
- await this.pageController.updateTree();
668
- let simplifiedHTML = await this.pageController.getSimplifiedHTML();
689
+ const state = await this.pageController.getBrowserState();
690
+ let content = state.content;
669
691
  if (this.config.transformPageContent) {
670
- simplifiedHTML = await this.config.transformPageContent(simplifiedHTML);
692
+ content = await this.config.transformPageContent(content);
671
693
  }
672
- let prompt = trimLines(`<browser_state>
673
- Current Page: [${pageTitle}](${pageUrl})
694
+ return trimLines(`<browser_state>
695
+ Current Page: [${state.title}](${state.url})
674
696
 
675
- Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page
676
-
677
- ${viewportExpansion === -1 ? "Interactive elements from top layer of the current page (full page):" : "Interactive elements from top layer of the current page inside the viewport:"}
697
+ ${state.header}
698
+ ${content}
699
+ ${state.footer}
678
700
 
701
+ </browser_state>
679
702
  `);
680
- const has_content_above = pi.pixels_above > 4;
681
- if (has_content_above && viewportExpansion !== -1) {
682
- prompt += `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...
683
- `;
684
- } else {
685
- prompt += `[Start of page]
686
- `;
687
- }
688
- prompt += simplifiedHTML;
689
- prompt += `
690
- `;
691
- const has_content_below = pi.pixels_below > 4;
692
- if (has_content_below && viewportExpansion !== -1) {
693
- prompt += `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...
694
- `;
695
- } else {
696
- prompt += `[End of page]
697
- `;
698
- }
699
- prompt += `</browser_state>
700
- `;
701
- return prompt;
702
703
  }, "#getBrowserState");
703
704
  __name(_PageAgent, "PageAgent");
704
705
  let PageAgent = _PageAgent;