page-agent 0.0.24 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,7 +54,7 @@ declare interface AgentConfig {
54
54
  getPageInstructions?: (url: string) => string | undefined | null;
55
55
  };
56
56
  onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void;
57
- onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void;
57
+ onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void;
58
58
  onBeforeTask?: (this: PageAgent) => Promise<void> | void;
59
59
  onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void;
60
60
  /**
@@ -100,24 +100,11 @@ declare interface AgentConfig {
100
100
  experimentalPreventNewPage?: boolean;
101
101
  }
102
102
 
103
- export declare interface AgentHistory {
104
- brain: Partial<AgentReflection>;
105
- action: {
106
- name: string;
107
- input: any;
108
- output: string;
109
- };
110
- usage: {
111
- promptTokens: number;
112
- completionTokens: number;
113
- totalTokens: number;
114
- cachedTokens?: number;
115
- reasoningTokens?: number;
116
- };
117
- }
103
+ /** @deprecated Use AgentStep instead */
104
+ export declare type AgentHistory = AgentStep;
118
105
 
119
106
  /**
120
- * Agent brain state - the reflection-before-action model
107
+ * Agent reflection state - the reflection-before-action model
121
108
  *
122
109
  * Every tool call must first reflect on:
123
110
  * - evaluation_previous_goal: How well did the previous action achieve its goal?
@@ -130,12 +117,37 @@ export declare interface AgentReflection {
130
117
  next_goal: string;
131
118
  }
132
119
 
120
+ /**
121
+ * A single agent step with reflection and action
122
+ */
123
+ export declare interface AgentStep {
124
+ type: 'step';
125
+ reflection: Partial<AgentReflection>;
126
+ action: {
127
+ name: string;
128
+ input: any;
129
+ output: string;
130
+ };
131
+ usage: {
132
+ promptTokens: number;
133
+ completionTokens: number;
134
+ totalTokens: number;
135
+ cachedTokens?: number;
136
+ reasoningTokens?: number;
137
+ };
138
+ }
139
+
133
140
  export declare interface ExecutionResult {
134
141
  success: boolean;
135
142
  data: string;
136
- history: AgentHistory[];
143
+ history: HistoryEvent[];
137
144
  }
138
145
 
146
+ /**
147
+ * Union type for all history events
148
+ */
149
+ export declare type HistoryEvent = AgentStep | ObservationEvent | UserTakeoverEvent;
150
+
139
151
  /**
140
152
  * MacroTool input structure
141
153
  *
@@ -154,6 +166,14 @@ export declare interface MacroToolResult {
154
166
  output: string;
155
167
  }
156
168
 
169
+ /**
170
+ * Persistent observation event (stays in memory)
171
+ */
172
+ export declare interface ObservationEvent {
173
+ type: 'observation';
174
+ content: string;
175
+ }
176
+
157
177
  export declare class PageAgent extends EventTarget {
158
178
  #private;
159
179
  config: PageAgentConfig;
@@ -166,9 +186,21 @@ export declare class PageAgent extends EventTarget {
166
186
  taskId: string;
167
187
  /** PageController for DOM operations */
168
188
  pageController: PageController;
169
- /** History records */
170
- history: AgentHistory[];
171
- constructor(config?: PageAgentConfig);
189
+ /** Runtime states for tracking across steps */
190
+ states: {
191
+ /** Accumulated wait time in seconds, used by wait tool */
192
+ totalWaitTime: number;
193
+ /** Last known URL for detecting navigation */
194
+ lastURL: string;
195
+ };
196
+ /** History event stream */
197
+ history: HistoryEvent[];
198
+ constructor(config: PageAgentConfig);
199
+ /**
200
+ * Push a persistent observation to the history event stream.
201
+ * This will be visible in <agent_history> and remain in memory across steps.
202
+ */
203
+ pushObservation(content: string): void;
172
204
  execute(task: string): Promise<ExecutionResult>;
173
205
  dispose(reason?: string): void;
174
206
  }
@@ -192,4 +224,11 @@ export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgen
192
224
  */
193
225
  declare const tools: Map<string, PageAgentTool<any>>;
194
226
 
227
+ /**
228
+ * User takeover event
229
+ */
230
+ export declare interface UserTakeoverEvent {
231
+ type: 'user_takeover';
232
+ }
233
+
195
234
  export { }
@@ -10,7 +10,7 @@ var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read fr
10
10
  var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
11
11
  var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), setter ? setter.call(obj, value) : member.set(obj, value), value);
12
12
  var __privateMethod = (obj, member, method) => (__accessCheck(obj, member, "access private method"), method);
13
- var _llm, _totalWaitTime, _abortController, _llmRetryListener, _llmErrorListener, _beforeUnloadListener, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, getInstructions_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn;
13
+ var _llm, _abortController, _llmRetryListener, _llmErrorListener, _beforeUnloadListener, _PageAgent_instances, packMacroTool_fn, getSystemPrompt_fn, getInstructions_fn, generateObservations_fn, assembleUserPrompt_fn, onDone_fn, getBrowserState_fn;
14
14
  import { LLM } from "@page-agent/llms";
15
15
  import { PageController } from "@page-agent/page-controller";
16
16
  import { Panel } from "@page-agent/ui";
@@ -191,6 +191,12 @@ tools.set(
191
191
  const actualWaitTime = Math.max(0, input.seconds - (Date.now() - lastTimeUpdate) / 1e3);
192
192
  console.log(`actualWaitTime: ${actualWaitTime} seconds`);
193
193
  await waitFor(actualWaitTime);
194
+ this.states.totalWaitTime += input.seconds;
195
+ if (this.states.totalWaitTime >= 3) {
196
+ this.pushObservation(
197
+ `You have waited ${this.states.totalWaitTime} seconds accumulatively. Do NOT wait any longer unless you have a good reason.`
198
+ );
199
+ }
194
200
  return `✅ Waited for ${input.seconds} seconds.`;
195
201
  }, "execute")
196
202
  })
@@ -305,7 +311,7 @@ function assert(condition, message, silent) {
305
311
  }
306
312
  __name(assert, "assert");
307
313
  const _PageAgent = class _PageAgent extends EventTarget {
308
- constructor(config = {}) {
314
+ constructor(config) {
309
315
  super();
310
316
  __privateAdd(this, _PageAgent_instances);
311
317
  __publicField(this, "config");
@@ -317,14 +323,20 @@ const _PageAgent = class _PageAgent extends EventTarget {
317
323
  __publicField(this, "task", "");
318
324
  __publicField(this, "taskId", "");
319
325
  __privateAdd(this, _llm);
320
- __privateAdd(this, _totalWaitTime, 0);
321
326
  __privateAdd(this, _abortController, new AbortController());
322
327
  __privateAdd(this, _llmRetryListener, null);
323
328
  __privateAdd(this, _llmErrorListener, null);
324
329
  __privateAdd(this, _beforeUnloadListener, null);
325
330
  /** PageController for DOM operations */
326
331
  __publicField(this, "pageController");
327
- /** History records */
332
+ /** Runtime states for tracking across steps */
333
+ __publicField(this, "states", {
334
+ /** Accumulated wait time in seconds, used by wait tool */
335
+ totalWaitTime: 0,
336
+ /** Last known URL for detecting navigation */
337
+ lastURL: ""
338
+ });
339
+ /** History event stream */
328
340
  __publicField(this, "history", []);
329
341
  this.config = config;
330
342
  __privateSet(this, _llm, new LLM(this.config));
@@ -370,6 +382,13 @@ const _PageAgent = class _PageAgent extends EventTarget {
370
382
  });
371
383
  window.addEventListener("beforeunload", __privateGet(this, _beforeUnloadListener));
372
384
  }
385
+ /**
386
+ * Push a persistent observation to the history event stream.
387
+ * This will be visible in <agent_history> and remain in memory across steps.
388
+ */
389
+ pushObservation(content) {
390
+ this.history.push({ type: "observation", content });
391
+ }
373
392
  async execute(task) {
374
393
  if (!task) throw new Error("Task is required");
375
394
  this.task = task;
@@ -388,9 +407,14 @@ const _PageAgent = class _PageAgent extends EventTarget {
388
407
  __privateSet(this, _abortController, new AbortController());
389
408
  }
390
409
  this.history = [];
410
+ this.states = {
411
+ totalWaitTime: 0,
412
+ lastURL: ""
413
+ };
391
414
  try {
392
415
  let step = 0;
393
416
  while (true) {
417
+ await __privateMethod(this, _PageAgent_instances, generateObservations_fn).call(this, step);
394
418
  await onBeforeStep.call(this, step);
395
419
  console.group(`step: ${step}`);
396
420
  if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
@@ -418,10 +442,10 @@ const _PageAgent = class _PageAgent extends EventTarget {
418
442
  const macroResult = result.toolResult;
419
443
  const input = macroResult.input;
420
444
  const output = macroResult.output;
421
- const brain = {
422
- evaluation_previous_goal: input.evaluation_previous_goal || "",
423
- memory: input.memory || "",
424
- next_goal: input.next_goal || ""
445
+ const reflection = {
446
+ evaluation_previous_goal: input.evaluation_previous_goal,
447
+ memory: input.memory,
448
+ next_goal: input.next_goal
425
449
  };
426
450
  const actionName = Object.keys(input.action)[0];
427
451
  const action = {
@@ -430,7 +454,8 @@ const _PageAgent = class _PageAgent extends EventTarget {
430
454
  output
431
455
  };
432
456
  this.history.push({
433
- brain,
457
+ type: "step",
458
+ reflection,
434
459
  action,
435
460
  usage: result.usage
436
461
  });
@@ -497,7 +522,6 @@ const _PageAgent = class _PageAgent extends EventTarget {
497
522
  }
498
523
  };
499
524
  _llm = new WeakMap();
500
- _totalWaitTime = new WeakMap();
501
525
  _abortController = new WeakMap();
502
526
  _llmRetryListener = new WeakMap();
503
527
  _llmErrorListener = new WeakMap();
@@ -537,29 +561,26 @@ packMacroTool_fn = /* @__PURE__ */ __name(function() {
537
561
  const action = input.action;
538
562
  const toolName = Object.keys(action)[0];
539
563
  const toolInput = action[toolName];
540
- const brain = trimLines(`✅: ${input.evaluation_previous_goal}
541
- 💾: ${input.memory}
542
- 🎯: ${input.next_goal}
543
- `);
544
- console.log(brain);
545
- this.panel.update({ type: "thinking", text: brain });
564
+ const reflectionLines = [];
565
+ if (input.evaluation_previous_goal)
566
+ reflectionLines.push(`✅: ${input.evaluation_previous_goal}`);
567
+ if (input.memory) reflectionLines.push(`💾: ${input.memory}`);
568
+ if (input.next_goal) reflectionLines.push(`🎯: ${input.next_goal}`);
569
+ const reflectionText = reflectionLines.length > 0 ? reflectionLines.join("\n") : "";
570
+ if (reflectionText) {
571
+ console.log(reflectionText);
572
+ this.panel.update({ type: "thinking", text: reflectionText });
573
+ }
546
574
  const tool2 = tools2.get(toolName);
547
575
  assert(tool2, `Tool ${toolName} not found. (@note should have been caught before this!!!)`);
548
576
  console.log(chalk.blue.bold(`Executing tool: ${toolName}`), toolInput);
549
577
  this.panel.update({ type: "toolExecuting", toolName, args: toolInput });
550
578
  const startTime = Date.now();
551
- let result = await tool2.execute.bind(this)(toolInput);
579
+ const result = await tool2.execute.bind(this)(toolInput);
552
580
  const duration = Date.now() - startTime;
553
581
  console.log(chalk.green.bold(`Tool (${toolName}) executed for ${duration}ms`), result);
554
- if (toolName === "wait") {
555
- __privateSet(this, _totalWaitTime, __privateGet(this, _totalWaitTime) + Math.round(toolInput.seconds + duration / 1e3));
556
- result += `
557
- <sys> You have waited ${__privateGet(this, _totalWaitTime)} seconds accumulatively.`;
558
- if (__privateGet(this, _totalWaitTime) >= 3)
559
- result += "\nDo NOT wait any longer unless you have a good reason.\n";
560
- result += "</sys>";
561
- } else {
562
- __privateSet(this, _totalWaitTime, 0);
582
+ if (toolName !== "wait") {
583
+ this.states.totalWaitTime = 0;
563
584
  }
564
585
  this.panel.update({
565
586
  type: "toolCompleted",
@@ -621,29 +642,57 @@ ${pageInstructions}
621
642
  result += "</instructions>\n\n";
622
643
  return result;
623
644
  }, "#getInstructions");
645
+ generateObservations_fn = /* @__PURE__ */ __name(async function(stepCount) {
646
+ const currentURL = await this.pageController.getCurrentUrl();
647
+ if (currentURL !== this.states.lastURL) {
648
+ this.pushObservation(`Page navigated to → ${currentURL}`);
649
+ this.states.lastURL = currentURL;
650
+ }
651
+ const remaining = MAX_STEPS - stepCount;
652
+ if (remaining === 5) {
653
+ this.pushObservation(
654
+ `⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
655
+ );
656
+ } else if (remaining === 2) {
657
+ this.pushObservation(
658
+ `⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
659
+ );
660
+ }
661
+ }, "#generateObservations");
624
662
  assembleUserPrompt_fn = /* @__PURE__ */ __name(async function() {
625
663
  let prompt = "";
626
664
  prompt += await __privateMethod(this, _PageAgent_instances, getInstructions_fn).call(this);
665
+ const stepCount = this.history.filter((e) => e.type === "step").length;
627
666
  prompt += `<agent_state>
628
667
  <user_request>
629
668
  ${this.task}
630
669
  </user_request>
631
670
  <step_info>
632
- Step ${this.history.length + 1} of ${MAX_STEPS} max possible steps
671
+ Step ${stepCount + 1} of ${MAX_STEPS} max possible steps
633
672
  Current date and time: ${(/* @__PURE__ */ new Date()).toISOString()}
634
673
  </step_info>
635
674
  </agent_state>
636
675
  `;
637
676
  prompt += "\n<agent_history>\n";
638
- this.history.forEach((history, index) => {
639
- prompt += `<step_${index + 1}>
640
- Evaluation of Previous Step: ${history.brain.evaluation_previous_goal}
641
- Memory: ${history.brain.memory}
642
- Next Goal: ${history.brain.next_goal}
643
- Action Results: ${history.action.output}
644
- </step_${index + 1}>
677
+ let stepIndex = 0;
678
+ for (const event of this.history) {
679
+ if (event.type === "step") {
680
+ stepIndex++;
681
+ prompt += `<step_${stepIndex}>
682
+ Evaluation of Previous Step: ${event.reflection.evaluation_previous_goal}
683
+ Memory: ${event.reflection.memory}
684
+ Next Goal: ${event.reflection.next_goal}
685
+ Action Results: ${event.action.output}
686
+ </step_${stepIndex}>
645
687
  `;
646
- });
688
+ } else if (event.type === "observation") {
689
+ prompt += `<sys>${event.content}</sys>
690
+ `;
691
+ } else if (event.type === "user_takeover") {
692
+ prompt += `<sys>User took over control and made changes to the page.</sys>
693
+ `;
694
+ }
695
+ }
647
696
  prompt += "</agent_history>\n\n";
648
697
  prompt += await __privateMethod(this, _PageAgent_instances, getBrowserState_fn).call(this);
649
698
  return trimLines(prompt);
@@ -660,45 +709,20 @@ onDone_fn = /* @__PURE__ */ __name(function(text, success = true) {
660
709
  __privateGet(this, _abortController).abort();
661
710
  }, "#onDone");
662
711
  getBrowserState_fn = /* @__PURE__ */ __name(async function() {
663
- const pageUrl = await this.pageController.getCurrentUrl();
664
- const pageTitle = await this.pageController.getPageTitle();
665
- const pi = await this.pageController.getPageInfo();
666
- const viewportExpansion = await this.pageController.getViewportExpansion();
667
- await this.pageController.updateTree();
668
- let simplifiedHTML = await this.pageController.getSimplifiedHTML();
712
+ const state = await this.pageController.getBrowserState();
713
+ let content = state.content;
669
714
  if (this.config.transformPageContent) {
670
- simplifiedHTML = await this.config.transformPageContent(simplifiedHTML);
715
+ content = await this.config.transformPageContent(content);
671
716
  }
672
- let prompt = trimLines(`<browser_state>
673
- Current Page: [${pageTitle}](${pageUrl})
674
-
675
- Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total page size, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, ${pi.total_pages.toFixed(1)} total pages, at ${(pi.current_page_position * 100).toFixed(0)}% of page
717
+ return trimLines(`<browser_state>
718
+ Current Page: [${state.title}](${state.url})
676
719
 
677
- ${viewportExpansion === -1 ? "Interactive elements from top layer of the current page (full page):" : "Interactive elements from top layer of the current page inside the viewport:"}
720
+ ${state.header}
721
+ ${content}
722
+ ${state.footer}
678
723
 
724
+ </browser_state>
679
725
  `);
680
- const has_content_above = pi.pixels_above > 4;
681
- if (has_content_above && viewportExpansion !== -1) {
682
- prompt += `... ${pi.pixels_above} pixels above (${pi.pages_above.toFixed(1)} pages) - scroll to see more ...
683
- `;
684
- } else {
685
- prompt += `[Start of page]
686
- `;
687
- }
688
- prompt += simplifiedHTML;
689
- prompt += `
690
- `;
691
- const has_content_below = pi.pixels_below > 4;
692
- if (has_content_below && viewportExpansion !== -1) {
693
- prompt += `... ${pi.pixels_below} pixels below (${pi.pages_below.toFixed(1)} pages) - scroll to see more ...
694
- `;
695
- } else {
696
- prompt += `[End of page]
697
- `;
698
- }
699
- prompt += `</browser_state>
700
- `;
701
- return prompt;
702
726
  }, "#getBrowserState");
703
727
  __name(_PageAgent, "PageAgent");
704
728
  let PageAgent = _PageAgent;