page-agent 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -67,16 +67,17 @@ await agent.execute('Click the login button')
67
67
 
68
68
  ## πŸ—οΈ Structure
69
69
 
70
- PageAgent follows a clean, modular architecture:
70
+ PageAgent adopts a simplified monorepo structure:
71
71
 
72
72
  ```
73
- src/
74
- ├── PageAgent.ts # Agent main loop
75
- ├── dom/ # DOM processing
76
- ├── tools/ # Agent tools
77
- ├── ui/ # UI components & panels
78
- ├── llms/ # LLM integration layer
79
- └── utils/ # Event bus & utilities
73
+ packages/
74
+ ├── page-agent/ # AI agent (npm: page-agent)
75
+ │ ├── PageAgent # Agent main loop
76
+ │ ├── tools/ # LLM tool definitions
77
+ │ ├── ui/ # UI components & panels
78
+ │ └── llms/ # LLM integration layer
79
+ ├── page-controller/ # DOM operations (npm: @page-agent/page-controller)
80
+ └── website/ # Demo & Documentation site
80
81
  ```
81
82
 
82
83
  ## 🤝 Contributing
@@ -1,4 +1,6 @@
1
1
  import { Motion } from 'ai-motion';
2
+ import { PageController } from '@page-agent/page-controller';
3
+ import { PageControllerConfig } from '@page-agent/page-controller';
2
4
  import { z } from 'zod';
3
5
 
4
6
  export declare interface AgentBrain {
@@ -89,31 +91,6 @@ declare type DeepStringify<T> = {
89
91
  [K in keyof T]: T[K] extends string ? string : T[K] extends object ? DeepStringify<T[K]> : T[K];
90
92
  };
91
93
 
92
- declare interface DomConfig {
93
- interactiveBlacklist?: (Element | (() => Element))[];
94
- interactiveWhitelist?: (Element | (() => Element))[];
95
- include_attributes?: string[];
96
- highlightOpacity?: number;
97
- highlightLabelOpacity?: number;
98
- }
99
-
100
- declare type DomNode = TextDomNode | ElementDomNode | InteractiveElementDomNode;
101
-
102
- declare interface ElementDomNode {
103
- tagName: string;
104
- attributes?: Record<string, string>;
105
- xpath?: string;
106
- children?: string[];
107
- isVisible?: boolean;
108
- isTopElement?: boolean;
109
- isInViewport?: boolean;
110
- isNew?: boolean;
111
- isInteractive?: false;
112
- highlightIndex?: number;
113
- extra?: Record<string, any>;
114
- [key: string]: unknown;
115
- }
116
-
117
94
  declare const enUS: {
118
95
  readonly ui: {
119
96
  readonly panel: {
@@ -196,11 +173,6 @@ export declare interface ExecutionResult {
196
173
  history: AgentHistory[];
197
174
  }
198
175
 
199
- declare interface FlatDomTree {
200
- rootId: string;
201
- map: Record<string, DomNode>;
202
- }
203
-
204
176
  declare class I18n {
205
177
  private language;
206
178
  private translations;
@@ -211,23 +183,6 @@ declare class I18n {
211
183
  getLanguage(): SupportedLanguage;
212
184
  }
213
185
 
214
- declare interface InteractiveElementDomNode {
215
- tagName: string;
216
- attributes?: Record<string, string>;
217
- xpath?: string;
218
- children?: string[];
219
- isVisible?: boolean;
220
- isTopElement?: boolean;
221
- isInViewport?: boolean;
222
- isInteractive: true;
223
- highlightIndex: number;
224
- /**
225
- * ε―δΊ€δΊ’ε…ƒη΄ ηš„ dom 引用
226
- */
227
- ref: HTMLElement;
228
- [key: string]: unknown;
229
- }
230
-
231
186
  declare interface LLMConfig {
232
187
  baseURL?: string;
233
188
  apiKey?: string;
@@ -370,19 +325,8 @@ export declare class PageAgent extends EventTarget {
370
325
  disposed: boolean;
371
326
  task: string;
372
327
  taskId: string;
373
- /** Corresponds to eval_page in browser-use */
374
- flatTree: FlatDomTree | null;
375
- /**
376
- * All highlighted index-mapped interactive elements
377
- * Corresponds to DOMState.selector_map in browser-use
378
- */
379
- selectorMap: Map<number, InteractiveElementDomNode>;
380
- /** highlight index -> element text */
381
- elementTextMap: Map<number, string>;
382
- /** Corresponds to clickable_elements_to_string in browser-use */
383
- simplifiedHTML: string;
384
- /** last time the tree was updated */
385
- lastTimeUpdate: number;
328
+ /** PageController for DOM operations */
329
+ pageController: PageController;
386
330
  /** Fullscreen mask */
387
331
  mask: SimulatorMask;
388
332
  /** History records */
@@ -395,7 +339,7 @@ export declare class PageAgent extends EventTarget {
395
339
  dispose(reason?: string): void;
396
340
  }
397
341
 
398
- export declare type PageAgentConfig = LLMConfig & AgentConfig & DomConfig;
342
+ export declare type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig;
399
343
 
400
344
  /**
401
345
  * Event mapping definitions
@@ -477,13 +421,6 @@ declare interface Step {
477
421
 
478
422
  declare type SupportedLanguage = keyof typeof locales;
479
423
 
480
- declare interface TextDomNode {
481
- type: 'TEXT_NODE';
482
- text: string;
483
- isVisible: boolean;
484
- [key: string]: unknown;
485
- }
486
-
487
424
  export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams>;
488
425
 
489
426
  /**