page-agent 0.0.6 → 0.0.9

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/README.md CHANGED
@@ -67,16 +67,17 @@ await agent.execute('Click the login button')
67
67
 
68
68
  ## 🏗️ Structure
69
69
 
70
- PageAgent follows a clean, modular architecture:
70
+ PageAgent adopts a simplified monorepo structure:
71
71
 
72
72
  ```
73
- src/
74
- ├── PageAgent.ts # Agent main loop
75
- ├── dom/ # DOM processing
76
- ├── tools/ # Agent tools
77
- ├── ui/ # UI components & panels
78
- ├── llms/ # LLM integration layer
79
- └── utils/ # Event bus & utilities
73
+ packages/
74
+ ├── page-agent/ # AI agent (npm: page-agent)
75
+ │ ├── PageAgent # Agent main loop
76
+ │ ├── tools/ # LLM tool definitions
77
+ │ ├── ui/ # UI components & panels
78
+ │ └── llms/ # LLM integration layer
79
+ ├── page-controller/ # DOM operations (npm: @page-agent/page-controller)
80
+ └── website/ # Demo & Documentation site
80
81
  ```
81
82
 
82
83
  ## 🤝 Contributing
@@ -1,4 +1,8 @@
1
- import { Motion } from 'ai-motion';
1
+ import { PageController } from '@page-agent/page-controller';
2
+ import { PageControllerConfig } from '@page-agent/page-controller';
3
+ import { Panel } from '@page-agent/ui';
4
+ import { SimulatorMask } from '@page-agent/ui';
5
+ import { SupportedLanguage } from '@page-agent/ui';
2
6
  import { z } from 'zod';
3
7
 
4
8
  export declare interface AgentBrain {
@@ -85,149 +89,12 @@ export declare interface AgentHistory {
85
89
  };
86
90
  }
87
91
 
88
- declare type DeepStringify<T> = {
89
- [K in keyof T]: T[K] extends string ? string : T[K] extends object ? DeepStringify<T[K]> : T[K];
90
- };
91
-
92
- declare interface DomConfig {
93
- interactiveBlacklist?: (Element | (() => Element))[];
94
- interactiveWhitelist?: (Element | (() => Element))[];
95
- include_attributes?: string[];
96
- highlightOpacity?: number;
97
- highlightLabelOpacity?: number;
98
- }
99
-
100
- declare type DomNode = TextDomNode | ElementDomNode | InteractiveElementDomNode;
101
-
102
- declare interface ElementDomNode {
103
- tagName: string;
104
- attributes?: Record<string, string>;
105
- xpath?: string;
106
- children?: string[];
107
- isVisible?: boolean;
108
- isTopElement?: boolean;
109
- isInViewport?: boolean;
110
- isNew?: boolean;
111
- isInteractive?: false;
112
- highlightIndex?: number;
113
- extra?: Record<string, any>;
114
- [key: string]: unknown;
115
- }
116
-
117
- declare const enUS: {
118
- readonly ui: {
119
- readonly panel: {
120
- readonly ready: "Ready";
121
- readonly thinking: "Thinking...";
122
- readonly paused: "Paused";
123
- readonly taskInput: "Enter new task, describe steps in detail, press Enter to submit";
124
- readonly userAnswerPrompt: "Please answer the question above, press Enter to submit";
125
- readonly taskTerminated: "Task terminated";
126
- readonly taskCompleted: "Task completed";
127
- readonly continueExecution: "Continue execution";
128
- readonly userAnswer: "User answer: {{input}}";
129
- readonly question: "Question: {{question}}";
130
- readonly waitingPlaceholder: "Waiting for task to start...";
131
- readonly pause: "Pause";
132
- readonly continue: "Continue";
133
- readonly stop: "Stop";
134
- readonly expand: "Expand history";
135
- readonly collapse: "Collapse history";
136
- readonly step: "Step {{number}} · {{time}}{{duration}}";
137
- };
138
- readonly tools: {
139
- readonly clicking: "Clicking element [{{index}}]...";
140
- readonly inputting: "Inputting text to element [{{index}}]...";
141
- readonly selecting: "Selecting option \"{{text}}\"...";
142
- readonly scrolling: "Scrolling page...";
143
- readonly waiting: "Waiting {{seconds}} seconds...";
144
- readonly done: "Task done";
145
- readonly clicked: "🖱️ Clicked element [{{index}}]";
146
- readonly inputted: "⌨️ Inputted text \"{{text}}\"";
147
- readonly selected: "☑️ Selected option \"{{text}}\"";
148
- readonly scrolled: "🛞 Page scrolled";
149
- readonly waited: "⌛️ Wait completed";
150
- readonly executing: "Executing {{toolName}}...";
151
- readonly resultSuccess: "success";
152
- readonly resultFailure: "failed";
153
- readonly resultError: "error";
154
- };
155
- readonly errors: {
156
- readonly elementNotFound: "No interactive element found at index {{index}}";
157
- readonly taskRequired: "Task description is required";
158
- readonly executionFailed: "Task execution failed";
159
- readonly notInputElement: "Element is not an input or textarea";
160
- readonly notSelectElement: "Element is not a select element";
161
- readonly optionNotFound: "Option \"{{text}}\" not found";
162
- };
163
- };
164
- };
165
-
166
- /**
167
- * Type-safe event bus
168
- * @note Mainly used to decouple logic and UI
169
- * @note All modules of a PageAgent instance share the same EventBus instance for communication
170
- * @note Use with caution if delivery guarantee is needed for logic communication
171
- * @note `on` `once` `emit` methods handle built-in events with type protection, use `addEventListener` for other events
172
- */
173
- declare class EventBus extends EventTarget {
174
- /**
175
- * Listen to built-in events
176
- */
177
- on<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): void;
178
- /**
179
- * Listen to built-in events (one-time)
180
- */
181
- once<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): void;
182
- /**
183
- * Emit built-in events
184
- */
185
- emit<T extends keyof PageAgentEventMap>(event: T, ...args: PageAgentEventMap[T]['params'] extends undefined ? [] : [PageAgentEventMap[T]['params']]): void;
186
- }
187
-
188
- /**
189
- * Event handler type definitions
190
- */
191
- declare type EventHandler<T extends keyof PageAgentEventMap> = PageAgentEventMap[T]['params'] extends undefined ? () => void : (params: PageAgentEventMap[T]['params']) => void;
192
-
193
92
  export declare interface ExecutionResult {
194
93
  success: boolean;
195
94
  data: string;
196
95
  history: AgentHistory[];
197
96
  }
198
97
 
199
- declare interface FlatDomTree {
200
- rootId: string;
201
- map: Record<string, DomNode>;
202
- }
203
-
204
- declare class I18n {
205
- private language;
206
- private translations;
207
- constructor(language?: SupportedLanguage);
208
- t(key: TranslationKey, params?: TranslationParams): string;
209
- private getNestedValue;
210
- private interpolate;
211
- getLanguage(): SupportedLanguage;
212
- }
213
-
214
- declare interface InteractiveElementDomNode {
215
- tagName: string;
216
- attributes?: Record<string, string>;
217
- xpath?: string;
218
- children?: string[];
219
- isVisible?: boolean;
220
- isTopElement?: boolean;
221
- isInViewport?: boolean;
222
- isInteractive: true;
223
- highlightIndex: number;
224
- /**
225
- * 可交互元素的 dom 引用
226
- */
227
- ref: HTMLElement;
228
- [key: string]: unknown;
229
- }
230
-
231
98
  declare interface LLMConfig {
232
99
  baseURL?: string;
233
100
  apiKey?: string;
@@ -237,105 +104,6 @@ declare interface LLMConfig {
237
104
  maxRetries?: number;
238
105
  }
239
106
 
240
- declare const locales: {
241
- readonly 'en-US': {
242
- readonly ui: {
243
- readonly panel: {
244
- readonly ready: "Ready";
245
- readonly thinking: "Thinking...";
246
- readonly paused: "Paused";
247
- readonly taskInput: "Enter new task, describe steps in detail, press Enter to submit";
248
- readonly userAnswerPrompt: "Please answer the question above, press Enter to submit";
249
- readonly taskTerminated: "Task terminated";
250
- readonly taskCompleted: "Task completed";
251
- readonly continueExecution: "Continue execution";
252
- readonly userAnswer: "User answer: {{input}}";
253
- readonly question: "Question: {{question}}";
254
- readonly waitingPlaceholder: "Waiting for task to start...";
255
- readonly pause: "Pause";
256
- readonly continue: "Continue";
257
- readonly stop: "Stop";
258
- readonly expand: "Expand history";
259
- readonly collapse: "Collapse history";
260
- readonly step: "Step {{number}} · {{time}}{{duration}}";
261
- };
262
- readonly tools: {
263
- readonly clicking: "Clicking element [{{index}}]...";
264
- readonly inputting: "Inputting text to element [{{index}}]...";
265
- readonly selecting: "Selecting option \"{{text}}\"...";
266
- readonly scrolling: "Scrolling page...";
267
- readonly waiting: "Waiting {{seconds}} seconds...";
268
- readonly done: "Task done";
269
- readonly clicked: "🖱️ Clicked element [{{index}}]";
270
- readonly inputted: "⌨️ Inputted text \"{{text}}\"";
271
- readonly selected: "☑️ Selected option \"{{text}}\"";
272
- readonly scrolled: "🛞 Page scrolled";
273
- readonly waited: "⌛️ Wait completed";
274
- readonly executing: "Executing {{toolName}}...";
275
- readonly resultSuccess: "success";
276
- readonly resultFailure: "failed";
277
- readonly resultError: "error";
278
- };
279
- readonly errors: {
280
- readonly elementNotFound: "No interactive element found at index {{index}}";
281
- readonly taskRequired: "Task description is required";
282
- readonly executionFailed: "Task execution failed";
283
- readonly notInputElement: "Element is not an input or textarea";
284
- readonly notSelectElement: "Element is not a select element";
285
- readonly optionNotFound: "Option \"{{text}}\" not found";
286
- };
287
- };
288
- };
289
- readonly 'zh-CN': {
290
- readonly ui: {
291
- readonly panel: {
292
- readonly ready: "准备就绪";
293
- readonly thinking: "正在思考...";
294
- readonly paused: "暂停中,稍后";
295
- readonly taskInput: "输入新任务,详细描述步骤,回车提交";
296
- readonly userAnswerPrompt: "请回答上面问题,回车提交";
297
- readonly taskTerminated: "任务已终止";
298
- readonly taskCompleted: "任务结束";
299
- readonly continueExecution: "继续执行";
300
- readonly userAnswer: "用户回答: {{input}}";
301
- readonly question: "询问: {{question}}";
302
- readonly waitingPlaceholder: "等待任务开始...";
303
- readonly pause: "暂停";
304
- readonly continue: "继续";
305
- readonly stop: "终止";
306
- readonly expand: "展开历史";
307
- readonly collapse: "收起历史";
308
- readonly step: "步骤 {{number}} · {{time}}{{duration}}";
309
- };
310
- readonly tools: {
311
- readonly clicking: "正在点击元素 [{{index}}]...";
312
- readonly inputting: "正在输入文本到元素 [{{index}}]...";
313
- readonly selecting: "正在选择选项 \"{{text}}\"...";
314
- readonly scrolling: "正在滚动页面...";
315
- readonly waiting: "等待 {{seconds}} 秒...";
316
- readonly done: "结束任务";
317
- readonly clicked: "🖱️ 已点击元素 [{{index}}]";
318
- readonly inputted: "⌨️ 已输入文本 \"{{text}}\"";
319
- readonly selected: "☑️ 已选择选项 \"{{text}}\"";
320
- readonly scrolled: "🛞 页面滚动完成";
321
- readonly waited: "⌛️ 等待完成";
322
- readonly executing: "正在执行 {{toolName}}...";
323
- readonly resultSuccess: "成功";
324
- readonly resultFailure: "失败";
325
- readonly resultError: "错误";
326
- };
327
- readonly errors: {
328
- readonly elementNotFound: "未找到索引为 {{index}} 的交互元素";
329
- readonly taskRequired: "任务描述不能为空";
330
- readonly executionFailed: "任务执行失败";
331
- readonly notInputElement: "元素不是输入框或文本域";
332
- readonly notSelectElement: "元素不是选择框";
333
- readonly optionNotFound: "未找到选项 \"{{text}}\"";
334
- };
335
- };
336
- };
337
- };
338
-
339
107
  /**
340
108
  * MacroTool input structure
341
109
  */
@@ -354,35 +122,18 @@ export declare interface MacroToolResult {
354
122
  output: string;
355
123
  }
356
124
 
357
- declare type NestedKeyOf<ObjectType extends object> = {
358
- [Key in keyof ObjectType & (string | number)]: ObjectType[Key] extends object ? `${Key}` | `${Key}.${NestedKeyOf<ObjectType[Key]>}` : `${Key}`;
359
- }[keyof ObjectType & (string | number)];
360
-
361
125
  export declare class PageAgent extends EventTarget {
362
126
  #private;
363
127
  config: PageAgentConfig;
364
128
  id: string;
365
- bus: EventBus;
366
- i18n: I18n;
367
129
  panel: Panel;
368
130
  tools: typeof tools;
369
131
  paused: boolean;
370
132
  disposed: boolean;
371
133
  task: string;
372
134
  taskId: string;
373
- /** Corresponds to eval_page in browser-use */
374
- flatTree: FlatDomTree | null;
375
- /**
376
- * All highlighted index-mapped interactive elements
377
- * Corresponds to DOMState.selector_map in browser-use
378
- */
379
- selectorMap: Map<number, InteractiveElementDomNode>;
380
- /** highlight index -> element text */
381
- elementTextMap: Map<number, string>;
382
- /** Corresponds to clickable_elements_to_string in browser-use */
383
- simplifiedHTML: string;
384
- /** last time the tree was updated */
385
- lastTimeUpdate: number;
135
+ /** PageController for DOM operations */
136
+ pageController: PageController;
386
137
  /** Fullscreen mask */
387
138
  mask: SimulatorMask;
388
139
  /** History records */
@@ -395,32 +146,7 @@ export declare class PageAgent extends EventTarget {
395
146
  dispose(reason?: string): void;
396
147
  }
397
148
 
398
- export declare type PageAgentConfig = LLMConfig & AgentConfig & DomConfig;
399
-
400
- /**
401
- * Event mapping definitions
402
- * @note Event bus callbacks must be repeatable without errors
403
- */
404
- declare interface PageAgentEventMap {
405
- 'panel:show': {
406
- params: undefined;
407
- };
408
- 'panel:hide': {
409
- params: undefined;
410
- };
411
- 'panel:reset': {
412
- params: undefined;
413
- };
414
- 'panel:update': {
415
- params: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>;
416
- };
417
- 'panel:expand': {
418
- params: undefined;
419
- };
420
- 'panel:collapse': {
421
- params: undefined;
422
- };
423
- }
149
+ export declare type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig;
424
150
 
425
151
  /**
426
152
  * Internal tool definition that has access to PageAgent `this` context
@@ -431,59 +157,6 @@ export declare interface PageAgentTool<TParams = any> {
431
157
  execute: (this: PageAgent, args: TParams) => Promise<string>;
432
158
  }
433
159
 
434
- /**
435
- * Agent control panel
436
- */
437
- declare class Panel {
438
- #private;
439
- get wrapper(): HTMLElement;
440
- constructor(pageAgent: PageAgent);
441
- /**
442
- * Ask for user input
443
- */
444
- askUser(question: string): Promise<string>;
445
- /**
446
- * Dispose panel
447
- */
448
- dispose(): void;
449
- }
450
-
451
- declare class SimulatorMask {
452
- #private;
453
- wrapper: HTMLDivElement;
454
- motion: Motion;
455
- constructor();
456
- setCursorPosition(x: number, y: number): void;
457
- triggerClickAnimation(): void;
458
- show(): void;
459
- hide(): void;
460
- dispose(): void;
461
- }
462
-
463
- /**
464
- * Agent execution state management
465
- */
466
- declare interface Step {
467
- id: string;
468
- stepNumber: number;
469
- timestamp: Date;
470
- type: 'thinking' | 'tool_executing' | 'completed' | 'error' | 'output' | 'input' | 'retry';
471
- toolName?: string;
472
- toolArgs?: any;
473
- toolResult?: any;
474
- displayText: string;
475
- duration?: number;
476
- }
477
-
478
- declare type SupportedLanguage = keyof typeof locales;
479
-
480
- declare interface TextDomNode {
481
- type: 'TEXT_NODE';
482
- text: string;
483
- isVisible: boolean;
484
- [key: string]: unknown;
485
- }
486
-
487
160
  export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams>;
488
161
 
489
162
  /**
@@ -492,10 +165,4 @@ export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgen
492
165
  */
493
166
  declare const tools: Map<string, PageAgentTool<any>>;
494
167
 
495
- declare type TranslationKey = NestedKeyOf<TranslationSchema>;
496
-
497
- declare type TranslationParams = Record<string, string | number>;
498
-
499
- declare type TranslationSchema = DeepStringify<typeof enUS>;
500
-
501
168
  export { }