page-agent 0.0.7 → 0.0.10

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # PageAgent 🤖🪄
2
2
 
3
- ![banner](https://img.alicdn.com/imgextra/i1/O1CN01RY0Wvh26ATVeDIX7v_!!6000000007621-0-tps-1672-512.jpg)
3
+ ![banner](https://img.alicdn.com/imgextra/i3/O1CN01MyVCS21EoKkIHUT1s_!!6000000000398-49-tps-1280-353.webp)
4
4
 
5
5
  [![npm version](https://badge.fury.io/js/page-agent.svg)](https://badge.fury.io/js/page-agent) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![TypeScript](https://img.shields.io/badge/%3C%2F%3E-TypeScript-%230074c1.svg)](http://www.typescriptlang.org/) [![Downloads](https://img.shields.io/npm/dt/page-agent.svg)](https://www.npmjs.com/package/page-agent) [![Bundle Size](https://img.shields.io/bundlephobia/minzip/page-agent)](https://bundlephobia.com/package/page-agent) [![GitHub stars](https://img.shields.io/github/stars/alibaba/page-agent.svg)](https://github.com/alibaba/page-agent)
6
6
 
@@ -72,11 +72,8 @@ PageAgent adopts a simplified monorepo structure:
72
72
  ```
73
73
  packages/
74
74
  ├── page-agent/ # AI agent (npm: page-agent)
75
- │ ├── PageAgent # Agent main loop
76
- │ ├── tools/ # LLM tool definitions
77
- │ ├── ui/ # UI components & panels
78
- │ └── llms/ # LLM integration layer
79
75
  ├── page-controller/ # DOM operations (npm: @page-agent/page-controller)
76
+ ├── ui/ # Panel & Mask & Mouse Animation (npm: @page-agent/ui)
80
77
  └── website/ # Demo & Documentation site
81
78
  ```
82
79
 
@@ -1,6 +1,8 @@
1
- import { Motion } from 'ai-motion';
2
1
  import { PageController } from '@page-agent/page-controller';
3
2
  import { PageControllerConfig } from '@page-agent/page-controller';
3
+ import { Panel } from '@page-agent/ui';
4
+ import { SimulatorMask } from '@page-agent/ui';
5
+ import { SupportedLanguage } from '@page-agent/ui';
4
6
  import { z } from 'zod';
5
7
 
6
8
  export declare interface AgentBrain {
@@ -87,102 +89,12 @@ export declare interface AgentHistory {
87
89
  };
88
90
  }
89
91
 
90
- declare type DeepStringify<T> = {
91
- [K in keyof T]: T[K] extends string ? string : T[K] extends object ? DeepStringify<T[K]> : T[K];
92
- };
93
-
94
- declare const enUS: {
95
- readonly ui: {
96
- readonly panel: {
97
- readonly ready: "Ready";
98
- readonly thinking: "Thinking...";
99
- readonly paused: "Paused";
100
- readonly taskInput: "Enter new task, describe steps in detail, press Enter to submit";
101
- readonly userAnswerPrompt: "Please answer the question above, press Enter to submit";
102
- readonly taskTerminated: "Task terminated";
103
- readonly taskCompleted: "Task completed";
104
- readonly continueExecution: "Continue execution";
105
- readonly userAnswer: "User answer: {{input}}";
106
- readonly question: "Question: {{question}}";
107
- readonly waitingPlaceholder: "Waiting for task to start...";
108
- readonly pause: "Pause";
109
- readonly continue: "Continue";
110
- readonly stop: "Stop";
111
- readonly expand: "Expand history";
112
- readonly collapse: "Collapse history";
113
- readonly step: "Step {{number}} · {{time}}{{duration}}";
114
- };
115
- readonly tools: {
116
- readonly clicking: "Clicking element [{{index}}]...";
117
- readonly inputting: "Inputting text to element [{{index}}]...";
118
- readonly selecting: "Selecting option \"{{text}}\"...";
119
- readonly scrolling: "Scrolling page...";
120
- readonly waiting: "Waiting {{seconds}} seconds...";
121
- readonly done: "Task done";
122
- readonly clicked: "🖱️ Clicked element [{{index}}]";
123
- readonly inputted: "⌨️ Inputted text \"{{text}}\"";
124
- readonly selected: "☑️ Selected option \"{{text}}\"";
125
- readonly scrolled: "🛞 Page scrolled";
126
- readonly waited: "⌛️ Wait completed";
127
- readonly executing: "Executing {{toolName}}...";
128
- readonly resultSuccess: "success";
129
- readonly resultFailure: "failed";
130
- readonly resultError: "error";
131
- };
132
- readonly errors: {
133
- readonly elementNotFound: "No interactive element found at index {{index}}";
134
- readonly taskRequired: "Task description is required";
135
- readonly executionFailed: "Task execution failed";
136
- readonly notInputElement: "Element is not an input or textarea";
137
- readonly notSelectElement: "Element is not a select element";
138
- readonly optionNotFound: "Option \"{{text}}\" not found";
139
- };
140
- };
141
- };
142
-
143
- /**
144
- * Type-safe event bus
145
- * @note Mainly used to decouple logic and UI
146
- * @note All modules of a PageAgent instance share the same EventBus instance for communication
147
- * @note Use with caution if delivery guarantee is needed for logic communication
148
- * @note `on` `once` `emit` methods handle built-in events with type protection, use `addEventListener` for other events
149
- */
150
- declare class EventBus extends EventTarget {
151
- /**
152
- * Listen to built-in events
153
- */
154
- on<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): void;
155
- /**
156
- * Listen to built-in events (one-time)
157
- */
158
- once<T extends keyof PageAgentEventMap>(event: T, handler: EventHandler<T>): void;
159
- /**
160
- * Emit built-in events
161
- */
162
- emit<T extends keyof PageAgentEventMap>(event: T, ...args: PageAgentEventMap[T]['params'] extends undefined ? [] : [PageAgentEventMap[T]['params']]): void;
163
- }
164
-
165
- /**
166
- * Event handler type definitions
167
- */
168
- declare type EventHandler<T extends keyof PageAgentEventMap> = PageAgentEventMap[T]['params'] extends undefined ? () => void : (params: PageAgentEventMap[T]['params']) => void;
169
-
170
92
  export declare interface ExecutionResult {
171
93
  success: boolean;
172
94
  data: string;
173
95
  history: AgentHistory[];
174
96
  }
175
97
 
176
- declare class I18n {
177
- private language;
178
- private translations;
179
- constructor(language?: SupportedLanguage);
180
- t(key: TranslationKey, params?: TranslationParams): string;
181
- private getNestedValue;
182
- private interpolate;
183
- getLanguage(): SupportedLanguage;
184
- }
185
-
186
98
  declare interface LLMConfig {
187
99
  baseURL?: string;
188
100
  apiKey?: string;
@@ -192,105 +104,6 @@ declare interface LLMConfig {
192
104
  maxRetries?: number;
193
105
  }
194
106
 
195
- declare const locales: {
196
- readonly 'en-US': {
197
- readonly ui: {
198
- readonly panel: {
199
- readonly ready: "Ready";
200
- readonly thinking: "Thinking...";
201
- readonly paused: "Paused";
202
- readonly taskInput: "Enter new task, describe steps in detail, press Enter to submit";
203
- readonly userAnswerPrompt: "Please answer the question above, press Enter to submit";
204
- readonly taskTerminated: "Task terminated";
205
- readonly taskCompleted: "Task completed";
206
- readonly continueExecution: "Continue execution";
207
- readonly userAnswer: "User answer: {{input}}";
208
- readonly question: "Question: {{question}}";
209
- readonly waitingPlaceholder: "Waiting for task to start...";
210
- readonly pause: "Pause";
211
- readonly continue: "Continue";
212
- readonly stop: "Stop";
213
- readonly expand: "Expand history";
214
- readonly collapse: "Collapse history";
215
- readonly step: "Step {{number}} · {{time}}{{duration}}";
216
- };
217
- readonly tools: {
218
- readonly clicking: "Clicking element [{{index}}]...";
219
- readonly inputting: "Inputting text to element [{{index}}]...";
220
- readonly selecting: "Selecting option \"{{text}}\"...";
221
- readonly scrolling: "Scrolling page...";
222
- readonly waiting: "Waiting {{seconds}} seconds...";
223
- readonly done: "Task done";
224
- readonly clicked: "🖱️ Clicked element [{{index}}]";
225
- readonly inputted: "⌨️ Inputted text \"{{text}}\"";
226
- readonly selected: "☑️ Selected option \"{{text}}\"";
227
- readonly scrolled: "🛞 Page scrolled";
228
- readonly waited: "⌛️ Wait completed";
229
- readonly executing: "Executing {{toolName}}...";
230
- readonly resultSuccess: "success";
231
- readonly resultFailure: "failed";
232
- readonly resultError: "error";
233
- };
234
- readonly errors: {
235
- readonly elementNotFound: "No interactive element found at index {{index}}";
236
- readonly taskRequired: "Task description is required";
237
- readonly executionFailed: "Task execution failed";
238
- readonly notInputElement: "Element is not an input or textarea";
239
- readonly notSelectElement: "Element is not a select element";
240
- readonly optionNotFound: "Option \"{{text}}\" not found";
241
- };
242
- };
243
- };
244
- readonly 'zh-CN': {
245
- readonly ui: {
246
- readonly panel: {
247
- readonly ready: "准备就绪";
248
- readonly thinking: "正在思考...";
249
- readonly paused: "暂停中,稍后";
250
- readonly taskInput: "输入新任务,详细描述步骤,回车提交";
251
- readonly userAnswerPrompt: "请回答上面问题,回车提交";
252
- readonly taskTerminated: "任务已终止";
253
- readonly taskCompleted: "任务结束";
254
- readonly continueExecution: "继续执行";
255
- readonly userAnswer: "用户回答: {{input}}";
256
- readonly question: "询问: {{question}}";
257
- readonly waitingPlaceholder: "等待任务开始...";
258
- readonly pause: "暂停";
259
- readonly continue: "继续";
260
- readonly stop: "终止";
261
- readonly expand: "展开历史";
262
- readonly collapse: "收起历史";
263
- readonly step: "步骤 {{number}} · {{time}}{{duration}}";
264
- };
265
- readonly tools: {
266
- readonly clicking: "正在点击元素 [{{index}}]...";
267
- readonly inputting: "正在输入文本到元素 [{{index}}]...";
268
- readonly selecting: "正在选择选项 \"{{text}}\"...";
269
- readonly scrolling: "正在滚动页面...";
270
- readonly waiting: "等待 {{seconds}} 秒...";
271
- readonly done: "结束任务";
272
- readonly clicked: "🖱️ 已点击元素 [{{index}}]";
273
- readonly inputted: "⌨️ 已输入文本 \"{{text}}\"";
274
- readonly selected: "☑️ 已选择选项 \"{{text}}\"";
275
- readonly scrolled: "🛞 页面滚动完成";
276
- readonly waited: "⌛️ 等待完成";
277
- readonly executing: "正在执行 {{toolName}}...";
278
- readonly resultSuccess: "成功";
279
- readonly resultFailure: "失败";
280
- readonly resultError: "错误";
281
- };
282
- readonly errors: {
283
- readonly elementNotFound: "未找到索引为 {{index}} 的交互元素";
284
- readonly taskRequired: "任务描述不能为空";
285
- readonly executionFailed: "任务执行失败";
286
- readonly notInputElement: "元素不是输入框或文本域";
287
- readonly notSelectElement: "元素不是选择框";
288
- readonly optionNotFound: "未找到选项 \"{{text}}\"";
289
- };
290
- };
291
- };
292
- };
293
-
294
107
  /**
295
108
  * MacroTool input structure
296
109
  */
@@ -309,16 +122,10 @@ export declare interface MacroToolResult {
309
122
  output: string;
310
123
  }
311
124
 
312
- declare type NestedKeyOf<ObjectType extends object> = {
313
- [Key in keyof ObjectType & (string | number)]: ObjectType[Key] extends object ? `${Key}` | `${Key}.${NestedKeyOf<ObjectType[Key]>}` : `${Key}`;
314
- }[keyof ObjectType & (string | number)];
315
-
316
125
  export declare class PageAgent extends EventTarget {
317
126
  #private;
318
127
  config: PageAgentConfig;
319
128
  id: string;
320
- bus: EventBus;
321
- i18n: I18n;
322
129
  panel: Panel;
323
130
  tools: typeof tools;
324
131
  paused: boolean;
@@ -341,31 +148,6 @@ export declare class PageAgent extends EventTarget {
341
148
 
342
149
  export declare type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig;
343
150
 
344
- /**
345
- * Event mapping definitions
346
- * @note Event bus callbacks must be repeatable without errors
347
- */
348
- declare interface PageAgentEventMap {
349
- 'panel:show': {
350
- params: undefined;
351
- };
352
- 'panel:hide': {
353
- params: undefined;
354
- };
355
- 'panel:reset': {
356
- params: undefined;
357
- };
358
- 'panel:update': {
359
- params: Omit<Step, 'id' | 'stepNumber' | 'timestamp'>;
360
- };
361
- 'panel:expand': {
362
- params: undefined;
363
- };
364
- 'panel:collapse': {
365
- params: undefined;
366
- };
367
- }
368
-
369
151
  /**
370
152
  * Internal tool definition that has access to PageAgent `this` context
371
153
  */
@@ -375,52 +157,6 @@ export declare interface PageAgentTool<TParams = any> {
375
157
  execute: (this: PageAgent, args: TParams) => Promise<string>;
376
158
  }
377
159
 
378
- /**
379
- * Agent control panel
380
- */
381
- declare class Panel {
382
- #private;
383
- get wrapper(): HTMLElement;
384
- constructor(pageAgent: PageAgent);
385
- /**
386
- * Ask for user input
387
- */
388
- askUser(question: string): Promise<string>;
389
- /**
390
- * Dispose panel
391
- */
392
- dispose(): void;
393
- }
394
-
395
- declare class SimulatorMask {
396
- #private;
397
- wrapper: HTMLDivElement;
398
- motion: Motion;
399
- constructor();
400
- setCursorPosition(x: number, y: number): void;
401
- triggerClickAnimation(): void;
402
- show(): void;
403
- hide(): void;
404
- dispose(): void;
405
- }
406
-
407
- /**
408
- * Agent execution state management
409
- */
410
- declare interface Step {
411
- id: string;
412
- stepNumber: number;
413
- timestamp: Date;
414
- type: 'thinking' | 'tool_executing' | 'completed' | 'error' | 'output' | 'input' | 'retry';
415
- toolName?: string;
416
- toolArgs?: any;
417
- toolResult?: any;
418
- displayText: string;
419
- duration?: number;
420
- }
421
-
422
- declare type SupportedLanguage = keyof typeof locales;
423
-
424
160
  export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams>;
425
161
 
426
162
  /**
@@ -429,10 +165,4 @@ export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgen
429
165
  */
430
166
  declare const tools: Map<string, PageAgentTool<any>>;
431
167
 
432
- declare type TranslationKey = NestedKeyOf<TranslationSchema>;
433
-
434
- declare type TranslationParams = Record<string, string | number>;
435
-
436
- declare type TranslationSchema = DeepStringify<typeof enUS>;
437
-
438
168
  export { }