page-agent 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -9
- package/dist/lib/PageAgent.d.ts +191 -53
- package/dist/lib/page-agent.js +496 -276
- package/dist/lib/page-agent.js.map +1 -1
- package/package.json +47 -34
- package/NOTICE +0 -23
package/README.md
CHANGED
|
@@ -1,21 +1,25 @@
|
|
|
1
|
-
# PageAgent
|
|
1
|
+
# PageAgent 🤖🪄
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
> Unfinished Project. See [**Roadmap**](./ROADMAP.md)
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+

|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
[npm version](https://badge.fury.io/js/page-agent) [License: MIT](https://opensource.org/licenses/MIT) [TypeScript](http://www.typescriptlang.org/) [npm package](https://www.npmjs.com/package/page-agent) [Bundle size](https://bundlephobia.com/package/page-agent) [GitHub repository](https://github.com/alibaba/page-agent)
|
|
8
|
+
|
|
9
|
+
**Transform your webpage into an AI-powered application with a single script tag.**
|
|
10
|
+
|
|
11
|
+
An in-page UI agent written in JavaScript. Control web interfaces with natural language.
|
|
8
12
|
|
|
9
13
|
🌐 **English** | [中文](./README-zh.md)
|
|
10
14
|
|
|
11
|
-
👉 [
|
|
15
|
+
👉 [🚀 **Demo**](https://alibaba.github.io/page-agent/) | [📖 **Documentation**](https://alibaba.github.io/page-agent/#/docs/introduction/overview)
|
|
12
16
|
|
|
13
17
|
---
|
|
14
18
|
|
|
15
19
|
## ✨ Features
|
|
16
20
|
|
|
17
|
-
- **🎯 Easy Integration**
|
|
18
|
-
- **🔐 Client-Side Processing**
|
|
21
|
+
- **🎯 Easy Integration**
|
|
22
|
+
- **🔐 Client-Side Processing**
|
|
19
23
|
- **🧠 DOM Extraction**
|
|
20
24
|
- **💬 Natural Language Interface**
|
|
21
25
|
- **🎨 UI with Human in the loop**
|
|
@@ -45,7 +49,7 @@ npm install page-agent
|
|
|
45
49
|
import { PageAgent } from 'page-agent'
|
|
46
50
|
|
|
47
51
|
const agent = new PageAgent({
|
|
48
|
-
modelName: 'gpt-4.1-mini'
|
|
52
|
+
modelName: 'gpt-4.1-mini',
|
|
49
53
|
baseURL: 'xxxx',
|
|
50
54
|
apiKey: 'xxxx'
|
|
51
55
|
})
|
|
@@ -95,7 +99,23 @@ PageAgent is designed for **client-side web enhancement**, not server-side autom
|
|
|
95
99
|
|
|
96
100
|
MIT License - see the [LICENSE](LICENSE) file for details.
|
|
97
101
|
|
|
98
|
-
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
DOM processing components and prompt are derived from browser-use:
|
|
105
|
+
|
|
106
|
+
Browser Use
|
|
107
|
+
Copyright (c) 2024 Gregor Zunic
|
|
108
|
+
Licensed under the MIT License
|
|
109
|
+
|
|
110
|
+
Original browser-use project: <https://github.com/browser-use/browser-use>
|
|
111
|
+
|
|
112
|
+
We gratefully acknowledge the browser-use project and its contributors for their
|
|
113
|
+
excellent work on web automation and DOM interaction patterns that helped make
|
|
114
|
+
this project possible.
|
|
115
|
+
|
|
116
|
+
Third-party dependencies and their licenses can be found in the package.json
|
|
117
|
+
file and in the node_modules directory after installation.
|
|
118
|
+
```
|
|
99
119
|
|
|
100
120
|
---
|
|
101
121
|
|
package/dist/lib/PageAgent.d.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { LanguageModelUsage } from 'ai';
|
|
2
1
|
import { Motion } from 'ai-motion';
|
|
3
|
-
import {
|
|
2
|
+
import { z } from 'zod';
|
|
4
3
|
|
|
5
4
|
export declare interface AgentBrain {
|
|
6
5
|
evaluation_previous_goal: string;
|
|
@@ -13,11 +12,21 @@ export declare interface AgentHistory {
|
|
|
13
12
|
action: {
|
|
14
13
|
name: string;
|
|
15
14
|
input: any;
|
|
16
|
-
output:
|
|
15
|
+
output: string;
|
|
16
|
+
};
|
|
17
|
+
usage: {
|
|
18
|
+
promptTokens: number;
|
|
19
|
+
completionTokens: number;
|
|
20
|
+
totalTokens: number;
|
|
21
|
+
cachedTokens?: number;
|
|
22
|
+
reasoningTokens?: number;
|
|
17
23
|
};
|
|
18
|
-
usage: LanguageModelUsage;
|
|
19
24
|
}
|
|
20
25
|
|
|
26
|
+
declare type DeepStringify<T> = {
|
|
27
|
+
[K in keyof T]: T[K] extends string ? string : T[K] extends object ? DeepStringify<T[K]> : T[K];
|
|
28
|
+
};
|
|
29
|
+
|
|
21
30
|
declare interface DomConfig {
|
|
22
31
|
interactiveBlacklist?: (Element | (() => Element))[];
|
|
23
32
|
interactiveWhitelist?: (Element | (() => Element))[];
|
|
@@ -43,6 +52,55 @@ declare interface ElementDomNode {
|
|
|
43
52
|
[key: string]: unknown;
|
|
44
53
|
}
|
|
45
54
|
|
|
55
|
+
declare const enUS: {
|
|
56
|
+
readonly ui: {
|
|
57
|
+
readonly panel: {
|
|
58
|
+
readonly ready: "Ready";
|
|
59
|
+
readonly thinking: "Thinking...";
|
|
60
|
+
readonly paused: "Paused";
|
|
61
|
+
readonly taskInput: "Enter new task, describe steps in detail, press Enter to submit";
|
|
62
|
+
readonly userAnswerPrompt: "Please answer the question above, press Enter to submit";
|
|
63
|
+
readonly taskTerminated: "Task terminated";
|
|
64
|
+
readonly taskCompleted: "Task completed";
|
|
65
|
+
readonly continueExecution: "Continue execution";
|
|
66
|
+
readonly userAnswer: "User answer: {{input}}";
|
|
67
|
+
readonly question: "Question: {{question}}";
|
|
68
|
+
readonly waitingPlaceholder: "Waiting for task to start...";
|
|
69
|
+
readonly pause: "Pause";
|
|
70
|
+
readonly continue: "Continue";
|
|
71
|
+
readonly stop: "Stop";
|
|
72
|
+
readonly expand: "Expand history";
|
|
73
|
+
readonly collapse: "Collapse history";
|
|
74
|
+
readonly step: "Step {{number}} · {{time}}{{duration}}";
|
|
75
|
+
};
|
|
76
|
+
readonly tools: {
|
|
77
|
+
readonly clicking: "Clicking element [{{index}}]...";
|
|
78
|
+
readonly inputting: "Inputting text to element [{{index}}]...";
|
|
79
|
+
readonly selecting: "Selecting option \"{{text}}\"...";
|
|
80
|
+
readonly scrolling: "Scrolling page...";
|
|
81
|
+
readonly waiting: "Waiting {{seconds}} seconds...";
|
|
82
|
+
readonly done: "Task done";
|
|
83
|
+
readonly clicked: "🖱️ Clicked element [{{index}}]";
|
|
84
|
+
readonly inputted: "⌨️ Inputted text \"{{text}}\"";
|
|
85
|
+
readonly selected: "☑️ Selected option \"{{text}}\"";
|
|
86
|
+
readonly scrolled: "🛞 Page scrolled";
|
|
87
|
+
readonly waited: "⌛️ Wait completed";
|
|
88
|
+
readonly executing: "Executing {{toolName}}...";
|
|
89
|
+
readonly resultSuccess: "success";
|
|
90
|
+
readonly resultFailure: "failed";
|
|
91
|
+
readonly resultError: "error";
|
|
92
|
+
};
|
|
93
|
+
readonly errors: {
|
|
94
|
+
readonly elementNotFound: "No interactive element found at index {{index}}";
|
|
95
|
+
readonly taskRequired: "Task description is required";
|
|
96
|
+
readonly executionFailed: "Task execution failed";
|
|
97
|
+
readonly notInputElement: "Element is not an input or textarea";
|
|
98
|
+
readonly notSelectElement: "Element is not a select element";
|
|
99
|
+
readonly optionNotFound: "Option \"{{text}}\" not found";
|
|
100
|
+
};
|
|
101
|
+
};
|
|
102
|
+
};
|
|
103
|
+
|
|
46
104
|
/**
|
|
47
105
|
* Type-safe event bus
|
|
48
106
|
* @note Mainly used to decouple logic and UI
|
|
@@ -111,15 +169,129 @@ declare interface InteractiveElementDomNode {
|
|
|
111
169
|
declare interface LLMConfig {
|
|
112
170
|
baseURL?: string;
|
|
113
171
|
apiKey?: string;
|
|
114
|
-
|
|
172
|
+
model?: string;
|
|
173
|
+
temperature?: number;
|
|
174
|
+
maxTokens?: number;
|
|
115
175
|
maxRetries?: number;
|
|
116
176
|
}
|
|
117
177
|
|
|
118
178
|
declare const locales: {
|
|
119
|
-
readonly '
|
|
120
|
-
|
|
179
|
+
readonly 'en-US': {
|
|
180
|
+
readonly ui: {
|
|
181
|
+
readonly panel: {
|
|
182
|
+
readonly ready: "Ready";
|
|
183
|
+
readonly thinking: "Thinking...";
|
|
184
|
+
readonly paused: "Paused";
|
|
185
|
+
readonly taskInput: "Enter new task, describe steps in detail, press Enter to submit";
|
|
186
|
+
readonly userAnswerPrompt: "Please answer the question above, press Enter to submit";
|
|
187
|
+
readonly taskTerminated: "Task terminated";
|
|
188
|
+
readonly taskCompleted: "Task completed";
|
|
189
|
+
readonly continueExecution: "Continue execution";
|
|
190
|
+
readonly userAnswer: "User answer: {{input}}";
|
|
191
|
+
readonly question: "Question: {{question}}";
|
|
192
|
+
readonly waitingPlaceholder: "Waiting for task to start...";
|
|
193
|
+
readonly pause: "Pause";
|
|
194
|
+
readonly continue: "Continue";
|
|
195
|
+
readonly stop: "Stop";
|
|
196
|
+
readonly expand: "Expand history";
|
|
197
|
+
readonly collapse: "Collapse history";
|
|
198
|
+
readonly step: "Step {{number}} · {{time}}{{duration}}";
|
|
199
|
+
};
|
|
200
|
+
readonly tools: {
|
|
201
|
+
readonly clicking: "Clicking element [{{index}}]...";
|
|
202
|
+
readonly inputting: "Inputting text to element [{{index}}]...";
|
|
203
|
+
readonly selecting: "Selecting option \"{{text}}\"...";
|
|
204
|
+
readonly scrolling: "Scrolling page...";
|
|
205
|
+
readonly waiting: "Waiting {{seconds}} seconds...";
|
|
206
|
+
readonly done: "Task done";
|
|
207
|
+
readonly clicked: "🖱️ Clicked element [{{index}}]";
|
|
208
|
+
readonly inputted: "⌨️ Inputted text \"{{text}}\"";
|
|
209
|
+
readonly selected: "☑️ Selected option \"{{text}}\"";
|
|
210
|
+
readonly scrolled: "🛞 Page scrolled";
|
|
211
|
+
readonly waited: "⌛️ Wait completed";
|
|
212
|
+
readonly executing: "Executing {{toolName}}...";
|
|
213
|
+
readonly resultSuccess: "success";
|
|
214
|
+
readonly resultFailure: "failed";
|
|
215
|
+
readonly resultError: "error";
|
|
216
|
+
};
|
|
217
|
+
readonly errors: {
|
|
218
|
+
readonly elementNotFound: "No interactive element found at index {{index}}";
|
|
219
|
+
readonly taskRequired: "Task description is required";
|
|
220
|
+
readonly executionFailed: "Task execution failed";
|
|
221
|
+
readonly notInputElement: "Element is not an input or textarea";
|
|
222
|
+
readonly notSelectElement: "Element is not a select element";
|
|
223
|
+
readonly optionNotFound: "Option \"{{text}}\" not found";
|
|
224
|
+
};
|
|
225
|
+
};
|
|
226
|
+
};
|
|
227
|
+
readonly 'zh-CN': {
|
|
228
|
+
readonly ui: {
|
|
229
|
+
readonly panel: {
|
|
230
|
+
readonly ready: "准备就绪";
|
|
231
|
+
readonly thinking: "正在思考...";
|
|
232
|
+
readonly paused: "暂停中,稍后";
|
|
233
|
+
readonly taskInput: "输入新任务,详细描述步骤,回车提交";
|
|
234
|
+
readonly userAnswerPrompt: "请回答上面问题,回车提交";
|
|
235
|
+
readonly taskTerminated: "任务已终止";
|
|
236
|
+
readonly taskCompleted: "任务结束";
|
|
237
|
+
readonly continueExecution: "继续执行";
|
|
238
|
+
readonly userAnswer: "用户回答: {{input}}";
|
|
239
|
+
readonly question: "询问: {{question}}";
|
|
240
|
+
readonly waitingPlaceholder: "等待任务开始...";
|
|
241
|
+
readonly pause: "暂停";
|
|
242
|
+
readonly continue: "继续";
|
|
243
|
+
readonly stop: "终止";
|
|
244
|
+
readonly expand: "展开历史";
|
|
245
|
+
readonly collapse: "收起历史";
|
|
246
|
+
readonly step: "步骤 {{number}} · {{time}}{{duration}}";
|
|
247
|
+
};
|
|
248
|
+
readonly tools: {
|
|
249
|
+
readonly clicking: "正在点击元素 [{{index}}]...";
|
|
250
|
+
readonly inputting: "正在输入文本到元素 [{{index}}]...";
|
|
251
|
+
readonly selecting: "正在选择选项 \"{{text}}\"...";
|
|
252
|
+
readonly scrolling: "正在滚动页面...";
|
|
253
|
+
readonly waiting: "等待 {{seconds}} 秒...";
|
|
254
|
+
readonly done: "结束任务";
|
|
255
|
+
readonly clicked: "🖱️ 已点击元素 [{{index}}]";
|
|
256
|
+
readonly inputted: "⌨️ 已输入文本 \"{{text}}\"";
|
|
257
|
+
readonly selected: "☑️ 已选择选项 \"{{text}}\"";
|
|
258
|
+
readonly scrolled: "🛞 页面滚动完成";
|
|
259
|
+
readonly waited: "⌛️ 等待完成";
|
|
260
|
+
readonly executing: "正在执行 {{toolName}}...";
|
|
261
|
+
readonly resultSuccess: "成功";
|
|
262
|
+
readonly resultFailure: "失败";
|
|
263
|
+
readonly resultError: "错误";
|
|
264
|
+
};
|
|
265
|
+
readonly errors: {
|
|
266
|
+
readonly elementNotFound: "未找到索引为 {{index}} 的交互元素";
|
|
267
|
+
readonly taskRequired: "任务描述不能为空";
|
|
268
|
+
readonly executionFailed: "任务执行失败";
|
|
269
|
+
readonly notInputElement: "元素不是输入框或文本域";
|
|
270
|
+
readonly notSelectElement: "元素不是选择框";
|
|
271
|
+
readonly optionNotFound: "未找到选项 \"{{text}}\"";
|
|
272
|
+
};
|
|
273
|
+
};
|
|
274
|
+
};
|
|
121
275
|
};
|
|
122
276
|
|
|
277
|
+
/**
|
|
278
|
+
* MacroTool input structure
|
|
279
|
+
*/
|
|
280
|
+
export declare interface MacroToolInput {
|
|
281
|
+
evaluation_previous_goal?: string;
|
|
282
|
+
memory?: string;
|
|
283
|
+
next_goal?: string;
|
|
284
|
+
action: Record<string, any>;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* MacroTool output structure
|
|
289
|
+
*/
|
|
290
|
+
export declare interface MacroToolResult {
|
|
291
|
+
input: MacroToolInput;
|
|
292
|
+
output: string;
|
|
293
|
+
}
|
|
294
|
+
|
|
123
295
|
declare type NestedKeyOf<ObjectType extends object> = {
|
|
124
296
|
[Key in keyof ObjectType & (string | number)]: ObjectType[Key] extends object ? `${Key}` | `${Key}.${NestedKeyOf<ObjectType[Key]>}` : `${Key}`;
|
|
125
297
|
}[keyof ObjectType & (string | number)];
|
|
@@ -130,6 +302,7 @@ export declare class PageAgent extends EventTarget {
|
|
|
130
302
|
id: string;
|
|
131
303
|
bus: EventBus;
|
|
132
304
|
i18n: I18n;
|
|
305
|
+
panel: Panel;
|
|
133
306
|
paused: boolean;
|
|
134
307
|
disposed: boolean;
|
|
135
308
|
task: string;
|
|
@@ -147,11 +320,9 @@ export declare class PageAgent extends EventTarget {
|
|
|
147
320
|
/** last time the tree was updated */
|
|
148
321
|
lastTimeUpdate: number;
|
|
149
322
|
/** Corresponds to actions in browser-use */
|
|
150
|
-
tools: Map<string,
|
|
323
|
+
tools: Map<string, PageAgentTool<any>>;
|
|
151
324
|
/** Fullscreen mask */
|
|
152
325
|
mask: SimulatorMask;
|
|
153
|
-
/** Interactive panel */
|
|
154
|
-
panel: Panel;
|
|
155
326
|
/** History records */
|
|
156
327
|
history: AgentHistory[];
|
|
157
328
|
constructor(config?: PageAgentConfig);
|
|
@@ -189,6 +360,15 @@ declare interface PageAgentEventMap {
|
|
|
189
360
|
};
|
|
190
361
|
}
|
|
191
362
|
|
|
363
|
+
/**
|
|
364
|
+
* Internal tool definition that has access to PageAgent `this` context
|
|
365
|
+
*/
|
|
366
|
+
declare interface PageAgentTool<TParams = any> {
|
|
367
|
+
description: string;
|
|
368
|
+
inputSchema: z.ZodType<TParams>;
|
|
369
|
+
execute: (this: PageAgent, args: TParams) => Promise<string>;
|
|
370
|
+
}
|
|
371
|
+
|
|
192
372
|
/**
|
|
193
373
|
* Agent control panel
|
|
194
374
|
*/
|
|
@@ -246,49 +426,7 @@ declare type TranslationKey = NestedKeyOf<TranslationSchema>;
|
|
|
246
426
|
|
|
247
427
|
declare type TranslationParams = Record<string, string | number>;
|
|
248
428
|
|
|
249
|
-
declare
|
|
250
|
-
ui: {
|
|
251
|
-
panel: {
|
|
252
|
-
ready: string;
|
|
253
|
-
thinking: string;
|
|
254
|
-
paused: string;
|
|
255
|
-
taskInput: string;
|
|
256
|
-
userAnswerPrompt: string;
|
|
257
|
-
taskTerminated: string;
|
|
258
|
-
taskCompleted: string;
|
|
259
|
-
continueExecution: string;
|
|
260
|
-
userAnswer: string;
|
|
261
|
-
pause: string;
|
|
262
|
-
continue: string;
|
|
263
|
-
stop: string;
|
|
264
|
-
expand: string;
|
|
265
|
-
collapse: string;
|
|
266
|
-
step: string;
|
|
267
|
-
};
|
|
268
|
-
tools: {
|
|
269
|
-
clicking: string;
|
|
270
|
-
inputting: string;
|
|
271
|
-
selecting: string;
|
|
272
|
-
scrolling: string;
|
|
273
|
-
waiting: string;
|
|
274
|
-
done: string;
|
|
275
|
-
clicked: string;
|
|
276
|
-
inputted: string;
|
|
277
|
-
selected: string;
|
|
278
|
-
scrolled: string;
|
|
279
|
-
waited: string;
|
|
280
|
-
executing: string;
|
|
281
|
-
};
|
|
282
|
-
errors: {
|
|
283
|
-
elementNotFound: string;
|
|
284
|
-
taskRequired: string;
|
|
285
|
-
executionFailed: string;
|
|
286
|
-
notInputElement: string;
|
|
287
|
-
notSelectElement: string;
|
|
288
|
-
optionNotFound: string;
|
|
289
|
-
};
|
|
290
|
-
};
|
|
291
|
-
}
|
|
429
|
+
declare type TranslationSchema = DeepStringify<typeof enUS>;
|
|
292
430
|
|
|
293
431
|
declare interface UIConfig {
|
|
294
432
|
language?: SupportedLanguage;
|