page-agent 0.2.4 → 0.3.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -7
- package/dist/esm/PageAgent.d.ts +4 -209
- package/dist/esm/page-agent.js +10 -696
- package/dist/esm/page-agent.js.map +1 -1
- package/dist/iife/page-agent.demo.js +401 -0
- package/package.json +8 -6
package/README.md
CHANGED
|
@@ -37,7 +37,7 @@ Fastest way to try PageAgent:
|
|
|
37
37
|
|
|
38
38
|
```html
|
|
39
39
|
<script
|
|
40
|
-
src="https://cdn.jsdelivr.net/npm
|
|
40
|
+
src="https://cdn.jsdelivr.net/npm/page-agent/dist/iife/page-agent.demo.js"
|
|
41
41
|
crossorigin="true"
|
|
42
42
|
></script>
|
|
43
43
|
```
|
|
@@ -46,8 +46,8 @@ Fastest way to try PageAgent:
|
|
|
46
46
|
|
|
47
47
|
| Mirrors | URL |
|
|
48
48
|
| ------- | ----------------------------------------------------------------------------------- |
|
|
49
|
-
| Global | https://cdn.jsdelivr.net/npm
|
|
50
|
-
| China | https://registry.npmmirror.com
|
|
49
|
+
| Global | https://cdn.jsdelivr.net/npm/page-agent/dist/iife/page-agent.demo.js |
|
|
50
|
+
| China | https://registry.npmmirror.com/page-agent/latest/files/dist/iife/page-agent.demo.js |
|
|
51
51
|
|
|
52
52
|
### NPM Installation
|
|
53
53
|
|
|
@@ -68,19 +68,17 @@ const agent = new PageAgent({
|
|
|
68
68
|
await agent.execute('Click the login button')
|
|
69
69
|
```
|
|
70
70
|
|
|
71
|
-
For environments where NPM is not available, we also offer an IIFE build via CDN. [@see CDN Usage](https://alibaba.github.io/page-agent/#/docs/integration/cdn-setup)
|
|
72
|
-
|
|
73
71
|
## 🏗️ Structure
|
|
74
72
|
|
|
75
73
|
PageAgent adopts a simplified monorepo structure:
|
|
76
74
|
|
|
77
75
|
```
|
|
78
76
|
packages/
|
|
79
|
-
├── page-agent/ # AI agent (npm: page-agent)
|
|
77
|
+
├── page-agent/ # AI agent and demo (npm: page-agent)
|
|
78
|
+
├── core/ # Agent core logic without UI (npm: @page-agent/core)
|
|
80
79
|
├── llms/ # LLM client (npm: @page-agent/llms)
|
|
81
80
|
├── page-controller/ # DOM operations & Visual Mask (npm: @page-agent/page-controller)
|
|
82
81
|
├── ui/ # Panel & i18n (npm: @page-agent/ui)
|
|
83
|
-
├── cdn/ # CDN IIFE builds (npm: @page-agent/cdn)
|
|
84
82
|
└── website/ # Demo & Documentation site
|
|
85
83
|
```
|
|
86
84
|
|
package/dist/esm/PageAgent.d.ts
CHANGED
|
@@ -1,217 +1,12 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { PageControllerConfig } from '@page-agent/page-controller';
|
|
1
|
+
import { PageAgentConfig } from '@page-agent/core';
|
|
2
|
+
import { PageAgentCore } from '@page-agent/core';
|
|
4
3
|
import { Panel } from '@page-agent/ui';
|
|
5
|
-
import { SupportedLanguage } from '@page-agent/ui';
|
|
6
|
-
import { z } from 'zod';
|
|
7
4
|
|
|
8
|
-
declare
|
|
9
|
-
language?: SupportedLanguage;
|
|
10
|
-
/**
|
|
11
|
-
* Custom tools to extend PageAgent capabilities
|
|
12
|
-
* @experimental
|
|
13
|
-
* @note You can also override or remove internal tools by using the same name.
|
|
14
|
-
* @see [tools](../tools/index.ts)
|
|
15
|
-
*
|
|
16
|
-
* @example
|
|
17
|
-
* // override internal tool
|
|
18
|
-
* import { tool } from 'page-agent'
|
|
19
|
-
* const customTools = {
|
|
20
|
-
* ask_user: tool({
|
|
21
|
-
* description:
|
|
22
|
-
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
|
|
23
|
-
* inputSchema: zod.object({
|
|
24
|
-
* question: zod.string(),
|
|
25
|
-
* }),
|
|
26
|
-
* execute: async function (this: PageAgent, input) {
|
|
27
|
-
* const answer = await do_some_thing(input.question)
|
|
28
|
-
* return "✅ Received user answer: " + answer
|
|
29
|
-
* },
|
|
30
|
-
* })
|
|
31
|
-
* }
|
|
32
|
-
*
|
|
33
|
-
* @example
|
|
34
|
-
* // remove internal tool
|
|
35
|
-
* const customTools = {
|
|
36
|
-
* ask_user: null // never ask user questions
|
|
37
|
-
* }
|
|
38
|
-
*/
|
|
39
|
-
customTools?: Record<string, PageAgentTool | null>;
|
|
40
|
-
/**
|
|
41
|
-
* Instructions to guide the agent's behavior
|
|
42
|
-
*/
|
|
43
|
-
instructions?: {
|
|
44
|
-
/**
|
|
45
|
-
* Global system-level instructions, applied to all tasks
|
|
46
|
-
*/
|
|
47
|
-
system?: string;
|
|
48
|
-
/**
|
|
49
|
-
* Dynamic page-level instructions callback
|
|
50
|
-
* Called before each step to get instructions for the current page
|
|
51
|
-
* @param url - Current page URL (window.location.href)
|
|
52
|
-
* @returns Instructions string, or undefined/null to skip
|
|
53
|
-
*/
|
|
54
|
-
getPageInstructions?: (url: string) => string | undefined | null;
|
|
55
|
-
};
|
|
56
|
-
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void;
|
|
57
|
-
onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void;
|
|
58
|
-
onBeforeTask?: (this: PageAgent) => Promise<void> | void;
|
|
59
|
-
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void;
|
|
60
|
-
/**
|
|
61
|
-
* @note this hook can block the disposal process
|
|
62
|
-
* @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
|
|
63
|
-
* @todo remove `this` binding, pass agent as explicit parameter instead
|
|
64
|
-
*/
|
|
65
|
-
onDispose?: (this: PageAgent, reason?: string) => void;
|
|
66
|
-
/**
|
|
67
|
-
* @experimental
|
|
68
|
-
* Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
|
|
69
|
-
* @note Can cause unpredictable side effects.
|
|
70
|
-
* @note May bypass some safe guards and data-masking mechanisms.
|
|
71
|
-
*/
|
|
72
|
-
experimentalScriptExecutionTool?: boolean;
|
|
73
|
-
/**
|
|
74
|
-
* Transform page content before sending to LLM.
|
|
75
|
-
* Called after DOM extraction and simplification, before LLM invocation.
|
|
76
|
-
* Use cases: inspect extraction results, modify page info, mask sensitive data.
|
|
77
|
-
*
|
|
78
|
-
* @param content - Simplified page content that will be sent to LLM
|
|
79
|
-
* @returns Transformed content
|
|
80
|
-
*
|
|
81
|
-
* @example
|
|
82
|
-
* // Mask phone numbers
|
|
83
|
-
* transformPageContent: async (content) => {
|
|
84
|
-
* return content.replace(/1[3-9]\d{9}/g, '***********')
|
|
85
|
-
* }
|
|
86
|
-
*/
|
|
87
|
-
transformPageContent?: (content: string) => Promise<string> | string;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
/**
|
|
91
|
-
* Agent reflection state - the reflection-before-action model
|
|
92
|
-
*
|
|
93
|
-
* Every tool call must first reflect on:
|
|
94
|
-
* - evaluation_previous_goal: How well did the previous action achieve its goal?
|
|
95
|
-
* - memory: Key information to remember for future steps
|
|
96
|
-
* - next_goal: What should be accomplished in the next action?
|
|
97
|
-
*/
|
|
98
|
-
export declare interface AgentReflection {
|
|
99
|
-
evaluation_previous_goal: string;
|
|
100
|
-
memory: string;
|
|
101
|
-
next_goal: string;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/**
|
|
105
|
-
* A single agent step with reflection and action
|
|
106
|
-
*/
|
|
107
|
-
export declare interface AgentStep {
|
|
108
|
-
type: 'step';
|
|
109
|
-
reflection: Partial<AgentReflection>;
|
|
110
|
-
action: {
|
|
111
|
-
name: string;
|
|
112
|
-
input: any;
|
|
113
|
-
output: string;
|
|
114
|
-
};
|
|
115
|
-
usage: {
|
|
116
|
-
promptTokens: number;
|
|
117
|
-
completionTokens: number;
|
|
118
|
-
totalTokens: number;
|
|
119
|
-
cachedTokens?: number;
|
|
120
|
-
reasoningTokens?: number;
|
|
121
|
-
};
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
export declare interface ExecutionResult {
|
|
125
|
-
success: boolean;
|
|
126
|
-
data: string;
|
|
127
|
-
history: HistoryEvent[];
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
/**
|
|
131
|
-
* Union type for all history events
|
|
132
|
-
*/
|
|
133
|
-
export declare type HistoryEvent = AgentStep | ObservationEvent | UserTakeoverEvent;
|
|
134
|
-
|
|
135
|
-
/**
|
|
136
|
-
* MacroTool input structure
|
|
137
|
-
*
|
|
138
|
-
* This is the core abstraction that enforces the "reflection-before-action" mental model.
|
|
139
|
-
* Before executing any action, the LLM must output its reasoning state.
|
|
140
|
-
*/
|
|
141
|
-
export declare interface MacroToolInput extends Partial<AgentReflection> {
|
|
142
|
-
action: Record<string, any>;
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
/**
|
|
146
|
-
* MacroTool output structure
|
|
147
|
-
*/
|
|
148
|
-
export declare interface MacroToolResult {
|
|
149
|
-
input: MacroToolInput;
|
|
150
|
-
output: string;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
/**
|
|
154
|
-
* Persistent observation event (stays in memory)
|
|
155
|
-
*/
|
|
156
|
-
export declare interface ObservationEvent {
|
|
157
|
-
type: 'observation';
|
|
158
|
-
content: string;
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
export declare class PageAgent extends EventTarget {
|
|
162
|
-
#private;
|
|
163
|
-
config: PageAgentConfig;
|
|
164
|
-
id: string;
|
|
5
|
+
export declare class PageAgent extends PageAgentCore {
|
|
165
6
|
panel: Panel;
|
|
166
|
-
tools: typeof tools;
|
|
167
|
-
disposed: boolean;
|
|
168
|
-
task: string;
|
|
169
|
-
taskId: string;
|
|
170
|
-
/** PageController for DOM operations */
|
|
171
|
-
pageController: PageController;
|
|
172
|
-
/** Runtime states for tracking across steps */
|
|
173
|
-
states: {
|
|
174
|
-
/** Accumulated wait time in seconds, used by wait tool */
|
|
175
|
-
totalWaitTime: number;
|
|
176
|
-
/** Last known URL for detecting navigation */
|
|
177
|
-
lastURL: string;
|
|
178
|
-
};
|
|
179
|
-
/** History event stream */
|
|
180
|
-
history: HistoryEvent[];
|
|
181
7
|
constructor(config: PageAgentConfig);
|
|
182
|
-
/**
|
|
183
|
-
* Push a persistent observation to the history event stream.
|
|
184
|
-
* This will be visible in <agent_history> and remain in memory across steps.
|
|
185
|
-
*/
|
|
186
|
-
pushObservation(content: string): void;
|
|
187
|
-
execute(task: string): Promise<ExecutionResult>;
|
|
188
|
-
dispose(reason?: string): void;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
export declare type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig;
|
|
192
|
-
|
|
193
|
-
/**
|
|
194
|
-
* Internal tool definition that has access to PageAgent `this` context
|
|
195
|
-
*/
|
|
196
|
-
export declare interface PageAgentTool<TParams = any> {
|
|
197
|
-
description: string;
|
|
198
|
-
inputSchema: z.ZodType<TParams>;
|
|
199
|
-
execute: (this: PageAgent, args: TParams) => Promise<string>;
|
|
200
8
|
}
|
|
201
9
|
|
|
202
|
-
export
|
|
203
|
-
|
|
204
|
-
/**
|
|
205
|
-
* Internal tools for PageAgent.
|
|
206
|
-
* Note: Using any to allow different parameter types for each tool
|
|
207
|
-
*/
|
|
208
|
-
declare const tools: Map<string, PageAgentTool<any>>;
|
|
209
|
-
|
|
210
|
-
/**
|
|
211
|
-
* User takeover event
|
|
212
|
-
*/
|
|
213
|
-
export declare interface UserTakeoverEvent {
|
|
214
|
-
type: 'user_takeover';
|
|
215
|
-
}
|
|
10
|
+
export { PageAgentConfig }
|
|
216
11
|
|
|
217
12
|
export { }
|