page-agent 0.2.5 → 0.3.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,7 +37,7 @@ Fastest way to try PageAgent:
37
37
 
38
38
  ```html
39
39
  <script
40
- src="https://cdn.jsdelivr.net/npm/@page-agent/cdn/dist/page-agent.demo.js"
40
+ src="https://cdn.jsdelivr.net/npm/page-agent/dist/iife/page-agent.demo.js"
41
41
  crossorigin="true"
42
42
  ></script>
43
43
  ```
@@ -46,8 +46,8 @@ Fastest way to try PageAgent:
46
46
 
47
47
  | Mirrors | URL |
48
48
  | ------- | ----------------------------------------------------------------------------------- |
49
- | Global | https://cdn.jsdelivr.net/npm/@page-agent/cdn/dist/page-agent.demo.js |
50
- | China | https://registry.npmmirror.com/@page-agent/cdn/latest/files/dist/page-agent.demo.js |
49
+ | Global | https://cdn.jsdelivr.net/npm/page-agent/dist/iife/page-agent.demo.js |
50
+ | China | https://registry.npmmirror.com/page-agent/latest/files/dist/iife/page-agent.demo.js |
51
51
 
52
52
  ### NPM Installation
53
53
 
@@ -68,19 +68,17 @@ const agent = new PageAgent({
68
68
  await agent.execute('Click the login button')
69
69
  ```
70
70
 
71
- For environments where NPM is not available. We do offer a IIFE build via CDN. [@see CDN Usage](https://alibaba.github.io/page-agent/#/docs/integration/cdn-setup)
72
-
73
71
  ## 🏗️ Structure
74
72
 
75
73
  PageAgent adopts a simplified monorepo structure:
76
74
 
77
75
  ```
78
76
  packages/
79
- ├── page-agent/ # AI agent (npm: page-agent)
77
+ ├── page-agent/ # AI agent and demo(npm: page-agent)
78
+ ├── core/ # Agent core logic without UI(npm: @page-agent/core)
80
79
  ├── llms/ # LLM client (npm: @page-agent/llms)
81
80
  ├── page-controller/ # DOM operations & Visual Mask (npm: @page-agent/page-controller)
82
81
  ├── ui/ # Panel & i18n (npm: @page-agent/ui)
83
- ├── cdn/ # CDN IIFE builds (npm: @page-agent/cdn)
84
82
  └── website/ # Demo & Documentation site
85
83
  ```
86
84
 
@@ -1,235 +1,12 @@
1
- import { LLMConfig } from '@page-agent/llms';
2
- import { PageController } from '@page-agent/page-controller';
3
- import { PageControllerConfig } from '@page-agent/page-controller';
1
+ import { PageAgentConfig } from '@page-agent/core';
2
+ import { PageAgentCore } from '@page-agent/core';
4
3
  import { Panel } from '@page-agent/ui';
5
- import { SupportedLanguage } from '@page-agent/ui';
6
- import { z } from 'zod';
7
4
 
8
- declare interface AgentConfig {
9
- language?: SupportedLanguage;
10
- /**
11
- * Whether to prompt for next task after task completion
12
- * @default true
13
- */
14
- promptForNextTask?: boolean;
15
- /**
16
- * Enable the UI panel for visual feedback and user interaction
17
- * When disabled, the panel will not be created and all UI operations will be skipped.
18
- * Useful for automated testing or when integrating PageAgent as a library.
19
- * @default true
20
- */
21
- enablePanel?: boolean;
22
- /**
23
- * Enable the ask_user tool for agent to ask questions
24
- * When disabled, the agent cannot ask user questions during execution.
25
- * @default true
26
- */
27
- enableAskUser?: boolean;
28
- /**
29
- * Custom tools to extend PageAgent capabilities
30
- * @experimental
31
- * @note You can also override or remove internal tools by using the same name.
32
- * @see [tools](../tools/index.ts)
33
- *
34
- * @example
35
- * // override internal tool
36
- * import { tool } from 'page-agent'
37
- * const customTools = {
38
- * ask_user: tool({
39
- * description:
40
- * 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
41
- * inputSchema: zod.object({
42
- * question: zod.string(),
43
- * }),
44
- * execute: async function (this: PageAgent, input) {
45
- * const answer = await do_some_thing(input.question)
46
- * return "✅ Received user answer: " + answer
47
- * },
48
- * })
49
- * }
50
- *
51
- * @example
52
- * // remove internal tool
53
- * const customTools = {
54
- * ask_user: null // never ask user questions
55
- * }
56
- */
57
- customTools?: Record<string, PageAgentTool | null>;
58
- /**
59
- * Instructions to guide the agent's behavior
60
- */
61
- instructions?: {
62
- /**
63
- * Global system-level instructions, applied to all tasks
64
- */
65
- system?: string;
66
- /**
67
- * Dynamic page-level instructions callback
68
- * Called before each step to get instructions for the current page
69
- * @param url - Current page URL (window.location.href)
70
- * @returns Instructions string, or undefined/null to skip
71
- */
72
- getPageInstructions?: (url: string) => string | undefined | null;
73
- };
74
- onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void;
75
- onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void;
76
- onBeforeTask?: (this: PageAgent) => Promise<void> | void;
77
- onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void;
78
- /**
79
- * @note this hook can block the disposal process
80
- * @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
81
- * @todo remove `this` binding, pass agent as explicit parameter instead
82
- */
83
- onDispose?: (this: PageAgent, reason?: string) => void;
84
- /**
85
- * @experimental
86
- * Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
87
- * @note Can cause unpredictable side effects.
88
- * @note May bypass some safe guards and data-masking mechanisms.
89
- */
90
- experimentalScriptExecutionTool?: boolean;
91
- /**
92
- * Transform page content before sending to LLM.
93
- * Called after DOM extraction and simplification, before LLM invocation.
94
- * Use cases: inspect extraction results, modify page info, mask sensitive data.
95
- *
96
- * @param content - Simplified page content that will be sent to LLM
97
- * @returns Transformed content
98
- *
99
- * @example
100
- * // Mask phone numbers
101
- * transformPageContent: async (content) => {
102
- * return content.replace(/1[3-9]\d{9}/g, '***********')
103
- * }
104
- */
105
- transformPageContent?: (content: string) => Promise<string> | string;
106
- }
107
-
108
- /**
109
- * Agent reflection state - the reflection-before-action model
110
- *
111
- * Every tool call must first reflect on:
112
- * - evaluation_previous_goal: How well did the previous action achieve its goal?
113
- * - memory: Key information to remember for future steps
114
- * - next_goal: What should be accomplished in the next action?
115
- */
116
- export declare interface AgentReflection {
117
- evaluation_previous_goal: string;
118
- memory: string;
119
- next_goal: string;
120
- }
121
-
122
- /**
123
- * A single agent step with reflection and action
124
- */
125
- export declare interface AgentStep {
126
- type: 'step';
127
- reflection: Partial<AgentReflection>;
128
- action: {
129
- name: string;
130
- input: any;
131
- output: string;
132
- };
133
- usage: {
134
- promptTokens: number;
135
- completionTokens: number;
136
- totalTokens: number;
137
- cachedTokens?: number;
138
- reasoningTokens?: number;
139
- };
140
- }
141
-
142
- export declare interface ExecutionResult {
143
- success: boolean;
144
- data: string;
145
- history: HistoryEvent[];
146
- }
147
-
148
- /**
149
- * Union type for all history events
150
- */
151
- export declare type HistoryEvent = AgentStep | ObservationEvent | UserTakeoverEvent;
152
-
153
- /**
154
- * MacroTool input structure
155
- *
156
- * This is the core abstraction that enforces the "reflection-before-action" mental model.
157
- * Before executing any action, the LLM must output its reasoning state.
158
- */
159
- export declare interface MacroToolInput extends Partial<AgentReflection> {
160
- action: Record<string, any>;
161
- }
162
-
163
- /**
164
- * MacroTool output structure
165
- */
166
- export declare interface MacroToolResult {
167
- input: MacroToolInput;
168
- output: string;
169
- }
170
-
171
- /**
172
- * Persistent observation event (stays in memory)
173
- */
174
- export declare interface ObservationEvent {
175
- type: 'observation';
176
- content: string;
177
- }
178
-
179
- export declare class PageAgent extends EventTarget {
180
- #private;
181
- config: PageAgentConfig;
182
- id: string;
183
- panel: Panel | null;
184
- tools: typeof tools;
185
- disposed: boolean;
186
- task: string;
187
- taskId: string;
188
- /** PageController for DOM operations */
189
- pageController: PageController;
190
- /** Runtime states for tracking across steps */
191
- states: {
192
- /** Accumulated wait time in seconds, used by wait tool */
193
- totalWaitTime: number;
194
- /** Last known URL for detecting navigation */
195
- lastURL: string;
196
- };
197
- /** History events */
198
- history: HistoryEvent[];
5
+ export declare class PageAgent extends PageAgentCore {
6
+ panel: Panel;
199
7
  constructor(config: PageAgentConfig);
200
- /**
201
- * Push a persistent observation to the history event stream.
202
- * This will be visible in <agent_history> and remain in memory across steps.
203
- */
204
- pushObservation(content: string): void;
205
- execute(task: string): Promise<ExecutionResult>;
206
- dispose(reason?: string): void;
207
- }
208
-
209
- export declare type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig;
210
-
211
- /**
212
- * Internal tool definition that has access to PageAgent `this` context
213
- */
214
- export declare interface PageAgentTool<TParams = any> {
215
- description: string;
216
- inputSchema: z.ZodType<TParams>;
217
- execute: (this: PageAgent, args: TParams) => Promise<string>;
218
8
  }
219
9
 
220
- export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams>;
221
-
222
- /**
223
- * Internal tools for PageAgent.
224
- * Note: Using any to allow different parameter types for each tool
225
- */
226
- declare const tools: Map<string, PageAgentTool<any>>;
227
-
228
- /**
229
- * User takeover event
230
- */
231
- export declare interface UserTakeoverEvent {
232
- type: 'user_takeover';
233
- }
10
+ export { PageAgentConfig }
234
11
 
235
12
  export { }