page-agent 0.2.4 → 0.3.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,7 +37,7 @@ Fastest way to try PageAgent:
37
37
 
38
38
  ```html
39
39
  <script
40
- src="https://cdn.jsdelivr.net/npm/@page-agent/cdn/dist/page-agent.demo.js"
40
+ src="https://cdn.jsdelivr.net/npm/page-agent/dist/iife/page-agent.demo.js"
41
41
  crossorigin="true"
42
42
  ></script>
43
43
  ```
@@ -46,8 +46,8 @@ Fastest way to try PageAgent:
46
46
 
47
47
  | Mirrors | URL |
48
48
  | ------- | ----------------------------------------------------------------------------------- |
49
- | Global | https://cdn.jsdelivr.net/npm/@page-agent/cdn/dist/page-agent.demo.js |
50
- | China | https://registry.npmmirror.com/@page-agent/cdn/latest/files/dist/page-agent.demo.js |
49
+ | Global | https://cdn.jsdelivr.net/npm/page-agent/dist/iife/page-agent.demo.js |
50
+ | China | https://registry.npmmirror.com/page-agent/latest/files/dist/iife/page-agent.demo.js |
51
51
 
52
52
  ### NPM Installation
53
53
 
@@ -68,19 +68,17 @@ const agent = new PageAgent({
68
68
  await agent.execute('Click the login button')
69
69
  ```
70
70
 
71
- For environments where NPM is not available. We do offer a IIFE build via CDN. [@see CDN Usage](https://alibaba.github.io/page-agent/#/docs/integration/cdn-setup)
72
-
73
71
  ## 🏗️ Structure
74
72
 
75
73
  PageAgent adopts a simplified monorepo structure:
76
74
 
77
75
  ```
78
76
  packages/
79
- ├── page-agent/ # AI agent (npm: page-agent)
77
+ ├── page-agent/ # AI agent and demo (npm: page-agent)
78
+ ├── core/ # Agent core logic without UI (npm: @page-agent/core)
80
79
  ├── llms/ # LLM client (npm: @page-agent/llms)
81
80
  ├── page-controller/ # DOM operations & Visual Mask (npm: @page-agent/page-controller)
82
81
  ├── ui/ # Panel & i18n (npm: @page-agent/ui)
83
- ├── cdn/ # CDN IIFE builds (npm: @page-agent/cdn)
84
82
  └── website/ # Demo & Documentation site
85
83
  ```
86
84
 
@@ -1,217 +1,12 @@
1
- import { LLMConfig } from '@page-agent/llms';
2
- import { PageController } from '@page-agent/page-controller';
3
- import { PageControllerConfig } from '@page-agent/page-controller';
1
+ import { PageAgentConfig } from '@page-agent/core';
2
+ import { PageAgentCore } from '@page-agent/core';
4
3
  import { Panel } from '@page-agent/ui';
5
- import { SupportedLanguage } from '@page-agent/ui';
6
- import { z } from 'zod';
7
4
 
8
- declare interface AgentConfig {
9
- language?: SupportedLanguage;
10
- /**
11
- * Custom tools to extend PageAgent capabilities
12
- * @experimental
13
- * @note You can also override or remove internal tools by using the same name.
14
- * @see [tools](../tools/index.ts)
15
- *
16
- * @example
17
- * // override internal tool
18
- * import { tool } from 'page-agent'
19
- * const customTools = {
20
- * ask_user: tool({
21
- * description:
22
- * 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
23
- * inputSchema: zod.object({
24
- * question: zod.string(),
25
- * }),
26
- * execute: async function (this: PageAgent, input) {
27
- * const answer = await do_some_thing(input.question)
28
- * return "✅ Received user answer: " + answer
29
- * },
30
- * })
31
- * }
32
- *
33
- * @example
34
- * // remove internal tool
35
- * const customTools = {
36
- * ask_user: null // never ask user questions
37
- * }
38
- */
39
- customTools?: Record<string, PageAgentTool | null>;
40
- /**
41
- * Instructions to guide the agent's behavior
42
- */
43
- instructions?: {
44
- /**
45
- * Global system-level instructions, applied to all tasks
46
- */
47
- system?: string;
48
- /**
49
- * Dynamic page-level instructions callback
50
- * Called before each step to get instructions for the current page
51
- * @param url - Current page URL (window.location.href)
52
- * @returns Instructions string, or undefined/null to skip
53
- */
54
- getPageInstructions?: (url: string) => string | undefined | null;
55
- };
56
- onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void;
57
- onAfterStep?: (this: PageAgent, stepCnt: number, history: HistoryEvent[]) => Promise<void> | void;
58
- onBeforeTask?: (this: PageAgent) => Promise<void> | void;
59
- onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void;
60
- /**
61
- * @note this hook can block the disposal process
62
- * @note when dispose caused by page unload, reason will be 'PAGE_UNLOADING'. this method CANNOT block unloading. async operations may be cut.
63
- * @todo remove `this` binding, pass agent as explicit parameter instead
64
- */
65
- onDispose?: (this: PageAgent, reason?: string) => void;
66
- /**
67
- * @experimental
68
- * Enable the experimental script execution tool that allows executing generated JavaScript code on the page.
69
- * @note Can cause unpredictable side effects.
70
- * @note May bypass some safe guards and data-masking mechanisms.
71
- */
72
- experimentalScriptExecutionTool?: boolean;
73
- /**
74
- * Transform page content before sending to LLM.
75
- * Called after DOM extraction and simplification, before LLM invocation.
76
- * Use cases: inspect extraction results, modify page info, mask sensitive data.
77
- *
78
- * @param content - Simplified page content that will be sent to LLM
79
- * @returns Transformed content
80
- *
81
- * @example
82
- * // Mask phone numbers
83
- * transformPageContent: async (content) => {
84
- * return content.replace(/1[3-9]\d{9}/g, '***********')
85
- * }
86
- */
87
- transformPageContent?: (content: string) => Promise<string> | string;
88
- }
89
-
90
- /**
91
- * Agent reflection state - the reflection-before-action model
92
- *
93
- * Every tool call must first reflect on:
94
- * - evaluation_previous_goal: How well did the previous action achieve its goal?
95
- * - memory: Key information to remember for future steps
96
- * - next_goal: What should be accomplished in the next action?
97
- */
98
- export declare interface AgentReflection {
99
- evaluation_previous_goal: string;
100
- memory: string;
101
- next_goal: string;
102
- }
103
-
104
- /**
105
- * A single agent step with reflection and action
106
- */
107
- export declare interface AgentStep {
108
- type: 'step';
109
- reflection: Partial<AgentReflection>;
110
- action: {
111
- name: string;
112
- input: any;
113
- output: string;
114
- };
115
- usage: {
116
- promptTokens: number;
117
- completionTokens: number;
118
- totalTokens: number;
119
- cachedTokens?: number;
120
- reasoningTokens?: number;
121
- };
122
- }
123
-
124
- export declare interface ExecutionResult {
125
- success: boolean;
126
- data: string;
127
- history: HistoryEvent[];
128
- }
129
-
130
- /**
131
- * Union type for all history events
132
- */
133
- export declare type HistoryEvent = AgentStep | ObservationEvent | UserTakeoverEvent;
134
-
135
- /**
136
- * MacroTool input structure
137
- *
138
- * This is the core abstraction that enforces the "reflection-before-action" mental model.
139
- * Before executing any action, the LLM must output its reasoning state.
140
- */
141
- export declare interface MacroToolInput extends Partial<AgentReflection> {
142
- action: Record<string, any>;
143
- }
144
-
145
- /**
146
- * MacroTool output structure
147
- */
148
- export declare interface MacroToolResult {
149
- input: MacroToolInput;
150
- output: string;
151
- }
152
-
153
- /**
154
- * Persistent observation event (stays in memory)
155
- */
156
- export declare interface ObservationEvent {
157
- type: 'observation';
158
- content: string;
159
- }
160
-
161
- export declare class PageAgent extends EventTarget {
162
- #private;
163
- config: PageAgentConfig;
164
- id: string;
5
+ export declare class PageAgent extends PageAgentCore {
165
6
  panel: Panel;
166
- tools: typeof tools;
167
- disposed: boolean;
168
- task: string;
169
- taskId: string;
170
- /** PageController for DOM operations */
171
- pageController: PageController;
172
- /** Runtime states for tracking across steps */
173
- states: {
174
- /** Accumulated wait time in seconds, used by wait tool */
175
- totalWaitTime: number;
176
- /** Last known URL for detecting navigation */
177
- lastURL: string;
178
- };
179
- /** History event stream */
180
- history: HistoryEvent[];
181
7
  constructor(config: PageAgentConfig);
182
- /**
183
- * Push a persistent observation to the history event stream.
184
- * This will be visible in <agent_history> and remain in memory across steps.
185
- */
186
- pushObservation(content: string): void;
187
- execute(task: string): Promise<ExecutionResult>;
188
- dispose(reason?: string): void;
189
- }
190
-
191
- export declare type PageAgentConfig = LLMConfig & AgentConfig & PageControllerConfig;
192
-
193
- /**
194
- * Internal tool definition that has access to PageAgent `this` context
195
- */
196
- export declare interface PageAgentTool<TParams = any> {
197
- description: string;
198
- inputSchema: z.ZodType<TParams>;
199
- execute: (this: PageAgent, args: TParams) => Promise<string>;
200
8
  }
201
9
 
202
- export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams>;
203
-
204
- /**
205
- * Internal tools for PageAgent.
206
- * Note: Using any to allow different parameter types for each tool
207
- */
208
- declare const tools: Map<string, PageAgentTool<any>>;
209
-
210
- /**
211
- * User takeover event
212
- */
213
- export declare interface UserTakeoverEvent {
214
- type: 'user_takeover';
215
- }
10
+ export { PageAgentConfig }
216
11
 
217
12
  export { }