@page-agent/page-controller 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/lib/PageController.d.ts +255 -0
- package/dist/lib/page-controller.js +1901 -0
- package/dist/lib/page-controller.js.map +1 -0
- package/package.json +40 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License

Copyright (c) 2025 Alibaba

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
|
|
package/dist/lib/PageController.d.ts
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
/**
 * Result object resolved by the `PageController` action methods
 * (`clickElement`, `inputText`, `selectOption`, `scroll`,
 * `scrollHorizontally`, `executeJavascript`).
 */
declare interface ActionResult {
    /** Whether the action succeeded. */
    success: boolean;
    /** Message accompanying the result; presumably a readable description of the outcome (TODO confirm). */
    message: string;
}
|
|
5
|
+
|
|
6
|
+
/**
 * Cleans up element highlights. Takes no arguments and returns nothing.
 * NOTE(review): presumably the synchronous helper behind
 * `PageController.cleanUpHighlights()` — confirm against the implementation.
 */
declare function cleanUpHighlights(): void;
|
|
7
|
+
|
|
8
|
+
/**
 * Groups the DOM helper declarations of this file under a single `dom` name,
 * so they can be referenced as `dom.<member>` (e.g. `dom.DomConfig`, which
 * `PageControllerConfig` extends).
 */
declare namespace dom {
    export {
        getFlatTree,
        flatTreeToString,
        getSelectorMap,
        getElementTextMap,
        cleanUpHighlights,
        DomConfig,
        getAllTextTillNextClickableElement
    }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Options accepted by `getFlatTree`. `PageControllerConfig` extends this
 * interface, so the same options can be given to the `PageController`
 * constructor. Every field is optional.
 */
declare interface DomConfig {
    /**
     * Elements, or functions returning an element, to blacklist.
     * NOTE(review): presumably these are never reported as interactive — confirm against the implementation.
     */
    interactiveBlacklist?: (Element | (() => Element))[];
    /**
     * Elements, or functions returning an element, to whitelist.
     * NOTE(review): presumably these are always reported as interactive — confirm against the implementation.
     */
    interactiveWhitelist?: (Element | (() => Element))[];
    /**
     * Attribute names.
     * NOTE(review): presumably the attributes rendered per element, like the
     * `include_attributes` parameter of `flatTreeToString` — confirm.
     */
    include_attributes?: string[];
    /** Opacity of the element highlight; presumably a 0–1 CSS opacity (TODO confirm). */
    highlightOpacity?: number;
    /** Opacity of the highlight label; presumably a 0–1 CSS opacity (TODO confirm). */
    highlightLabelOpacity?: number;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Any entry stored in `FlatDomTree.map`: a text node, a plain element,
 * or an interactive element.
 */
declare type DomNode = TextDomNode | ElementDomNode | InteractiveElementDomNode;
|
|
29
|
+
|
|
30
|
+
/**
 * Element entry of a `FlatDomTree` that is not interactive: `isInteractive`
 * can only be `false` (or absent) here, whereas `InteractiveElementDomNode`
 * declares `isInteractive: true`.
 */
declare interface ElementDomNode {
    /** Tag name of the element. */
    tagName: string;
    /** Attribute name -> attribute value pairs. */
    attributes?: Record<string, string>;
    /** XPath of the element (exact format is not visible in this file). */
    xpath?: string;
    /** Child entries; presumably keys into `FlatDomTree.map` (TODO confirm). */
    children?: string[];
    isVisible?: boolean;
    isTopElement?: boolean;
    isInViewport?: boolean;
    isNew?: boolean;
    /** Literal `false` when present; distinguishes this shape from `InteractiveElementDomNode`. */
    isInteractive?: false;
    highlightIndex?: number;
    /** Free-form extra data; its shape is not specified in this file. */
    extra?: Record<string, any>;
    /** Further, untyped properties are permitted. */
    [key: string]: unknown;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Flattened DOM tree: all nodes live in one string-keyed map instead of
 * being nested. Produced by `getFlatTree` and consumed by
 * `flatTreeToString` and `getSelectorMap`.
 */
declare interface FlatDomTree {
    /** Identifier of the root; presumably a key of `map` (TODO confirm). */
    rootId: string;
    /** Every node of the tree, keyed by a string id. */
    map: Record<string, DomNode>;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Counterpart of `views::clickable_elements_to_string` in the Python code:
 * turns DOM information into a text format suited to being read by an LLM.
 *
 * Interactive elements are labelled with an index, hinting to the LLM that it
 * can operate on them by that index.
 * Indentation denotes parent-child relationships.
 * Plain text is listed as-is.
 *
 * @param flatTree - The flat DOM tree to render.
 * @param include_attributes - Attribute names; presumably the attributes
 * rendered on each element (TODO confirm against the implementation).
 * @returns The text representation of the tree.
 *
 * @example
 * Output looks like this (nesting indentation is not reproduced in this sample):
 * ``` text
 * [0]<a aria-label=page-agent.js home page />
 * [1]<div >P />
 * [2]<div >page-agent.js
 * UI Agent in your webpage />
 * [3]<a >Docs />
 * [4]<a aria-label=View source (opens in a new window)>Source />
 * UI Agent in your webpage
 * The user states a need; the AI understands the page and operates it automatically.
 * [5]<a role=button>Quick start />
 * [6]<a role=button>View docs />
 * No backend required
 * ```
 *
 * @todo Data-masking filter for sensitive data
 */
declare function flatTreeToString(flatTree: FlatDomTree, include_attributes?: string[]): string;
|
|
74
|
+
|
|
75
|
+
/**
 * Returns text gathered starting from `node`.
 * NOTE(review): judging by the name, collection presumably stops at the next
 * clickable element, and `maxDepth` presumably bounds how deep the traversal
 * goes — confirm both against the implementation.
 */
declare const getAllTextTillNextClickableElement: (node: TreeNode, maxDepth?: number) => string;
|
|
76
|
+
|
|
77
|
+
/**
 * Builds a number -> string map from a simplified-HTML string.
 * NOTE(review): presumably element index -> element text, matching the
 * "Index -> element text description mapping" kept by `PageController` — confirm.
 */
declare function getElementTextMap(simplifiedHTML: string): Map<number, string>;
|
|
78
|
+
|
|
79
|
+
/**
 * Produces a `FlatDomTree` using the given `DomConfig`.
 * NOTE(review): presumably built from the current document — the DOM source
 * is not visible in this declaration; confirm against the implementation.
 */
declare function getFlatTree(config: DomConfig): FlatDomTree;
|
|
80
|
+
|
|
81
|
+
/**
 * Derives a number -> `InteractiveElementDomNode` map from a flat tree.
 * NOTE(review): presumably keyed by each node's `highlightIndex` — confirm.
 */
declare function getSelectorMap(flatTree: FlatDomTree): Map<number, InteractiveElementDomNode>;
|
|
82
|
+
|
|
83
|
+
/**
 * Interactive element entry of a `FlatDomTree`. Unlike `ElementDomNode`,
 * `isInteractive` is always `true`, `highlightIndex` is required, and the
 * node carries a reference to the live DOM element.
 */
declare interface InteractiveElementDomNode {
    /** Tag name of the element. */
    tagName: string;
    /** Attribute name -> attribute value pairs. */
    attributes?: Record<string, string>;
    /** XPath of the element (exact format is not visible in this file). */
    xpath?: string;
    /** Child entries; presumably keys into `FlatDomTree.map` (TODO confirm). */
    children?: string[];
    isVisible?: boolean;
    isTopElement?: boolean;
    isInViewport?: boolean;
    /** Always `true`; distinguishes this shape from `ElementDomNode`. */
    isInteractive: true;
    /** Index assigned to this element (required for interactive elements). */
    highlightIndex: number;
    /**
     * DOM reference of the interactive element
     */
    ref: HTMLElement;
    /** Further, untyped properties are permitted. */
    [key: string]: unknown;
}
|
|
99
|
+
|
|
100
|
+
/**
 * PageController manages DOM state and element interactions.
 * It provides async methods for all DOM operations, keeping state isolated.
 *
 * The class extends `EventTarget`.
 * NOTE(review): the lifecycle events below are presumably dispatched on the
 * instance itself and observed with `addEventListener` — confirm.
 *
 * @lifecycle
 * - beforeUpdate: Emitted before the DOM tree is updated.
 * - afterUpdate: Emitted after the DOM tree is updated.
 */
export declare class PageController extends EventTarget {
    private config;
    /** Corresponds to eval_page in browser-use */
    private flatTree;
    /**
     * All highlighted index-mapped interactive elements
     * Corresponds to DOMState.selector_map in browser-use
     */
    private selectorMap;
    /** Index -> element text description mapping */
    private elementTextMap;
    /**
     * Simplified HTML for LLM consumption.
     * Corresponds to clickable_elements_to_string in browser-use
     */
    private simplifiedHTML;
    /** last time the tree was updated */
    private lastTimeUpdate;
    /**
     * @param config - Optional controller configuration (DOM options plus `viewportExpansion`).
     */
    constructor(config?: PageControllerConfig);
    /**
     * Get current page URL
     */
    getCurrentUrl(): Promise<string>;
    /**
     * Get current page title
     */
    getPageTitle(): Promise<string>;
    /**
     * Get page scroll and size info
     */
    getPageInfo(): Promise<{
        viewport_width: number;
        viewport_height: number;
        page_width: number;
        page_height: number;
        scroll_x: number;
        scroll_y: number;
        pixels_above: number;
        pixels_below: number;
        pages_above: number;
        pages_below: number;
        total_pages: number;
        current_page_position: number;
        pixels_left: number;
        pixels_right: number;
    }>;
    /**
     * Get the simplified HTML representation of the page.
     * This is used by LLM to understand the page structure.
     */
    getSimplifiedHTML(): Promise<string>;
    /**
     * Get text description for an element by index
     *
     * @returns The text, or `undefined` (presumably when the index is unknown — TODO confirm).
     */
    getElementText(index: number): Promise<string | undefined>;
    /**
     * Get total number of indexed interactive elements
     */
    getElementCount(): Promise<number>;
    /**
     * Get last tree update timestamp
     */
    getLastUpdateTime(): Promise<number>;
    /**
     * Get the viewport expansion setting
     */
    getViewportExpansion(): Promise<number>;
    /**
     * Update DOM tree, returns simplified HTML for LLM.
     * This is the main method to refresh the page state.
     */
    updateTree(): Promise<string>;
    /**
     * Clean up all element highlights
     */
    cleanUpHighlights(): Promise<void>;
    /**
     * Click element by index
     */
    clickElement(index: number): Promise<ActionResult>;
    /**
     * Input text into element by index
     */
    inputText(index: number, text: string): Promise<ActionResult>;
    /**
     * Select dropdown option by index and option text
     */
    selectOption(index: number, optionText: string): Promise<ActionResult>;
    /**
     * Scroll vertically
     *
     * NOTE(review): option semantics are inferred from their names — `down`
     * presumably selects the direction, `numPages` / `pixels` the distance, and
     * `index` an indexed element to scroll instead of the page; confirm.
     */
    scroll(options: {
        down: boolean;
        numPages: number;
        pixels?: number;
        index?: number;
    }): Promise<ActionResult>;
    /**
     * Scroll horizontally
     *
     * NOTE(review): option semantics are inferred from their names — `right`
     * presumably selects the direction, `pixels` the distance, and `index` an
     * indexed element to scroll instead of the page; confirm.
     */
    scrollHorizontally(options: {
        right: boolean;
        pixels: number;
        index?: number;
    }): Promise<ActionResult>;
    /**
     * Execute arbitrary JavaScript on the page
     */
    executeJavascript(script: string): Promise<ActionResult>;
    /**
     * Dispose and clean up resources
     */
    dispose(): void;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Configuration for PageController: every `dom.DomConfig` option plus
 * `viewportExpansion`.
 */
export declare interface PageControllerConfig extends dom.DomConfig {
    /**
     * Viewport expansion setting, readable back via
     * `PageController.getViewportExpansion()`.
     * NOTE(review): unit and meaning are not visible here; presumably how far
     * beyond the viewport elements are still considered — confirm.
     */
    viewportExpansion?: number;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Text entry of a `FlatDomTree`, identified by the literal
 * `type: 'TEXT_NODE'`.
 */
declare interface TextDomNode {
    /** Literal tag marking this entry as a text node. */
    type: 'TEXT_NODE';
    /** Text content of the node. */
    text: string;
    /** Whether the text is visible. */
    isVisible: boolean;
    /** Further, untyped properties are permitted. */
    [key: string]: unknown;
}
|
|
236
|
+
|
|
237
|
+
/**
 * Type used internally by elementsToString.
 *
 * A nested tree node with links to its parent and children; it is the
 * parameter type of `getAllTextTillNextClickableElement`.
 */
declare interface TreeNode {
    /** Node kind: a text node or an element. */
    type: 'text' | 'element';
    /** Parent node, or `null` (presumably for the root — TODO confirm). */
    parent: TreeNode | null;
    /** Child nodes. */
    children: TreeNode[];
    isVisible: boolean;
    /** Text content; presumably set only when `type` is `'text'` (TODO confirm). */
    text?: string;
    /** Tag name; presumably set only when `type` is `'element'` (TODO confirm). */
    tagName?: string;
    /** Attribute name -> attribute value pairs. */
    attributes?: Record<string, string>;
    isInteractive?: boolean;
    isTopElement?: boolean;
    isNew?: boolean;
    highlightIndex?: number;
    /** Free-form extra data; its shape is not specified in this file. */
    extra?: Record<string, any>;
}
|
|
254
|
+
|
|
255
|
+
// Empty export: makes TypeScript treat this declaration file as a module,
// so only the declarations explicitly marked `export` above are exposed.
export { }
|