@page-agent/page-controller 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Alibaba
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,255 @@
1
+ declare interface ActionResult { // Result shape resolved by the PageController action methods (clickElement, inputText, selectOption, scroll, scrollHorizontally, executeJavascript).
2
+ success: boolean; // NOTE(review): presumably true when the action completed without error — confirm against the implementation.
3
+ message: string; // NOTE(review): presumably a readable description of the outcome — confirm against the implementation.
4
+ }
5
+
6
+ declare function cleanUpHighlights(): void; // Synchronous helper re-exported through the `dom` namespace. NOTE(review): presumably removes the element highlights, like the same-named PageController method — confirm.
7
+
8
+ declare namespace dom { // Groups this file's DOM helper declarations under one name; the namespace itself has no `export` modifier here.
9
+ export {
10
+ getFlatTree,
11
+ flatTreeToString,
12
+ getSelectorMap,
13
+ getElementTextMap,
14
+ cleanUpHighlights,
15
+ DomConfig, // Referenced elsewhere in this file as `dom.DomConfig` (base of PageControllerConfig).
16
+ getAllTextTillNextClickableElement
17
+ }
18
+ }
19
+
20
+ declare interface DomConfig { // Options accepted by getFlatTree and inherited by PageControllerConfig; every field is optional.
21
+ interactiveBlacklist?: (Element | (() => Element))[]; // Each entry is an Element or a function returning one. NOTE(review): presumably elements to exclude from interactive detection — confirm.
22
+ interactiveWhitelist?: (Element | (() => Element))[]; // Same entry shape as interactiveBlacklist. NOTE(review): presumably elements to force-treat as interactive — confirm.
23
+ include_attributes?: string[]; // Same name and type as the second parameter of flatTreeToString. NOTE(review): presumably the attribute names to keep in the output — confirm.
24
+ highlightOpacity?: number; // NOTE(review): valid range and default are not visible in this declaration — confirm.
25
+ highlightLabelOpacity?: number; // NOTE(review): valid range and default are not visible in this declaration — confirm.
26
+ }
27
+
28
+ declare type DomNode = TextDomNode | ElementDomNode | InteractiveElementDomNode; // Union of the node kinds stored as values in FlatDomTree.map.
29
+
30
+ declare interface ElementDomNode { // Element member of the DomNode union whose isInteractive is `false` or absent.
31
+ tagName: string;
32
+ attributes?: Record<string, string>;
33
+ xpath?: string;
34
+ children?: string[]; // NOTE(review): presumably keys into FlatDomTree.map for the child nodes — confirm.
35
+ isVisible?: boolean;
36
+ isTopElement?: boolean;
37
+ isInViewport?: boolean;
38
+ isNew?: boolean;
39
+ isInteractive?: false; // Literal `false` (or omitted); InteractiveElementDomNode declares the literal `true` instead.
40
+ highlightIndex?: number; // Optional here, required on InteractiveElementDomNode.
41
+ extra?: Record<string, any>;
42
+ [key: string]: unknown; // Index signature: any additional string-keyed property is allowed and typed as unknown.
43
+ }
44
+
45
+ declare interface FlatDomTree { // Flattened DOM representation returned by getFlatTree and consumed by flatTreeToString and getSelectorMap.
46
+ rootId: string; // NOTE(review): presumably the key of the root node inside `map` — confirm.
47
+ map: Record<string, DomNode>; // String-keyed lookup of every node (text, element or interactive element).
48
+ }
49
+
50
+ /**
51
+ * Counterpart of views::clickable_elements_to_string in the Python implementation;
52
+ * converts the DOM information into a text format suited for an LLM to read.
53
+ * @example
54
+ * ``` text
55
+ * [0]<a aria-label=page-agent.js home />
56
+ * [1]<div >P />
57
+ * [2]<div >page-agent.js
58
+ * UI Agent in your webpage />
59
+ * [3]<a >Docs />
60
+ * [4]<a aria-label=View source (opens in a new window)>Source />
61
+ * UI Agent in your webpage
62
+ * The user states a request; the AI understands the page and operates it automatically.
63
+ * [5]<a role=button>Quick start />
64
+ * [6]<a role=button>View docs />
65
+ * No backend required
66
+ * ```
67
+ * Interactive elements are labelled with an index, telling the LLM it can operate on them by index.
68
+ * Indentation represents parent-child relationships.
69
+ * Plain text is listed as-is.
70
+ *
71
+ * @todo Filter for masking sensitive data
72
+ */
73
+ declare function flatTreeToString(flatTree: FlatDomTree, include_attributes?: string[]): string;
74
+
75
+ declare const getAllTextTillNextClickableElement: (node: TreeNode, maxDepth?: number) => string; // Takes a TreeNode and an optional depth limit and returns a string. NOTE(review): per its name, presumably collects text under `node` until the next clickable element — confirm.
76
+
77
+ declare function getElementTextMap(simplifiedHTML: string): Map<number, string>; // Returns a number-keyed map of strings built from the simplified HTML. NOTE(review): presumably index -> element text, matching PageController's private elementTextMap — confirm.
78
+
79
+ declare function getFlatTree(config: DomConfig): FlatDomTree; // Synchronously produces a FlatDomTree from the given (required) DomConfig. NOTE(review): presumably reads the live document — confirm.
80
+
81
+ declare function getSelectorMap(flatTree: FlatDomTree): Map<number, InteractiveElementDomNode>; // Returns a number-keyed map of the interactive nodes. NOTE(review): presumably keyed by highlightIndex — confirm.
82
+
83
+ declare interface InteractiveElementDomNode { // Element member of the DomNode union whose isInteractive is the literal `true`; value type of the map returned by getSelectorMap.
84
+ tagName: string;
85
+ attributes?: Record<string, string>;
86
+ xpath?: string;
87
+ children?: string[]; // NOTE(review): presumably keys into FlatDomTree.map for the child nodes — confirm.
88
+ isVisible?: boolean;
89
+ isTopElement?: boolean;
90
+ isInViewport?: boolean;
91
+ isInteractive: true; // Required literal `true`; distinguishes this shape from ElementDomNode.
92
+ highlightIndex: number; // Required here, optional on ElementDomNode.
93
+ /**
94
+ * DOM reference to the interactive element
95
+ */
96
+ ref: HTMLElement;
97
+ [key: string]: unknown; // Index signature: any additional string-keyed property is allowed and typed as unknown.
98
+ }
99
+
100
+ /**
101
+ * PageController manages DOM state and element interactions.
102
+ * It provides async methods for all DOM operations, keeping state isolated.
103
+ *
104
+ * @lifecycle
105
+ * - beforeUpdate: Emitted before the DOM tree is updated.
106
+ * - afterUpdate: Emitted after the DOM tree is updated.
107
+ */
108
+ export declare class PageController extends EventTarget { // NOTE(review): the lifecycle events above are presumably dispatched through the inherited EventTarget API — confirm.
109
+ private config; // Private members are declared without types in this declaration file.
110
+ /** Corresponds to eval_page in browser-use */
111
+ private flatTree;
112
+ /**
113
+ * All highlighted index-mapped interactive elements
114
+ * Corresponds to DOMState.selector_map in browser-use
115
+ */
116
+ private selectorMap;
117
+ /** Index -> element text description mapping */
118
+ private elementTextMap;
119
+ /**
120
+ * Simplified HTML for LLM consumption.
121
+ * Corresponds to clickable_elements_to_string in browser-use
122
+ */
123
+ private simplifiedHTML;
124
+ /** last time the tree was updated */
125
+ private lastTimeUpdate;
126
+ constructor(config?: PageControllerConfig); // The config argument is optional.
127
+ /**
128
+ * Get current page URL
129
+ */
130
+ getCurrentUrl(): Promise<string>;
131
+ /**
132
+ * Get current page title
133
+ */
134
+ getPageTitle(): Promise<string>;
135
+ /**
136
+ * Get page scroll and size info
137
+ */
138
+ getPageInfo(): Promise<{
139
+ viewport_width: number;
140
+ viewport_height: number;
141
+ page_width: number;
142
+ page_height: number;
143
+ scroll_x: number;
144
+ scroll_y: number;
145
+ pixels_above: number;
146
+ pixels_below: number;
147
+ pages_above: number;
148
+ pages_below: number;
149
+ total_pages: number;
150
+ current_page_position: number;
151
+ pixels_left: number;
152
+ pixels_right: number;
153
+ }>;
154
+ /**
155
+ * Get the simplified HTML representation of the page.
156
+ * This is used by LLM to understand the page structure.
157
+ */
158
+ getSimplifiedHTML(): Promise<string>;
159
+ /**
160
+ * Get text description for an element by index
161
+ */
162
+ getElementText(index: number): Promise<string | undefined>; // May resolve to undefined. NOTE(review): presumably when no element has that index — confirm.
163
+ /**
164
+ * Get total number of indexed interactive elements
165
+ */
166
+ getElementCount(): Promise<number>;
167
+ /**
168
+ * Get last tree update timestamp
169
+ */
170
+ getLastUpdateTime(): Promise<number>; // NOTE(review): timestamp unit (ms since epoch?) is not visible in this declaration — confirm.
171
+ /**
172
+ * Get the viewport expansion setting
173
+ */
174
+ getViewportExpansion(): Promise<number>; // Resolves to a number although PageControllerConfig.viewportExpansion is optional. NOTE(review): presumably a default is applied — confirm.
175
+ /**
176
+ * Update DOM tree, returns simplified HTML for LLM.
177
+ * This is the main method to refresh the page state.
178
+ */
179
+ updateTree(): Promise<string>;
180
+ /**
181
+ * Clean up all element highlights
182
+ */
183
+ cleanUpHighlights(): Promise<void>;
184
+ /**
185
+ * Click element by index
186
+ */
187
+ clickElement(index: number): Promise<ActionResult>;
188
+ /**
189
+ * Input text into element by index
190
+ */
191
+ inputText(index: number, text: string): Promise<ActionResult>;
192
+ /**
193
+ * Select dropdown option by index and option text
194
+ */
195
+ selectOption(index: number, optionText: string): Promise<ActionResult>;
196
+ /**
197
+ * Scroll vertically
198
+ */
199
+ scroll(options: {
200
+ down: boolean; // NOTE(review): presumably true scrolls down and false scrolls up — confirm.
201
+ numPages: number; // Required. NOTE(review): interaction with the optional `pixels` is not visible here — confirm.
202
+ pixels?: number;
203
+ index?: number; // NOTE(review): presumably the index of an element to scroll instead of the page — confirm.
204
+ }): Promise<ActionResult>;
205
+ /**
206
+ * Scroll horizontally
207
+ */
208
+ scrollHorizontally(options: {
209
+ right: boolean; // NOTE(review): presumably true scrolls right and false scrolls left — confirm.
210
+ pixels: number; // Required here, unlike the optional `pixels` of scroll().
211
+ index?: number; // NOTE(review): presumably the index of an element to scroll instead of the page — confirm.
212
+ }): Promise<ActionResult>;
213
+ /**
214
+ * Execute arbitrary JavaScript on the page
215
+ */
216
+ executeJavascript(script: string): Promise<ActionResult>; // NOTE(review): runs a caller-supplied script string; callers should not pass untrusted input.
217
+ /**
218
+ * Dispose and clean up resources
219
+ */
220
+ dispose(): void; // The only public method here that returns synchronously rather than a Promise.
221
+ }
222
+
223
+ /**
224
+ * Configuration for PageController
225
+ */
226
+ export declare interface PageControllerConfig extends dom.DomConfig { // Inherits every DomConfig option and adds viewportExpansion.
227
+ viewportExpansion?: number; // Optional; PageController.getViewportExpansion exposes the effective value. NOTE(review): unit and default are not visible here — confirm.
228
+ }
229
+
230
+ declare interface TextDomNode { // Text member of the DomNode union.
231
+ type: 'TEXT_NODE'; // Literal tag; the two element node interfaces declare no `type` field.
232
+ text: string;
233
+ isVisible: boolean; // Required here, optional on the element node interfaces.
234
+ [key: string]: unknown; // Index signature: any additional string-keyed property is allowed and typed as unknown.
235
+ }
236
+
237
+ /**
238
+ * Type used internally by elementsToString
239
+ */
240
+ declare interface TreeNode { // Also the parameter type of getAllTextTillNextClickableElement.
241
+ type: 'text' | 'element'; // Tag telling text nodes from element nodes.
242
+ parent: TreeNode | null; // Typed as nullable. NOTE(review): presumably null for the root — confirm.
243
+ children: TreeNode[];
244
+ isVisible: boolean;
245
+ text?: string;
246
+ tagName?: string;
247
+ attributes?: Record<string, string>;
248
+ isInteractive?: boolean;
249
+ isTopElement?: boolean;
250
+ isNew?: boolean;
251
+ highlightIndex?: number;
252
+ extra?: Record<string, any>;
253
+ }
254
+
255
+ export { }