@midscene/web 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,160 +0,0 @@
1
- // import { TEXT_MAX_SIZE } from './constants';
2
-
3
/**
 * Debug logging hook. Currently a no-op: every message is discarded.
 *
 * @param msg - Values that would be logged if logging were enabled.
 */
export function logger(...msg: any[]): void {
  // Intentionally silent; re-enable the line below for local debugging.
  // console.log(...msg);
}
6
-
7
- // const nodeIndexCounter = 0;
8
-
9
/** Name of the attribute that carries a node's index. */
const taskIdKey = '_midscene_retrieve_task_id';
// const nodeDataIdKey = 'data-midscene-task-';
// const nodeIndexKey = '_midscene_retrieve_node_index';

/**
 * Builds the CSS attribute selector matching a node whose `taskIdKey`
 * attribute equals `val`.
 *
 * @param val - Index value stored in the attribute.
 * @returns A selector of the form `[<taskIdKey>='<val>']`.
 */
function selectorForValue(val: number): string {
  const quotedValue = "'" + String(val) + "'";
  return '[' + taskIdKey + '=' + quotedValue + ']';
}
16
-
17
/**
 * Tags an element with its index and returns the selector that finds it again.
 *
 * The index is written to the `taskIdKey` attribute of the element.
 *
 * @param node - Node to tag; anything that is not an `HTMLElement` is ignored.
 * @param nodeIndex - Index to store on the element.
 * @returns The attribute selector for the tagged element, or `''` when `node`
 *   is not an `HTMLElement`.
 */
export function setDataForNode(node: HTMLElement | Node, nodeIndex: number): string {
  if (!(node instanceof HTMLElement)) {
    return '';
  }

  // The attribute name is a non-empty module constant, so no emptiness check is needed.
  node.setAttribute(taskIdKey, nodeIndex.toString());
  return selectorForValue(nodeIndex);
}
31
-
32
/**
 * Reads the computed `content` of an element's `::before` and `::after`
 * pseudo-elements.
 *
 * A computed value of `none` is reported as an empty string; otherwise all
 * double-quote characters are stripped from the value.
 *
 * @param element - Node to inspect; non-`HTMLElement` nodes yield empty strings.
 * @returns The cleaned `before` and `after` content strings.
 */
export function getPseudoElementContent(element: Node): { before: string; after: string } {
  const result = { before: '', after: '' };

  if (element instanceof HTMLElement) {
    const readContent = (pseudo: string): string => {
      const raw = window.getComputedStyle(element, pseudo).getPropertyValue('content');
      if (raw === 'none') {
        return '';
      }
      return raw.replace(/"/g, '');
    };

    result.before = readContent('::before');
    result.after = readContent('::after');
  }

  return result;
}
43
-
44
/**
 * Reports whether the element's computed `overflow-y` is `scroll`, `auto`,
 * or `hidden`.
 *
 * @param element - Element whose computed style is inspected.
 * @returns `true` for any of the three values above, otherwise `false`.
 */
export function hasOverflowY(element: HTMLElement): boolean {
  const { overflowY } = window.getComputedStyle(element);
  return ['scroll', 'auto', 'hidden'].includes(overflowY);
}
48
-
49
/**
 * Computes the on-screen rectangle of an element, or `false` when the element
 * should be treated as not visible.
 *
 * An element is rejected when it:
 * - is null or not an `HTMLElement`;
 * - has `display: none`, `visibility: hidden`, or `opacity: 0` (inputs are
 *   exempt from the opacity rule);
 * - has a zero-width and zero-height bounding box;
 * - is not fully inside the viewport;
 * - sticks out (by more than a 10px tolerance) of itself-or-ancestor boxes
 *   whose computed `overflow` is `hidden`.
 *
 * `getBoundingClientRect()` already reports viewport-relative coordinates, so
 * the rectangle is compared against the viewport size directly and returned
 * without any scroll-offset adjustment.
 *
 * @param el - Node to measure.
 * @returns Rounded viewport-relative `left`/`top`/`width`/`height`, or `false`.
 */
export function visibleRect(
  el: HTMLElement | Node | null,
): { left: number; top: number; width: number; height: number } | false {
  if (!el) {
    logger('Element is not in the DOM hierarchy');
    return false;
  }

  if (!(el instanceof HTMLElement)) {
    logger('Element is not an HTMLElement');
    return false;
  }

  const style = window.getComputedStyle(el);
  if (
    style.display === 'none' ||
    style.visibility === 'hidden' ||
    (style.opacity === '0' && el.tagName !== 'INPUT')
  ) {
    logger('Element is hidden');
    return false;
  }

  const rect = el.getBoundingClientRect();
  if (rect.width === 0 && rect.height === 0) {
    logger('Element has no size');
    return false;
  }

  // Viewport-relative bounds: no scroll offset is involved on either side.
  const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
  const viewportWidth = window.innerWidth || document.documentElement.clientWidth;
  const isInViewport =
    rect.top >= 0 && rect.left >= 0 && rect.bottom <= viewportHeight && rect.right <= viewportWidth;

  if (!isInViewport) {
    logger('Element is not in the viewport');
    logger(rect, viewportHeight, viewportWidth);
    return false;
  }

  // Walk from the element up to (but excluding) <body>, checking clipping boxes.
  const tolerance = 10;
  let parent: HTMLElement | null = el;
  while (parent && parent !== document.body) {
    const parentStyle = window.getComputedStyle(parent);
    if (parentStyle.overflow === 'hidden') {
      const parentRect = parent.getBoundingClientRect();
      if (
        rect.top < parentRect.top - tolerance ||
        rect.left < parentRect.left - tolerance ||
        rect.bottom > parentRect.bottom + tolerance ||
        rect.right > parentRect.right + tolerance
      ) {
        logger('Element is clipped by an ancestor', parent, rect, parentRect);
        return false;
      }
    }
    parent = parent.parentElement;
  }

  return {
    left: Math.round(rect.left),
    top: Math.round(rect.top),
    width: Math.round(rect.width),
    height: Math.round(rect.height),
  };
}
118
-
119
/**
 * Extracts the trimmed text of a node that directly contains text.
 *
 * @param node - Node to inspect.
 * @returns The trimmed text content, or `false` when the node is missing, is a
 *   comment node, has no direct text-node child, or holds only whitespace.
 */
export function validTextNodeContent(node: Node): string | false {
  if (!node) {
    return false;
  }
  if (node.nodeType === Node.COMMENT_NODE) {
    return false;
  }

  // At least one direct child must be a text node.
  const hasTextChild = Array.from(node.childNodes).some(
    (child) => child.nodeType === Node.TEXT_NODE,
  );
  if (!hasTextChild) {
    return false;
  }

  const content = node.textContent || (node as HTMLElement).innerText;
  if (content && !/^\s*$/.test(content)) {
    return content.trim();
  }

  return false;
}
143
-
144
/**
 * Collects an element's attributes into a plain name → value record.
 *
 * - The `class` attribute is rewritten as a dotted selector fragment
 *   (`"a b"` → `".a.b"`); repeated whitespace is collapsed and a class
 *   attribute with no names is omitted.
 * - Attributes with an empty value are omitted.
 *
 * @param node - Node to read; non-`HTMLElement` nodes yield `{}`.
 * @returns The attribute record.
 */
export function getNodeAttributes(node: HTMLElement | Node): Record<string, string> {
  if (!node || !(node instanceof HTMLElement) || !node.attributes) {
    return {};
  }

  const entries: Array<[string, string]> = [];
  for (const attr of Array.from(node.attributes)) {
    if (attr.name === 'class') {
      const classNames = attr.value.split(/\s+/).filter(Boolean);
      if (classNames.length > 0) {
        entries.push([attr.name, `.${classNames.join('.')}`]);
      }
    } else if (attr.value) {
      // Skipping (rather than pushing an empty entry) keeps a bogus
      // "undefined" key out of the result.
      entries.push([attr.name, attr.value]);
    }
  }

  return Object.fromEntries(entries);
}
package/src/img/img.ts DELETED
@@ -1,132 +0,0 @@
1
- import assert from 'assert';
2
- import { Buffer } from 'node:buffer';
3
- import sharp from 'sharp';
4
- import { NodeType } from '@/html-element/constants';
5
-
6
/**
 * One element to be drawn on the screenshot overlay: its box, the label to
 * print next to it, and its attributes.
 */
interface ElementType {
  /** Horizontal position of the box, used as the SVG rect `x`. */
  x: number;
  /** Vertical position of the box, used as the SVG rect `y`. */
  y: number;
  /** Box width. */
  width: number;
  /** Box height. */
  height: number;
  /** Text printed in the label badge. */
  label: string;
  /** String attributes of the element, including its node type. */
  attributes: {
    [key: string]: string;
    nodeType: NodeType;
  };
}
18
-
19
/** Escapes the characters that are significant in XML text and attribute values. */
const escapeXmlText = (text: string): string =>
  text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');

/**
 * Renders an SVG overlay that outlines every element and prints its label in
 * a small colored badge.
 *
 * The badge is placed to the left of the element, vertically centered; when
 * that would start left of the image (`x < 0`) it is placed above the
 * element's top-left corner instead. Outline colors alternate blue/green by
 * element index. Labels are XML-escaped before being embedded.
 *
 * @param elements - Elements to draw.
 * @param imageWidth - Width of the SVG canvas.
 * @param imageHeight - Height of the SVG canvas.
 * @returns The SVG document as a Buffer.
 */
const createSvgOverlay = (elements: Array<ElementType>, imageWidth: number, imageHeight: number) => {
  const colors = [
    { rect: 'blue', text: 'white' },
    { rect: 'green', text: 'white' },
  ];

  // One clip path per element so the thick outline stays inside the element box.
  const clipPaths = elements.map(
    (element, index) => `
        <clipPath id="clip${index}">
          <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}" />
        </clipPath>
      `,
  );

  const shapes = elements.map((element, index) => {
    // Rough text metrics: 8px per character, 12px tall.
    const textWidth = element.label.length * 8;
    const textHeight = 12;

    // Badge sized to wrap the text with a little padding.
    const rectWidth = textWidth + 5;
    const rectHeight = textHeight + 4;
    let rectX = element.x - rectWidth;
    let rectY = element.y + element.height / 2 - textHeight / 2 - 2;

    // Badge would be cut off on the left: move it above the element instead.
    if (rectX < 0) {
      rectX = element.x;
      rectY = element.y - rectHeight;
    }

    const textX = rectX + rectWidth / 2;
    const textY = rectY + rectHeight / 2 + 6;
    const color = colors[index % colors.length];

    return `
        <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}"
              style="fill:none;stroke:${color.rect};stroke-width:4" clip-path="url(#clip${index})" />
        <rect x="${rectX}" y="${rectY}" width="${rectWidth}" height="${rectHeight}" style="fill:${color.rect};" />
        <text x="${textX}" y="${textY}"
              text-anchor="middle" dominant-baseline="middle" style="fill:${color.text};font-size:12px;font-weight:bold;">
          ${escapeXmlText(element.label)}
        </text>
      `;
  });

  const svgContent = [
    `<svg width="${imageWidth}" height="${imageHeight}" xmlns="http://www.w3.org/2000/svg">`,
    '<defs>',
    ...clipPaths,
    '</defs>',
    ...shapes,
    '</svg>',
  ].join('');

  return Buffer.from(svgContent);
};
80
-
81
/** Draws `overlay` on top of `imageBuffer` and returns the result base64-encoded. */
const compositeOverlayToBase64 = async (imageBuffer: Buffer, overlay: Buffer): Promise<string> => {
  const composed = await sharp(imageBuffer)
    .composite([{ input: overlay, blend: 'over' }])
    .toBuffer();
  return composed.toString('base64');
};

/**
 * Produces two annotated copies of a screenshot: one marking every element in
 * `elementsPostionInfo`, and one marking only those in
 * `elementsPostionInfoWithoutText`.
 *
 * @param options.inputImgBase64 - Screenshot as base64, with or without a
 *   `data:...;base64,` prefix.
 * @param options.elementsPostionInfo - Elements for the first overlay.
 * @param options.elementsPostionInfoWithoutText - Elements for the second overlay.
 * @returns Both composited images as base64 strings (no data-URL prefix).
 * @throws If the base64 payload is empty, if the image width or height cannot
 *   be determined, or if image processing fails.
 */
export const processImageElementInfo = async (options: {
  inputImgBase64: string;
  elementsPostionInfo: Array<ElementType>;
  elementsPostionInfoWithoutText: Array<ElementType>;
}) => {
  // Drop any data-URL prefix and keep only the base64 payload.
  const base64Image = options.inputImgBase64.split(';base64,').pop();
  assert(base64Image, 'base64Image is undefined');

  const imageBuffer = Buffer.from(base64Image, 'base64');
  const { width, height } = await sharp(imageBuffer).metadata();
  if (!width || !height) {
    throw new Error('Image processing failed because width or height is undefined');
  }

  const svgOverlay = createSvgOverlay(options.elementsPostionInfo, width, height);
  const svgOverlayWithoutText = createSvgOverlay(options.elementsPostionInfoWithoutText, width, height);

  // The two composites are independent, so run them concurrently.
  const [compositeElementInfoImgBase64, compositeElementInfoImgWithoutTextBase64] = await Promise.all([
    compositeOverlayToBase64(imageBuffer, svgOverlay),
    compositeOverlayToBase64(imageBuffer, svgOverlayWithoutText),
  ]);

  return {
    compositeElementInfoImgBase64,
    compositeElementInfoImgWithoutTextBase64,
  };
};
package/src/img/util.ts DELETED
@@ -1,28 +0,0 @@
1
- import { getElementInfosFromPage } from '../playwright/utils';
2
- import { NodeType } from '@/html-element/constants';
3
- import { ElementInfo } from '@/html-element/extractInfo';
4
-
5
/**
 * Collects element information from the page and derives overlay-ready
 * position records from it.
 *
 * @param page - Page handle forwarded to `getElementInfosFromPage`.
 * @returns The raw snapshot, one position record per element, and the subset
 *   of position records whose `attributes.nodeType` is not `NodeType.TEXT`.
 */
export async function getElementInfos(page: any) {
  const captureElementSnapshot: Array<ElementInfo> = await getElementInfosFromPage(page);

  // Flatten each element into the label + box shape used for drawing.
  const elementsPostionInfo = captureElementSnapshot.map(({ id, rect, attributes }) => ({
    label: id.toString(),
    x: rect.left,
    y: rect.top,
    width: rect.width,
    height: rect.height,
    attributes,
  }));

  const elementsPostionInfoWithoutText = elementsPostionInfo.filter(
    (positionInfo) => positionInfo.attributes.nodeType !== NodeType.TEXT,
  );

  return {
    elementsPostionInfo,
    captureElementSnapshot,
    elementsPostionInfoWithoutText,
  };
}
package/src/index.ts DELETED
@@ -1,2 +0,0 @@
1
- export { PlaywrightAiFixture } from './playwright';
2
- export type { PlayWrightAiFixtureType } from './playwright';
@@ -1,276 +0,0 @@
1
- import assert from 'assert';
2
- import type { Page as PlaywrightPage } from 'playwright';
3
- import Insight, {
4
- DumpSubscriber,
5
- ExecutionDump,
6
- ExecutionRecorderItem,
7
- ExecutionTaskActionApply,
8
- ExecutionTaskApply,
9
- ExecutionTaskInsightLocateApply,
10
- ExecutionTaskInsightQueryApply,
11
- ExecutionTaskPlanningApply,
12
- Executor,
13
- InsightDump,
14
- InsightExtractParam,
15
- PlanningAction,
16
- PlanningActionParamHover,
17
- PlanningActionParamInputOrKeyPress,
18
- PlanningActionParamScroll,
19
- PlanningActionParamTap,
20
- plan,
21
- } from '@midscene/core';
22
- import { commonScreenshotParam, getTmpFile, sleep } from '@midscene/core/utils';
23
- import { base64Encoded } from '@midscene/core/image';
24
- import { parseContextFromPlaywrightPage } from './utils';
25
- import { WebElementInfo } from './element';
26
-
27
/**
 * Runs natural-language actions and data queries against a Playwright page.
 *
 * Work is expressed as tasks appended to an `Executor`; every task is wrapped
 * so that screenshots are recorded around it. After each run the executor's
 * dump is stored on `actionDump`, including when the run fails.
 */
export class PlayWrightActionAgent {
  page: PlaywrightPage;

  insight: Insight<WebElementInfo>;

  executor: Executor;

  actionDump?: ExecutionDump;

  /**
   * @param page - Playwright page to operate on.
   * @param opt - Optional settings; `taskName` names the executor and
   *   defaults to `'MidScene - PlayWrightAI'`.
   */
  constructor(page: PlaywrightPage, opt?: { taskName?: string }) {
    this.page = page;
    this.insight = new Insight<WebElementInfo>(async () => {
      return await parseContextFromPlaywrightPage(page);
    });
    this.executor = new Executor(opt?.taskName || 'MidScene - PlayWrightAI');
  }

  /**
   * Wraps any thrown value in a fresh `Error`, keeping the original as `cause`.
   * Non-Error values are stringified so their message is not lost.
   */
  private static toError(e: unknown): Error {
    const message = e instanceof Error ? e.message : String(e);
    return new Error(message, { cause: e });
  }

  /**
   * Registers a dump subscriber on `insight.onceDumpUpdatedFn` and returns a
   * getter for the most recent dump it received (`undefined` if none).
   */
  private captureNextInsightDump(): () => InsightDump | undefined {
    let insightDump: InsightDump | undefined;
    const dumpCollector: DumpSubscriber = (dump) => {
      insightDump = dump;
    };
    this.insight.onceDumpUpdatedFn = dumpCollector;
    return () => insightDump;
  }

  /** Takes a JPEG screenshot of the page and returns it as a recorder item. */
  private async recordScreenshot(timing: ExecutionRecorderItem['timing']) {
    const file = getTmpFile('jpeg');
    await this.page.screenshot({
      ...commonScreenshotParam,
      path: file,
    });
    const item: ExecutionRecorderItem = {
      type: 'screenshot',
      ts: Date.now(),
      screenshot: base64Encoded(file),
      timing,
    };
    return item;
  }

  /**
   * Returns a copy of `taskApply` whose executor records a screenshot before
   * running and, for `Action` tasks, a second one a second after running.
   */
  private wrapExecutorWithScreenshot(taskApply: ExecutionTaskApply): ExecutionTaskApply {
    const taskWithScreenshot: ExecutionTaskApply = {
      ...taskApply,
      executor: async (param, context, ...args) => {
        const recorder: ExecutionRecorderItem[] = [];
        const { task } = context;
        // set the recorder before executor in case of error
        task.recorder = recorder;
        const shot = await this.recordScreenshot(`before ${task.type}`);
        recorder.push(shot);
        const result = await taskApply.executor(param, context, ...args);
        if (taskApply.type === 'Action') {
          // Wait before capturing the "after" screenshot, presumably so the page can settle.
          await sleep(1000);
          const shot2 = await this.recordScreenshot('after Action');
          recorder.push(shot2);
        }
        return result;
      },
    };
    return taskWithScreenshot;
  }

  /**
   * Turns planned actions into executor tasks, each wrapped with screenshots.
   *
   * @throws If a plan has type `Error` or an unknown/unsupported type.
   */
  private async convertPlanToExecutable(plans: PlanningAction[]) {
    const tasks: ExecutionTaskApply[] = plans
      .map((planItem) => {
        if (planItem.type === 'Locate') {
          const taskFind: ExecutionTaskInsightLocateApply = {
            type: 'Insight',
            subType: 'Locate',
            param: {
              prompt: planItem.thought,
            },
            executor: async (param) => {
              const readDump = this.captureNextInsightDump();
              const element = await this.insight.locate(param.prompt);
              assert(element, `Element not found: ${param.prompt}`);
              return {
                output: {
                  element,
                },
                log: {
                  dump: readDump(),
                },
              };
            },
          };
          return taskFind;
        } else if (planItem.type === 'Input') {
          const taskActionInput: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> = {
            type: 'Action',
            subType: 'Input',
            param: planItem.param,
            executor: async (taskParam) => {
              assert(taskParam.value, 'No value to input');
              await this.page.keyboard.type(taskParam.value);
            },
          };
          // TODO: return a recorder Object
          return taskActionInput;
        } else if (planItem.type === 'KeyboardPress') {
          const taskActionKeyboardPress: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> = {
            type: 'Action',
            subType: 'KeyboardPress',
            param: planItem.param,
            executor: async (taskParam) => {
              assert(taskParam.value, 'No key to press');
              await this.page.keyboard.press(taskParam.value);
            },
          };
          return taskActionKeyboardPress;
        } else if (planItem.type === 'Tap') {
          const taskActionTap: ExecutionTaskActionApply<PlanningActionParamTap> = {
            type: 'Action',
            subType: 'Tap',
            executor: async (param, { element }) => {
              assert(element, 'Element not found, cannot tap');
              await this.page.mouse.click(element.center[0], element.center[1]);
            },
          };
          return taskActionTap;
        } else if (planItem.type === 'Hover') {
          const taskActionHover: ExecutionTaskActionApply<PlanningActionParamHover> = {
            type: 'Action',
            subType: 'Hover',
            executor: async (param, { element }) => {
              assert(element, 'Element not found, cannot hover');
              await this.page.mouse.move(element.center[0], element.center[1]);
            },
          };
          return taskActionHover;
        } else if (planItem.type === 'Scroll') {
          const taskActionScroll: ExecutionTaskActionApply<PlanningActionParamScroll> = {
            type: 'Action',
            subType: 'Scroll',
            param: planItem.param,
            executor: async (taskParam) => {
              const scrollToEventName = taskParam.scrollType;
              const innerHeight = await this.page.evaluate(() => window.innerHeight);

              switch (scrollToEventName) {
                case 'ScrollUntilTop':
                  await this.page.mouse.wheel(0, -9999999);
                  break;
                case 'ScrollUntilBottom':
                  await this.page.mouse.wheel(0, 9999999);
                  break;
                case 'ScrollUp':
                  // One viewport height per step.
                  await this.page.mouse.wheel(0, -innerHeight);
                  break;
                case 'ScrollDown':
                  await this.page.mouse.wheel(0, innerHeight);
                  break;
                default:
                  console.error('Unknown scroll event type:', scrollToEventName);
              }
            },
          };
          return taskActionScroll;
        } else if (planItem.type === 'Error') {
          throw new Error(`Got a task plan with type Error: ${planItem.thought}`);
        } else {
          throw new Error(`Unknown or Unsupported task type: ${planItem.type}`);
        }
      })
      .map((task: ExecutionTaskApply) => {
        return this.wrapExecutorWithScreenshot(task);
      });

    return tasks;
  }

  /**
   * Plans and executes the steps needed to fulfil `userPrompt`.
   *
   * A planning task runs first; the resulting plans are converted to tasks
   * and executed. `actionDump` is refreshed after each phase and on failure.
   *
   * @param userPrompt - Natural-language description of the action.
   * @throws An `Error` (original failure attached as `cause`) when planning
   *   or execution throws, or when the executor ends in the `error` status.
   */
  async action(userPrompt: string) {
    this.executor.description = userPrompt;
    const pageContext = await this.insight.contextRetrieverFn();

    let plans: PlanningAction[] = [];
    const planningTask: ExecutionTaskPlanningApply = {
      type: 'Planning',
      param: {
        userPrompt,
      },
      async executor(param) {
        const planResult = await plan(pageContext, param.userPrompt);
        assert(planResult.plans.length > 0, 'No plans found');
        // eslint-disable-next-line prefer-destructuring
        plans = planResult.plans;
        return {
          output: planResult,
        };
      },
    };

    try {
      // plan
      await this.executor.append(this.wrapExecutorWithScreenshot(planningTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();

      // append tasks
      const executables = await this.convertPlanToExecutable(plans);
      await this.executor.append(executables);

      // flush actions
      await this.executor.flush();
      this.actionDump = this.executor.dump();

      assert(
        this.executor.status !== 'error',
        `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ''}`,
      );
    } catch (e: unknown) {
      // keep the dump before throwing
      this.actionDump = this.executor.dump();
      throw PlayWrightActionAgent.toError(e);
    }
  }

  /**
   * Extracts data from the page according to `demand`.
   *
   * @param demand - Description of the data to extract.
   * @returns Whatever `insight.extract` produced for the demand.
   * @throws An `Error` (original failure attached as `cause`) when appending
   *   or flushing the query task throws.
   */
  async query(demand: InsightExtractParam) {
    this.executor.description = JSON.stringify(demand);
    let data: any;
    const queryTask: ExecutionTaskInsightQueryApply = {
      type: 'Insight',
      subType: 'Query',
      param: {
        dataDemand: demand,
      },
      executor: async (param) => {
        const readDump = this.captureNextInsightDump();
        data = await this.insight.extract<any>(param.dataDemand);
        return {
          output: data,
          log: { dump: readDump() },
        };
      },
    };
    try {
      await this.executor.append(this.wrapExecutorWithScreenshot(queryTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();
    } catch (e: unknown) {
      // keep the dump before throwing
      this.actionDump = this.executor.dump();
      throw PlayWrightActionAgent.toError(e);
    }
    return data;
  }
}