@midscene/web 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/index.js +337 -110
- package/dist/es/playwright-report.js +2380 -0
- package/dist/lib/index.js +338 -110
- package/dist/lib/playwright-report.js +2383 -0
- package/dist/script/htmlElement.js +596 -25
- package/dist/script/types/htmlElement.d.ts +2 -0
- package/dist/types/index.d.ts +116 -21
- package/dist/types/playwright-report.d.ts +10 -0
- package/package.json +25 -4
- package/modern.config.ts +0 -13
- package/modern.inspect.config.ts +0 -20
- package/playwright.config.ts +0 -42
- package/src/html-element/constants.ts +0 -10
- package/src/html-element/debug.ts +0 -3
- package/src/html-element/dom-util.ts +0 -11
- package/src/html-element/extractInfo.ts +0 -168
- package/src/html-element/index.ts +0 -1
- package/src/html-element/util.ts +0 -160
- package/src/img/img.ts +0 -132
- package/src/img/util.ts +0 -28
- package/src/index.ts +0 -2
- package/src/playwright/actions.ts +0 -276
- package/src/playwright/cdp.ts +0 -322
- package/src/playwright/element.ts +0 -74
- package/src/playwright/index.ts +0 -120
- package/src/playwright/utils.ts +0 -88
- package/src/puppeteer/element.ts +0 -49
- package/src/puppeteer/index.ts +0 -6
- package/src/puppeteer/utils.ts +0 -116
- package/tests/e2e/ai-auto-todo.spec.ts +0 -24
- package/tests/e2e/ai-xicha.spec.ts +0 -34
- package/tests/e2e/fixture.ts +0 -6
- package/tests/e2e/generate-test-data.spec.ts +0 -60
- package/tests/e2e/todo-app-midscene.spec.ts +0 -98
- package/tests/e2e/tool.ts +0 -63
- package/tsconfig.json +0 -23
- package/vitest.config.ts +0 -14
package/src/html-element/util.ts
DELETED
|
@@ -1,160 +0,0 @@
|
|
|
1
|
-
// import { TEXT_MAX_SIZE } from './constants';
|
|
2
|
-
|
|
3
|
-
/**
 * Debug logging hook used by the DOM helpers in this file.
 *
 * It is currently a no-op: the `console.log` call is commented out, so every
 * argument is discarded. Uncomment the line in the body to trace decisions.
 *
 * @param msg - Values that would be written to the console.
 */
export function logger(...msg: any[]): void {
  // console.log(...msg);
}
|
|
6
|
-
|
|
7
|
-
// const nodeIndexCounter = 0;
|
|
8
|
-
|
|
9
|
-
/** Name of the DOM attribute used to tag an element with its node index. */
const taskIdKey = '_midscene_retrieve_task_id';
// const nodeDataIdKey = 'data-midscene-task-';
// const nodeIndexKey = '_midscene_retrieve_node_index';

/**
 * Builds the CSS attribute selector that matches the element tagged with `val`.
 *
 * @param val - Index stored in the `taskIdKey` attribute.
 * @returns A selector of the form `[<taskIdKey>='<val>']`.
 */
function selectorForValue(val: number): string {
  const quotedValue = "'" + String(val) + "'";
  return '[' + taskIdKey + '=' + quotedValue + ']';
}
|
|
16
|
-
|
|
17
|
-
/**
 * Tags an element with its node index and returns the selector that finds it.
 *
 * The index is written to the `taskIdKey` attribute of the element. Nodes that
 * are not `HTMLElement`s (text nodes, comments, ...) cannot carry attributes
 * and are left untouched.
 *
 * @param node - Node to tag.
 * @param nodeIndex - Index to store on the element.
 * @returns The attribute selector for the tagged element, or `''` when `node`
 *   is not an `HTMLElement`.
 */
export function setDataForNode(node: HTMLElement | Node, nodeIndex: number): string {
  if (!(node instanceof HTMLElement)) {
    return '';
  }

  // `taskIdKey` is a non-empty module constant, so no emptiness check is needed.
  node.setAttribute(taskIdKey, nodeIndex.toString());
  return selectorForValue(nodeIndex);
}
|
|
31
|
-
|
|
32
|
-
/**
 * Reads the computed `content` of an element's `::before` and `::after`
 * pseudo-elements.
 *
 * A computed value of `none` is reported as an empty string; otherwise every
 * double-quote character is stripped from the value.
 *
 * @param element - Node to inspect.
 * @returns The two content strings; both are `''` when `element` is not an
 *   `HTMLElement`.
 */
export function getPseudoElementContent(element: Node): { before: string; after: string } {
  const contents = { before: '', after: '' };
  if (element instanceof HTMLElement) {
    const pseudoNames = ['before', 'after'] as const;
    for (const pseudoName of pseudoNames) {
      const raw = window.getComputedStyle(element, `::${pseudoName}`).getPropertyValue('content');
      if (raw !== 'none') {
        contents[pseudoName] = raw.replace(/"/g, '');
      }
    }
  }
  return contents;
}
|
|
43
|
-
|
|
44
|
-
/**
 * Tells whether the element's computed `overflow-y` is one of `scroll`,
 * `auto` or `hidden`.
 *
 * @param element - Element whose computed style is inspected.
 * @returns `true` for those three values, `false` for anything else.
 */
export function hasOverflowY(element: HTMLElement): boolean {
  const { overflowY } = window.getComputedStyle(element);
  return ['scroll', 'auto', 'hidden'].includes(overflowY);
}
|
|
48
|
-
|
|
49
|
-
/**
 * Returns the on-screen rectangle of an element, or `false` when the element
 * should be treated as not visible.
 *
 * An element is rejected when any of the following holds:
 * - it is `null` or not an `HTMLElement`;
 * - its computed style is `display: none`, `visibility: hidden`, or
 *   `opacity: 0` (the opacity rule does not apply to `INPUT` elements);
 * - its bounding box has zero width and zero height;
 * - its bounding box is not fully inside the viewport;
 * - an ancestor (up to, but excluding, `document.body`) has computed
 *   `overflow: hidden` and the element sticks out of that ancestor's box by
 *   more than 10px on any side.
 *
 * `getBoundingClientRect()` is already relative to the viewport, so both the
 * viewport test and the returned coordinates use it directly. Mixing in the
 * page scroll offsets would reject visible elements and shift the returned
 * position whenever the page is scrolled.
 *
 * @param el - Node to measure.
 * @returns Rounded viewport-relative `left`/`top` plus `width`/`height`, or
 *   `false` when the element is rejected.
 */
export function visibleRect(
  el: HTMLElement | Node | null,
): { left: number; top: number; width: number; height: number } | false {
  // `null instanceof HTMLElement` is false, so this single guard covers both cases.
  if (!(el instanceof HTMLElement)) {
    logger('Element is not in the DOM hierarchy');
    return false;
  }

  const style = window.getComputedStyle(el);
  if (
    style.display === 'none' ||
    style.visibility === 'hidden' ||
    (style.opacity === '0' && el.tagName !== 'INPUT')
  ) {
    logger('Element is hidden');
    return false;
  }

  const rect = el.getBoundingClientRect();
  if (rect.width === 0 && rect.height === 0) {
    logger('Element has no size');
    return false;
  }

  const viewportHeight = window.innerHeight || document.documentElement.clientHeight;
  const viewportWidth = window.innerWidth || document.documentElement.clientWidth;
  const isInViewport =
    rect.top >= 0 && rect.left >= 0 && rect.bottom <= viewportHeight && rect.right <= viewportWidth;

  if (!isInViewport) {
    logger('Element is not in the viewport');
    logger(rect, viewportHeight, viewportWidth);
    return false;
  }

  // Walk up the ancestors looking for a clipping container.
  const tolerance = 10;
  let parent: HTMLElement | null = el;
  while (parent && parent !== document.body) {
    const parentStyle = window.getComputedStyle(parent);
    if (parentStyle.overflow === 'hidden') {
      const parentRect = parent.getBoundingClientRect();
      if (
        rect.top < parentRect.top - tolerance ||
        rect.left < parentRect.left - tolerance ||
        rect.bottom > parentRect.bottom + tolerance ||
        rect.right > parentRect.right + tolerance
      ) {
        logger('Element is clipped by an ancestor', parent, rect, parentRect);
        return false;
      }
    }
    parent = parent.parentElement;
  }

  return {
    left: Math.round(rect.left),
    top: Math.round(rect.top),
    width: Math.round(rect.width),
    height: Math.round(rect.height),
  };
}
|
|
118
|
-
|
|
119
|
-
/**
 * Extracts the trimmed text of a node that directly contains text.
 *
 * @param node - Node to inspect.
 * @returns The trimmed `textContent` (falling back to `innerText`), or `false`
 *   when the node is missing, is a comment node, has no direct text-node
 *   child, or its text is empty / whitespace only.
 */
export function validTextNodeContent(node: Node): string | false {
  if (!node || node.nodeType === Node.COMMENT_NODE) {
    return false;
  }

  // One direct text child is enough; other children may be of any type.
  const hasTextChild = Array.from(node.childNodes).some(
    (child) => child.nodeType === Node.TEXT_NODE,
  );
  if (!hasTextChild) {
    return false;
  }

  const content = node.textContent || (node as HTMLElement).innerText;
  if (content && !/^\s*$/.test(content)) {
    return content.trim();
  }

  return false;
}
|
|
143
|
-
|
|
144
|
-
/**
 * Collects the attributes of an element into a plain object.
 *
 * - The `class` attribute is rewritten as a dotted list: the value is split
 *   on single spaces and joined with `.`, with a leading `.`
 *   (`"a b"` becomes `".a.b"`).
 * - Any other attribute with an empty value is omitted.
 *
 * @param node - Node to read attributes from.
 * @returns Attribute name to value map; `{}` when `node` is not an
 *   `HTMLElement` or has no attribute collection.
 */
export function getNodeAttributes(node: HTMLElement | Node): Record<string, string> {
  if (!node || !(node instanceof HTMLElement) || !node.attributes) {
    return {};
  }

  const entries: Array<[string, string]> = [];
  for (const attr of Array.from(node.attributes)) {
    if (attr.name === 'class') {
      entries.push([attr.name, `.${attr.value.split(' ').join('.')}`]);
    } else if (attr.value) {
      // Skip empty values outright: an empty entry handed to
      // Object.fromEntries would create an "undefined" key.
      entries.push([attr.name, attr.value]);
    }
  }

  return Object.fromEntries(entries);
}
|
package/src/img/img.ts
DELETED
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
import assert from 'assert';
|
|
2
|
-
import { Buffer } from 'node:buffer';
|
|
3
|
-
import sharp from 'sharp';
|
|
4
|
-
import { NodeType } from '@/html-element/constants';
|
|
5
|
-
|
|
6
|
-
/**
 * One element to draw on top of a screenshot: its box, the label rendered
 * next to the box, and the element's attributes.
 */
type ElementType = {
  /** Text drawn in the label badge. */
  label: string;
  /** Left edge of the box, in image pixels. */
  x: number;
  /** Top edge of the box, in image pixels. */
  y: number;
  /** Box width, in image pixels. */
  width: number;
  /** Box height, in image pixels. */
  height: number;
  /** Element attributes; `nodeType` is always present. */
  attributes: {
    [key: string]: string;
    nodeType: NodeType;
  };
};
|
|
18
|
-
|
|
19
|
-
/**
 * Renders an SVG overlay that outlines every element and draws its label.
 *
 * For each element the overlay contains:
 * - a clip path equal to the element's box;
 * - a 4px stroked rectangle on the box, clipped to the box itself;
 * - a filled badge sized to the label, placed to the left of the box and
 *   vertically centred on it (or above the box when the badge would start
 *   left of x = 0);
 * - the label text centred in the badge.
 *
 * Colors alternate between blue and green by element index. Labels are
 * XML-escaped so that characters such as `<` or `&` cannot break the markup.
 *
 * @param elements - Elements to annotate.
 * @param imageWidth - Width of the target image, used as the SVG width.
 * @param imageHeight - Height of the target image, used as the SVG height.
 * @returns The SVG document as a `Buffer`.
 */
const createSvgOverlay = (elements: Array<ElementType>, imageWidth: number, imageHeight: number) => {
  const xmlEntities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  };
  const escapeXml = (text: string): string =>
    text.replace(/[&<>"']/g, (ch) => xmlEntities[ch] ?? ch);

  let svgContent = `<svg width="${imageWidth}" height="${imageHeight}" xmlns="http://www.w3.org/2000/svg">`;

  // Outline/badge color and text color, alternated per element.
  const colors = [
    { rect: 'blue', text: 'white' },
    { rect: 'green', text: 'white' },
  ];

  // One clip path per element so the thick outline stays inside the box.
  svgContent += `<defs>`;
  elements.forEach((element, index) => {
    svgContent += `
      <clipPath id="clip${index}">
        <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}" />
      </clipPath>
    `;
  });
  svgContent += `</defs>`;

  elements.forEach((element, index) => {
    // Estimated label size.
    const textWidth = element.label.length * 8; // Assume that each character is 8px wide
    const textHeight = 12; // Matches the 12px font size used for the label

    // Badge wraps the text with a little padding; default position is to the
    // left of the box, vertically centred on it.
    const rectWidth = textWidth + 5;
    const rectHeight = textHeight + 4;
    let rectX = element.x - rectWidth;
    let rectY = element.y + element.height / 2 - textHeight / 2 - 2;

    // Badge would start left of the image: move it above the box instead.
    if (rectX < 0) {
      rectX = element.x;
      rectY = element.y - rectHeight;
    }

    // Text is anchored at the badge centre (with a 6px baseline nudge).
    const textX = rectX + rectWidth / 2;
    const textY = rectY + rectHeight / 2 + 6;

    const color = colors[index % colors.length];

    svgContent += `
      <rect x="${element.x}" y="${element.y}" width="${element.width}" height="${element.height}"
            style="fill:none;stroke:${color.rect};stroke-width:4" clip-path="url(#clip${index})" />
      <rect x="${rectX}" y="${rectY}" width="${rectWidth}" height="${rectHeight}" style="fill:${color.rect};" />
      <text x="${textX}" y="${textY}"
            text-anchor="middle" dominant-baseline="middle" style="fill:${color.text};font-size:12px;font-weight:bold;">
        ${escapeXml(element.label)}
      </text>
    `;
  });

  svgContent += `</svg>`;
  return Buffer.from(svgContent);
};
|
|
80
|
-
|
|
81
|
-
/**
 * Draws element overlays on a screenshot and returns the annotated images.
 *
 * Two images are produced from the same input: one annotated with
 * `elementsPostionInfo` and one with `elementsPostionInfoWithoutText`. The
 * two renders are independent and run concurrently.
 *
 * @param options.inputImgBase64 - Source image as base64; anything up to and
 *   including a `;base64,` marker is ignored.
 * @param options.elementsPostionInfo - Elements for the first overlay.
 * @param options.elementsPostionInfoWithoutText - Elements for the second overlay.
 * @returns Both annotated images, base64 encoded (no data-URL prefix).
 * @throws When the base64 payload is empty, when the image metadata lacks a
 *   width or height, or when image processing itself fails.
 */
export const processImageElementInfo = async (options: {
  inputImgBase64: string;
  elementsPostionInfo: Array<ElementType>;
  elementsPostionInfoWithoutText: Array<ElementType>;
}) => {
  const base64Image = options.inputImgBase64.split(';base64,').pop();
  assert(base64Image, 'base64Image is undefined');

  const imageBuffer = Buffer.from(base64Image, 'base64');
  const { width, height } = await sharp(imageBuffer).metadata();

  if (!width || !height) {
    throw new Error('Image processing failed because width or height is undefined');
  }

  // Composites one overlay on a fresh copy of the source image.
  const renderWithOverlay = async (elements: Array<ElementType>): Promise<string> => {
    const svgOverlay = createSvgOverlay(elements, width, height);
    const composed = await sharp(imageBuffer)
      .composite([{ input: svgOverlay, blend: 'over' }])
      .toBuffer();
    return composed.toString('base64');
  };

  const [compositeElementInfoImgBase64, compositeElementInfoImgWithoutTextBase64] = await Promise.all([
    renderWithOverlay(options.elementsPostionInfo),
    renderWithOverlay(options.elementsPostionInfoWithoutText),
  ]);

  return {
    compositeElementInfoImgBase64,
    compositeElementInfoImgWithoutTextBase64,
  };
};
|
package/src/img/util.ts
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { getElementInfosFromPage } from '../playwright/utils';
|
|
2
|
-
import { NodeType } from '@/html-element/constants';
|
|
3
|
-
import { ElementInfo } from '@/html-element/extractInfo';
|
|
4
|
-
|
|
5
|
-
/**
 * Captures the element snapshot of a page and derives the overlay inputs.
 *
 * @param page - Page handle passed through to `getElementInfosFromPage`.
 * @returns The raw snapshot, the overlay description of every element
 *   (label = element id, box = element rect), and the same list without the
 *   entries whose `attributes.nodeType` is `NodeType.TEXT`.
 */
export async function getElementInfos(page: any) {
  const captureElementSnapshot: Array<ElementInfo> = await getElementInfosFromPage(page);

  const elementsPostionInfo = captureElementSnapshot.map(({ id, rect, attributes }) => ({
    label: id.toString(),
    x: rect.left,
    y: rect.top,
    width: rect.width,
    height: rect.height,
    attributes,
  }));

  const elementsPostionInfoWithoutText = elementsPostionInfo.filter(
    ({ attributes }) => attributes.nodeType !== NodeType.TEXT,
  );

  return {
    elementsPostionInfo,
    captureElementSnapshot,
    elementsPostionInfoWithoutText,
  };
}
|
package/src/playwright/actions.ts
DELETED
|
@@ -1,276 +0,0 @@
|
|
|
1
|
-
import assert from 'assert';
|
|
2
|
-
import type { Page as PlaywrightPage } from 'playwright';
|
|
3
|
-
import Insight, {
|
|
4
|
-
DumpSubscriber,
|
|
5
|
-
ExecutionDump,
|
|
6
|
-
ExecutionRecorderItem,
|
|
7
|
-
ExecutionTaskActionApply,
|
|
8
|
-
ExecutionTaskApply,
|
|
9
|
-
ExecutionTaskInsightLocateApply,
|
|
10
|
-
ExecutionTaskInsightQueryApply,
|
|
11
|
-
ExecutionTaskPlanningApply,
|
|
12
|
-
Executor,
|
|
13
|
-
InsightDump,
|
|
14
|
-
InsightExtractParam,
|
|
15
|
-
PlanningAction,
|
|
16
|
-
PlanningActionParamHover,
|
|
17
|
-
PlanningActionParamInputOrKeyPress,
|
|
18
|
-
PlanningActionParamScroll,
|
|
19
|
-
PlanningActionParamTap,
|
|
20
|
-
plan,
|
|
21
|
-
} from '@midscene/core';
|
|
22
|
-
import { commonScreenshotParam, getTmpFile, sleep } from '@midscene/core/utils';
|
|
23
|
-
import { base64Encoded } from '@midscene/core/image';
|
|
24
|
-
import { parseContextFromPlaywrightPage } from './utils';
|
|
25
|
-
import { WebElementInfo } from './element';
|
|
26
|
-
|
|
27
|
-
/**
 * Runs natural-language actions and data queries against a Playwright page.
 *
 * `action()` asks the planner for a list of steps, converts each step into an
 * executor task and runs them; `query()` extracts data through `Insight`.
 * Every task is wrapped so that screenshots are recorded around it, and the
 * latest executor dump is kept in `actionDump` (also when a call fails).
 */
export class PlayWrightActionAgent {
  page: PlaywrightPage;

  insight: Insight<WebElementInfo>;

  executor: Executor;

  /** Latest executor dump; refreshed after each flush and before any rethrow. */
  actionDump?: ExecutionDump;

  /**
   * @param page - Playwright page to drive.
   * @param opt - Optional settings; `taskName` names the executor
   *   (defaults to `'MidScene - PlayWrightAI'`).
   */
  constructor(page: PlaywrightPage, opt?: { taskName?: string }) {
    this.page = page;
    this.insight = new Insight<WebElementInfo>(async () => {
      return await parseContextFromPlaywrightPage(page);
    });
    this.executor = new Executor(opt?.taskName || 'MidScene - PlayWrightAI');
  }

  /**
   * Takes a screenshot into a temporary JPEG file and wraps it as a recorder item.
   *
   * @param timing - Label stored on the item (e.g. `before Action`).
   */
  private async recordScreenshot(timing: ExecutionRecorderItem['timing']) {
    const file = getTmpFile('jpeg');
    await this.page.screenshot({
      ...commonScreenshotParam,
      path: file,
    });
    const item: ExecutionRecorderItem = {
      type: 'screenshot',
      ts: Date.now(),
      screenshot: base64Encoded(file),
      timing,
    };
    return item;
  }

  /**
   * Returns a copy of `taskApply` whose executor records a screenshot before
   * running and, for `Action` tasks, a second one a second after it finishes.
   */
  private wrapExecutorWithScreenshot(taskApply: ExecutionTaskApply): ExecutionTaskApply {
    const taskWithScreenshot: ExecutionTaskApply = {
      ...taskApply,
      executor: async (param, context, ...args) => {
        const recorder: ExecutionRecorderItem[] = [];
        const { task } = context;
        // set the recorder before executor in case of error
        task.recorder = recorder;
        const shot = await this.recordScreenshot(`before ${task.type}`);
        recorder.push(shot);
        const result = await taskApply.executor(param, context, ...args);
        if (taskApply.type === 'Action') {
          // Wait before the second screenshot.
          await sleep(1000);
          const shot2 = await this.recordScreenshot('after Action');
          recorder.push(shot2);
        }
        return result;
      },
    };
    return taskWithScreenshot;
  }

  /**
   * Registers a collector as `insight.onceDumpUpdatedFn` and returns a getter
   * for the last dump it received (`undefined` until one arrives).
   */
  private collectInsightDump(): () => InsightDump | undefined {
    let insightDump: InsightDump | undefined;
    const dumpCollector: DumpSubscriber = (dump) => {
      insightDump = dump;
    };
    this.insight.onceDumpUpdatedFn = dumpCollector;
    return () => insightDump;
  }

  /**
   * Saves the current executor dump, then builds the error to rethrow: it
   * carries the original message and keeps the original value as `cause`.
   * Non-`Error` values are stringified instead of being dereferenced.
   */
  private captureFailure(e: unknown): Error {
    // keep the dump before throwing
    this.actionDump = this.executor.dump();
    const message = e instanceof Error ? e.message : String(e);
    return new Error(message, { cause: e });
  }

  /**
   * Converts planner steps into executor tasks, each wrapped with screenshots.
   *
   * @throws When a step has type `Error` or an unknown/unsupported type.
   */
  private async convertPlanToExecutable(plans: PlanningAction[]) {
    const tasks: ExecutionTaskApply[] = plans
      // `planItem` rather than `plan`, to avoid shadowing the imported `plan()` function.
      .map((planItem) => {
        if (planItem.type === 'Locate') {
          const taskFind: ExecutionTaskInsightLocateApply = {
            type: 'Insight',
            subType: 'Locate',
            param: {
              prompt: planItem.thought,
            },
            executor: async (param) => {
              const latestDump = this.collectInsightDump();
              const element = await this.insight.locate(param.prompt);
              assert(element, `Element not found: ${param.prompt}`);
              return {
                output: {
                  element,
                },
                log: {
                  dump: latestDump(),
                },
              };
            },
          };
          return taskFind;
        } else if (planItem.type === 'Input') {
          const taskActionInput: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> = {
            type: 'Action',
            subType: 'Input',
            param: planItem.param,
            executor: async (taskParam) => {
              assert(taskParam.value, 'No value to input');
              await this.page.keyboard.type(taskParam.value);
            },
          };
          // TODO: return a recorder Object
          return taskActionInput;
        } else if (planItem.type === 'KeyboardPress') {
          const taskActionKeyboardPress: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> = {
            type: 'Action',
            subType: 'KeyboardPress',
            param: planItem.param,
            executor: async (taskParam) => {
              assert(taskParam.value, 'No key to press');
              await this.page.keyboard.press(taskParam.value);
            },
          };
          return taskActionKeyboardPress;
        } else if (planItem.type === 'Tap') {
          const taskActionTap: ExecutionTaskActionApply<PlanningActionParamTap> = {
            type: 'Action',
            subType: 'Tap',
            executor: async (param, { element }) => {
              assert(element, 'Element not found, cannot tap');
              await this.page.mouse.click(element.center[0], element.center[1]);
            },
          };
          return taskActionTap;
        } else if (planItem.type === 'Hover') {
          const taskActionHover: ExecutionTaskActionApply<PlanningActionParamHover> = {
            type: 'Action',
            subType: 'Hover',
            executor: async (param, { element }) => {
              // console.log('executor args', param, element);
              assert(element, 'Element not found, cannot hover');
              await this.page.mouse.move(element.center[0], element.center[1]);
            },
          };
          return taskActionHover;
        } else if (planItem.type === 'Scroll') {
          const taskActionScroll: ExecutionTaskActionApply<PlanningActionParamScroll> = {
            type: 'Action',
            subType: 'Scroll',
            param: planItem.param,
            executor: async (taskParam) => {
              const scrollToEventName = taskParam.scrollType;
              const innerHeight = await this.page.evaluate(() => window.innerHeight);

              switch (scrollToEventName) {
                case 'ScrollUntilTop':
                  await this.page.mouse.wheel(0, -9999999);
                  break;
                case 'ScrollUntilBottom':
                  await this.page.mouse.wheel(0, 9999999);
                  break;
                case 'ScrollUp':
                  // One viewport height per step.
                  await this.page.mouse.wheel(0, -innerHeight);
                  break;
                case 'ScrollDown':
                  await this.page.mouse.wheel(0, innerHeight);
                  break;
                default:
                  // Unknown types are reported but do not fail the task.
                  console.error('Unknown scroll event type:', scrollToEventName);
              }
            },
          };
          return taskActionScroll;
        } else if (planItem.type === 'Error') {
          throw new Error(`Got a task plan with type Error: ${planItem.thought}`);
        } else {
          throw new Error(`Unknown or Unsupported task type: ${planItem.type}`);
        }
      })
      .map((task: ExecutionTaskApply) => {
        return this.wrapExecutorWithScreenshot(task);
      });

    return tasks;
  }

  /**
   * Plans and executes the steps needed to fulfil `userPrompt`.
   *
   * The planning task runs first; its plans are then converted to tasks and
   * executed. `actionDump` is updated after each flush.
   *
   * @param userPrompt - Instruction describing what to do on the page.
   * @throws A new `Error` (original kept as `cause`) when planning yields no
   *   plans, a plan cannot be converted, or the executor ends in `error` status.
   */
  async action(userPrompt: string /* , actionInfo?: { actionType?: EventActions[number]['action'] } */) {
    this.executor.description = userPrompt;
    const pageContext = await this.insight.contextRetrieverFn();

    let plans: PlanningAction[] = [];
    const planningTask: ExecutionTaskPlanningApply = {
      type: 'Planning',
      param: {
        userPrompt,
      },
      async executor(param) {
        const planResult = await plan(pageContext, param.userPrompt);
        assert(planResult.plans.length > 0, 'No plans found');
        // eslint-disable-next-line prefer-destructuring
        plans = planResult.plans;
        return {
          output: planResult,
        };
      },
    };

    try {
      // plan
      await this.executor.append(this.wrapExecutorWithScreenshot(planningTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();

      // append tasks
      const executables = await this.convertPlanToExecutable(plans);
      await this.executor.append(executables);

      // flush actions
      await this.executor.flush();
      this.actionDump = this.executor.dump();

      assert(
        this.executor.status !== 'error',
        `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ''}`,
      );
    } catch (e: unknown) {
      throw this.captureFailure(e);
    }
  }

  /**
   * Extracts data from the page according to `demand`.
   *
   * @param demand - Description of the data to extract; also stored
   *   (JSON-stringified) as the executor description.
   * @returns Whatever `insight.extract` produced (`undefined` if the task
   *   never assigned a result).
   * @throws A new `Error` (original kept as `cause`) when appending or
   *   flushing the task throws.
   */
  async query(demand: InsightExtractParam) {
    this.executor.description = JSON.stringify(demand);
    let data: any;
    const queryTask: ExecutionTaskInsightQueryApply = {
      type: 'Insight',
      subType: 'Query',
      param: {
        dataDemand: demand,
      },
      executor: async (param) => {
        const latestDump = this.collectInsightDump();
        data = await this.insight.extract<any>(param.dataDemand);
        return {
          output: data,
          log: { dump: latestDump() },
        };
      },
    };
    try {
      await this.executor.append(this.wrapExecutorWithScreenshot(queryTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();
    } catch (e: unknown) {
      throw this.captureFailure(e);
    }
    return data;
  }
}
|