@midscene/web 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,442 @@
1
// Bundler-emitted helpers that emulate object spread syntax (`{ ...a, ...b }`).
// Cached references to the Object built-ins used by the helpers below.
const __defProp = Object.defineProperty;
const __defProps = Object.defineProperties;
const __getOwnPropDescs = Object.getOwnPropertyDescriptors;
const __getOwnPropSymbols = Object.getOwnPropertySymbols;
const __hasOwnProp = Object.prototype.hasOwnProperty;
const __propIsEnum = Object.prototype.propertyIsEnumerable;

/**
 * Stores `value` under `key` on `obj`.
 * When `key` is already reachable on `obj` (own or inherited) a plain
 * enumerable/configurable/writable data property is defined instead of
 * assigning, so inherited setters or read-only slots are not triggered.
 */
const __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, {
      enumerable: true,
      configurable: true,
      writable: true,
      value
    });
  }
  return (obj[key] = value);
};

/**
 * Copies the own enumerable properties of `b` (string keys first, then
 * symbol keys) onto `a` and returns `a`. A falsy `b` is treated as `{}`.
 */
const __spreadValues = (a, b) => {
  const source = b || {};
  for (const name in source) {
    // `for...in` also visits inherited keys; only own ones are copied.
    if (__hasOwnProp.call(source, name)) {
      __defNormalProp(a, name, source[name]);
    }
  }
  if (__getOwnPropSymbols) {
    for (const symbol of __getOwnPropSymbols(source)) {
      if (__propIsEnum.call(source, symbol)) {
        __defNormalProp(a, symbol, source[symbol]);
      }
    }
  }
  return a;
};

/**
 * Defines every own property descriptor of `b` on `a` (accessors are kept
 * as accessors rather than being read) and returns `a`.
 */
const __spreadProps = (a, b) => {
  const descriptors = __getOwnPropDescs(b);
  return __defProps(a, descriptors);
};
20
+
21
+ // src/playwright/index.ts
22
+ import { groupedActionDumpFileExt, writeDumpFile } from "@midscene/core/utils";
23
+
24
+ // src/playwright/actions.ts
25
+ import assert2 from "assert";
26
+ import Insight, {
27
+ Executor,
28
+ plan
29
+ } from "@midscene/core";
30
+ import { commonScreenshotParam, getTmpFile, sleep } from "@midscene/core/utils";
31
+ import { base64Encoded as base64Encoded2 } from "@midscene/core/image";
32
+
33
+ // src/playwright/utils.ts
34
+ import fs, { readFileSync } from "fs";
35
+ import assert from "assert";
36
+ import path from "path";
37
+ import { alignCoordByTrim, base64Encoded, imageInfoOfBase64 } from "@midscene/core/image";
38
+
39
+ // src/playwright/element.ts
40
/**
 * One element extracted from a page, together with the page handle needed
 * to interact with it through mouse and keyboard input.
 */
const WebElementInfo = class {
  /**
   * @param {object} info
   * @param {*} info.content - Stored as-is on the instance.
   * @param {{left: number, top: number, width: number, height: number}} info.rect
   *   Bounding box; the click/hover point is derived from it.
   * @param {object} info.page - Object exposing `mouse` and `keyboard`.
   * @param {*} info.locator - Stored as-is on the instance.
   * @param {*} info.id - Stored as-is on the instance.
   * @param {*} info.attributes - Stored as-is on the instance.
   */
  constructor(info) {
    const { content, rect, page, locator, id, attributes } = info;
    this.content = content;
    this.rect = rect;
    // Centre of the bounding box, floored to whole pixels.
    const centerX = Math.floor(rect.left + rect.width / 2);
    const centerY = Math.floor(rect.top + rect.height / 2);
    this.center = [centerX, centerY];
    this.page = page;
    this.locator = locator;
    this.id = id;
    this.attributes = attributes;
  }

  /** Clicks the mouse at the element's centre point. */
  async tap() {
    const [x, y] = this.center;
    await this.page.mouse.click(x, y);
  }

  /** Moves the mouse pointer to the element's centre point. */
  async hover() {
    const [x, y] = this.center;
    await this.page.mouse.move(x, y);
  }

  /**
   * Types `text` through the page keyboard. The element itself is not
   * focused by this call.
   */
  async type(text) {
    const { keyboard } = this.page;
    await keyboard.type(text);
  }

  /**
   * Presses `key` through the page keyboard. The element itself is not
   * focused by this call.
   */
  async press(key) {
    const { keyboard } = this.page;
    await keyboard.press(key);
  }
};
70
+
71
+ // src/playwright/utils.ts
72
/**
 * Builds the page context consumed by the insight layer: a JPEG screenshot
 * of the page plus the extracted elements aligned against that screenshot.
 *
 * @param {object} page - Playwright page to capture; required.
 * @param {*} [_opt] - Accepted for call compatibility; currently unused.
 * @returns {Promise<{content: Array, size: *, screenshotBase64: *}>}
 *   `content` is the result of `alignElements`, `size` the result of
 *   `imageInfoOfBase64`, `screenshotBase64` the result of `base64Encoded`.
 * @throws {Error} Assertion error when `page` is missing.
 */
async function parseContextFromPlaywrightPage(page, _opt) {
  assert(page, "page is required");
  // The screenshot must go to a temp path obtained from getTmpFile (the same
  // helper recordScreenshot uses). A fixed absolute path on the author's
  // machine does not exist elsewhere and would overwrite a test fixture.
  const file = getTmpFile("jpeg");
  await page.screenshot({ path: file, type: "jpeg", quality: 75 });
  const screenshotBuffer = readFileSync(file);
  const screenshotBase64 = base64Encoded(file);
  const captureElementSnapshot = await getElementInfosFromPage(page);
  const elementsInfo = await alignElements(screenshotBuffer, captureElementSnapshot, page);
  const size = await imageInfoOfBase64(screenshotBase64);
  return {
    content: elementsInfo,
    size,
    screenshotBase64
  };
}
87
/**
 * Runs the bundled element-inspector script inside the page and returns
 * whatever `midscene_element_inspector.extractTextWithPositionDFS()`
 * evaluates to there.
 *
 * The script is read from `dist/script/htmlElement.js` under the nearest
 * directory (walking up from `__dirname`) that contains a package.json.
 *
 * NOTE(review): `__dirname` is not defined by Node for native ES modules,
 * and this file uses `import`/`export` — confirm this build is only loaded
 * through tooling that provides it.
 *
 * @param {object} page - Object exposing `evaluate(script)`.
 * @returns {Promise<*>} The value produced by evaluating the script in the page.
 * @throws {Error} Assertion error when no package.json directory is found.
 */
async function getElementInfosFromPage(page) {
  const packageRoot = findNearestPackageJson(__dirname);
  assert(packageRoot, `can't find pathDir, with ${__dirname}`);
  const inspectorSource = readFileSync(
    path.join(packageRoot, "./dist/script/htmlElement.js"),
    "utf-8"
  );
  // The call expression is appended so that its value becomes the result of
  // evaluating the whole string in the page.
  const pageScript = `${inspectorSource}midscene_element_inspector.extractTextWithPositionDFS()`;
  return await page.evaluate(pageScript);
}
96
/**
 * Re-aligns each element's bounding box against the screenshot and wraps
 * the result in a `WebElementInfo` bound to `page`.
 *
 * The entries of `elements` are left untouched: each `WebElementInfo` is
 * built from a shallow copy whose `rect` is the aligned one. No `center` is
 * computed here because `WebElementInfo` derives its own from `rect`.
 *
 * @param {Buffer} screenshotBuffer - Screenshot bytes passed to `alignCoordByTrim`.
 * @param {Array<object>} elements - Extracted elements, each carrying a `rect`.
 * @param {object} page - Page handle stored on every resulting element.
 * @returns {Promise<Array>} One `WebElementInfo` per input element, in order.
 */
async function alignElements(screenshotBuffer, elements, page) {
  const textsAligned = [];
  // Alignment runs one element at a time, in input order.
  for (const item of elements) {
    const aligned = await alignCoordByTrim(screenshotBuffer, item.rect);
    textsAligned.push(
      new WebElementInfo(Object.assign({}, item, { rect: aligned, page }))
    );
  }
  return textsAligned;
}
114
/**
 * Walks upward from `dir` and returns the first directory that contains a
 * `package.json`, checking `dir` itself first.
 *
 * @param {string} dir - Directory to start from.
 * @returns {string|null} The matching directory, or `null` when the
 *   filesystem root is reached without finding one.
 */
function findNearestPackageJson(dir) {
  let current = dir;
  while (!fs.existsSync(path.join(current, "package.json"))) {
    const parent = path.dirname(current);
    // `dirname` returns its input unchanged at the root: nothing left to search.
    if (parent === current) {
      return null;
    }
    current = parent;
  }
  return current;
}
125
+
126
+ // src/playwright/actions.ts
127
/**
 * Drives a Playwright page from natural-language instructions.
 *
 * `action()` asks the planner for a list of steps and runs them through the
 * executor; `query()` extracts data from the page. Every task is wrapped so
 * that screenshots are recorded around it, and the executor dump of the
 * latest run is kept on `this.actionDump`.
 */
const PlayWrightActionAgent = class {
  /**
   * @param {object} page - Playwright page to operate on.
   * @param {{taskName?: string}} [opt] - `taskName` names the executor;
   *   falls back to "MidScene - PlayWrightAI" when absent or empty.
   */
  constructor(page, opt) {
    this.page = page;
    this.insight = new Insight(async () => {
      return await parseContextFromPlaywrightPage(page);
    });
    const taskName = opt == null ? void 0 : opt.taskName;
    this.executor = new Executor(taskName || "MidScene - PlayWrightAI");
  }

  /**
   * Takes a screenshot of the page into a temp file and returns a recorder
   * entry for it.
   *
   * @param {string} timing - Label describing when the shot was taken.
   * @returns {Promise<{type: string, ts: number, screenshot: *, timing: string}>}
   */
  async recordScreenshot(timing) {
    const file = getTmpFile("jpeg");
    await this.page.screenshot(Object.assign({}, commonScreenshotParam, { path: file }));
    return {
      type: "screenshot",
      ts: Date.now(),
      screenshot: base64Encoded2(file),
      timing
    };
  }

  /**
   * Returns a copy of `taskApply` whose executor records a screenshot before
   * running and, for tasks of type "Action", another one a second after.
   * The screenshots are stored on `context.task.recorder`.
   *
   * @param {object} taskApply - Task definition with `type` and `executor`.
   * @returns {object} New task object; `taskApply` itself is not modified.
   */
  wrapExecutorWithScreenshot(taskApply) {
    return Object.assign({}, taskApply, {
      executor: async (param, context, ...args) => {
        const recorder = [];
        const { task } = context;
        task.recorder = recorder;
        recorder.push(await this.recordScreenshot(`before ${task.type}`));
        const result = await taskApply.executor(param, context, ...args);
        if (taskApply.type === "Action") {
          // Wait one second before capturing the "after" state.
          await sleep(1e3);
          recorder.push(await this.recordScreenshot("after Action"));
        }
        return result;
      }
    });
  }

  /**
   * Turns planner output into executor tasks, each wrapped with screenshot
   * recording.
   *
   * @param {Array<object>} plans - Plan items with `type` and, depending on
   *   the type, `thought` or `param`.
   * @returns {Promise<Array<object>>} Tasks in the same order as `plans`.
   * @throws {Error} When a plan has type "Error" or an unsupported type.
   */
  async convertPlanToExecutable(plans) {
    const toTask = (plan2) => {
      switch (plan2.type) {
        case "Locate":
          return {
            type: "Insight",
            subType: "Locate",
            param: {
              prompt: plan2.thought
            },
            executor: async (param) => {
              let insightDump;
              // The insight reports its dump through this one-shot callback.
              this.insight.onceDumpUpdatedFn = (dump) => {
                insightDump = dump;
              };
              const element = await this.insight.locate(param.prompt);
              assert2(element, `Element not found: ${param.prompt}`);
              return {
                output: {
                  element
                },
                log: {
                  dump: insightDump
                }
              };
            }
          };
        case "Input":
          return {
            type: "Action",
            subType: "Input",
            param: plan2.param,
            executor: async (taskParam) => {
              assert2(taskParam.value, "No value to input");
              await this.page.keyboard.type(taskParam.value);
            }
          };
        case "KeyboardPress":
          return {
            type: "Action",
            subType: "KeyboardPress",
            param: plan2.param,
            executor: async (taskParam) => {
              assert2(taskParam.value, "No key to press");
              await this.page.keyboard.press(taskParam.value);
            }
          };
        case "Tap":
          return {
            type: "Action",
            subType: "Tap",
            executor: async (param, { element }) => {
              assert2(element, "Element not found, cannot tap");
              await this.page.mouse.click(element.center[0], element.center[1]);
            }
          };
        case "Hover":
          return {
            type: "Action",
            subType: "Hover",
            executor: async (param, { element }) => {
              assert2(element, "Element not found, cannot hover");
              await this.page.mouse.move(element.center[0], element.center[1]);
            }
          };
        case "Scroll":
          return {
            type: "Action",
            subType: "Scroll",
            param: plan2.param,
            executor: async (taskParam) => {
              const scrollToEventName = taskParam.scrollType;
              const innerHeight = await this.page.evaluate(() => window.innerHeight);
              switch (scrollToEventName) {
                case "ScrollUntilTop":
                  await this.page.mouse.wheel(0, -9999999);
                  break;
                case "ScrollUntilBottom":
                  await this.page.mouse.wheel(0, 9999999);
                  break;
                case "ScrollUp":
                  await this.page.mouse.wheel(0, -innerHeight);
                  break;
                case "ScrollDown":
                  await this.page.mouse.wheel(0, innerHeight);
                  break;
                default:
                  // Unknown scroll types are logged and otherwise ignored.
                  console.error("Unknown scroll event type:", scrollToEventName);
              }
            }
          };
        case "Error":
          throw new Error(`Got a task plan with type Error: ${plan2.thought}`);
        default:
          throw new Error(`Unknown or Unsupported task type: ${plan2.type}`);
      }
    };
    return plans.map(toTask).map((task) => this.wrapExecutorWithScreenshot(task));
  }

  /**
   * Plans and executes the steps needed to fulfil `userPrompt`.
   * `this.actionDump` is refreshed after each executor run, including on
   * failure.
   *
   * @param {string} userPrompt - Instruction to carry out on the page.
   * @returns {Promise<void>}
   * @throws {Error} Wrapping the original failure as `cause`.
   */
  async action(userPrompt) {
    this.executor.description = userPrompt;
    const pageContext = await this.insight.contextRetrieverFn();
    let plans = [];
    const planningTask = {
      type: "Planning",
      param: {
        userPrompt
      },
      async executor(param) {
        const planResult = await plan(pageContext, param.userPrompt);
        assert2(planResult.plans.length > 0, "No plans found");
        plans = planResult.plans;
        return {
          output: planResult
        };
      }
    };
    try {
      await this.executor.append(this.wrapExecutorWithScreenshot(planningTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();
      const executables = await this.convertPlanToExecutable(plans);
      await this.executor.append(executables);
      await this.executor.flush();
      this.actionDump = this.executor.dump();
      assert2(
        this.executor.status !== "error",
        `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ""}`
      );
    } catch (e) {
      this.actionDump = this.executor.dump();
      throw new Error(e.message, { cause: e });
    }
  }

  /**
   * Extracts data from the page according to `demand`.
   * `this.actionDump` is refreshed after the executor run, including on
   * failure.
   *
   * @param {*} demand - Description of the data wanted; passed to
   *   `insight.extract` and JSON-stringified as the executor description.
   * @returns {Promise<*>} The value returned by `insight.extract`.
   * @throws {Error} Wrapping the original failure as `cause`, including the
   *   case where the executor finishes with status "error".
   */
  async query(demand) {
    this.executor.description = JSON.stringify(demand);
    let data;
    const queryTask = {
      type: "Insight",
      subType: "Query",
      param: {
        dataDemand: demand
      },
      executor: async (param) => {
        let insightDump;
        // The insight reports its dump through this one-shot callback.
        this.insight.onceDumpUpdatedFn = (dump) => {
          insightDump = dump;
        };
        data = await this.insight.extract(param.dataDemand);
        return {
          output: data,
          log: { dump: insightDump }
        };
      }
    };
    try {
      await this.executor.append(this.wrapExecutorWithScreenshot(queryTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();
      // Same check as in action(): without it a failed extraction would be
      // reported to the caller as a successful query returning undefined.
      // NOTE(review): assumes flush() can resolve with status "error", as the
      // matching check in action() implies — confirm against Executor.
      assert2(
        this.executor.status !== "error",
        `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ""}`
      );
    } catch (e) {
      this.actionDump = this.executor.dump();
      throw new Error(e.message, { cause: e });
    }
    return data;
  }
};
344
+
345
+ // src/playwright/index.ts
346
/**
 * Creates the Playwright fixtures `ai`, `aiAction` and `aiQuery`.
 *
 * Each call made through a fixture runs on a fresh `PlayWrightActionAgent`.
 * The agent's dump is collected per test group and the whole collection is
 * rewritten to the `playwright-<pid>` dump file after every call that
 * produced a dump, whether or not the call failed.
 *
 * @returns {{ai: Function, aiAction: Function, aiQuery: Function}}
 */
const PlaywrightAiFixture = () => {
  // All dumps gathered by fixtures created from this factory call.
  const dumps = [];

  /** Adds `execution` to the group named `groupName`, creating it on first use. */
  const appendDump = (groupName, execution) => {
    let group = dumps.find((entry) => entry.groupName === groupName);
    if (!group) {
      group = { groupName, executions: [] };
      dumps.push(group);
    }
    group.executions.push(execution);
  };

  /** Serialises every collected dump into the per-process dump file. */
  const writeOutActionDumps = () => {
    const fileName = `playwright-${process.pid}`;
    writeDumpFile(fileName, groupedActionDumpFileExt, JSON.stringify(dumps));
  };

  /**
   * Splits `testInfo.titlePath` into a group name (everything but the last
   * segment, joined with " > ") and a case name (the last segment).
   */
  const groupAndCaseForTest = (testInfo) => {
    const segments = [...testInfo.titlePath];
    if (segments.length === 0) {
      return { groupName: "unnamed", caseName: "unnamed" };
    }
    if (segments.length === 1) {
      const [only] = segments;
      return { groupName: only, caseName: only };
    }
    const caseName = segments.pop();
    return { groupName: segments.join(" > "), caseName };
  };

  /**
   * Runs `invoke(agent)` on a new agent, stores the agent's dump, then either
   * returns the result or logs and rethrows the failure wrapped with `cause`.
   */
  const runOnFreshAgent = async (page, testInfo, invoke) => {
    const { groupName, caseName } = groupAndCaseForTest(testInfo);
    const actionAgent = new PlayWrightActionAgent(page, { taskName: caseName });
    let result;
    let error;
    try {
      result = await invoke(actionAgent);
    } catch (e) {
      // Deferred so the dump is still written before the error propagates.
      error = e;
    }
    if (actionAgent.actionDump) {
      appendDump(groupName, actionAgent.actionDump);
      writeOutActionDumps();
    }
    if (error) {
      console.error(error);
      throw new Error(error.message, { cause: error });
    }
    return result;
  };

  const aiAction = async (page, testInfo, taskPrompt) => {
    await runOnFreshAgent(page, testInfo, (agent) => agent.action(taskPrompt));
  };

  const aiQuery = async (page, testInfo, demand) => {
    return runOnFreshAgent(page, testInfo, (agent) => agent.query(demand));
  };

  // The `{ page }` destructuring in each fixture's first parameter is kept
  // verbatim, as required for Playwright fixtures.
  return {
    // shortcut
    ai: async ({ page }, use, testInfo) => {
      await use(async (taskPrompt, type = "action") => {
        switch (type) {
          case "action":
            return aiAction(page, testInfo, taskPrompt);
          case "query":
            return aiQuery(page, testInfo, taskPrompt);
          default:
            throw new Error(`Unknown or Unsupported task type: ${type}, only support 'action' or 'query'`);
        }
      });
    },
    aiAction: async ({ page }, use, testInfo) => {
      await use(async (taskPrompt) => {
        await aiAction(page, testInfo, taskPrompt);
      });
    },
    aiQuery: async ({ page }, use, testInfo) => {
      await use(async function(demand) {
        return aiQuery(page, testInfo, demand);
      });
    }
  };
};
440
+ export {
441
+ PlaywrightAiFixture
442
+ };