@midscene/web 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,473 @@
1
+ "use strict";
// Short aliases for the Object reflection primitives used by the bundler
// runtime helpers below.
var {
  create: __create,
  defineProperty: __defProp,
  defineProperties: __defProps,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyDescriptors: __getOwnPropDescs,
  getOwnPropertyNames: __getOwnPropNames,
  getOwnPropertySymbols: __getOwnPropSymbols,
  getPrototypeOf: __getProtoOf
} = Object;
var {
  hasOwnProperty: __hasOwnProp,
  propertyIsEnumerable: __propIsEnum
} = Object.prototype;

/**
 * Sets `obj[key]` to `value`. When `key` is already reachable on `obj`
 * (own or inherited) the property is (re)defined as a plain enumerable,
 * configurable, writable data property; otherwise a normal assignment is used.
 * Returns `obj` in the first case and `value` in the second.
 */
var __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value });
  }
  return obj[key] = value;
};

/**
 * Copies the own enumerable string- and symbol-keyed properties of `b` onto
 * `a` (the down-levelled form of object spread) and returns `a`.
 * A falsy `b` is treated as an empty object.
 */
var __spreadValues = (a, b) => {
  const source = b || {};
  for (const key in source) {
    // for...in also walks inherited keys; keep own ones only.
    if (__hasOwnProp.call(source, key)) {
      __defNormalProp(a, key, source[key]);
    }
  }
  if (__getOwnPropSymbols) {
    for (const symbol of __getOwnPropSymbols(source)) {
      if (__propIsEnum.call(source, symbol)) {
        __defNormalProp(a, symbol, source[symbol]);
      }
    }
  }
  return a;
};

/**
 * Defines every own property descriptor of `b` on `a` and returns `a`.
 */
var __spreadProps = (a, b) => {
  const descriptors = __getOwnPropDescs(b);
  return __defProps(a, descriptors);
};

/**
 * Registers each entry of `all` on `target` as an enumerable getter, so the
 * exported value is resolved when it is read rather than when it is registered.
 */
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters that read through
 * to `from`. Keys that `to` already owns, and the key equal to `except`, are
 * skipped. Enumerability follows the source descriptor. Returns `to`.
 * `from` is ignored unless it is a non-null object or a function.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Wraps a required module in an ESM-shaped namespace object whose prototype
 * is the prototype of `mod` (or a plain object when `mod` is null/undefined).
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

/**
 * Produces a CommonJS exports object for `mod`: a fresh object flagged with a
 * non-enumerable `__esModule: true` that mirrors the properties of `mod`.
 */
var __toCommonJS = (mod) => {
  const exportsObject = __defProp({}, "__esModule", { value: true });
  return __copyProps(exportsObject, mod);
};
46
+
47
+ // src/index.ts
48
+ var src_exports = {};
49
+ __export(src_exports, {
50
+ PlaywrightAiFixture: () => PlaywrightAiFixture
51
+ });
52
+ module.exports = __toCommonJS(src_exports);
53
+
54
+ // src/playwright/index.ts
55
+ var import_utils3 = require("@midscene/core/utils");
56
+
57
+ // src/playwright/actions.ts
58
+ var import_assert2 = __toESM(require("assert"));
59
+ var import_core = __toESM(require("@midscene/core"));
60
+ var import_utils = require("@midscene/core/utils");
61
+ var import_image2 = require("@midscene/core/image");
62
+
63
+ // src/playwright/utils.ts
64
+ var import_fs = __toESM(require("fs"));
65
+ var import_assert = __toESM(require("assert"));
66
+ var import_path = __toESM(require("path"));
67
+ var import_image = require("@midscene/core/image");
68
+
69
+ // src/playwright/element.ts
/**
 * One element captured from the page, paired with the Playwright page it
 * belongs to so that pointer and keyboard input can be sent to it.
 *
 * `center` is derived from `rect` (left/top/width/height) and floored to
 * whole pixels; it is the point used by `tap()` and `hover()`.
 */
var WebElementInfo = class {
  /**
   * @param {object} info
   * @param {*} info.content - Stored as-is on the instance.
   * @param {{left: number, top: number, width: number, height: number}} info.rect
   * @param {object} info.page - Object exposing `mouse` and `keyboard`.
   * @param {*} info.locator - Stored as-is on the instance.
   * @param {*} info.id - Stored as-is on the instance.
   * @param {*} info.attributes - Stored as-is on the instance.
   */
  constructor(info) {
    const { content, rect, page, locator, id, attributes } = info;
    const centerX = Math.floor(rect.left + rect.width / 2);
    const centerY = Math.floor(rect.top + rect.height / 2);

    this.content = content;
    this.rect = rect;
    this.center = [centerX, centerY];
    this.page = page;
    this.locator = locator;
    this.id = id;
    this.attributes = attributes;
  }

  /** Clicks the mouse at the element's center point. */
  async tap() {
    const { mouse } = this.page;
    await mouse.click(this.center[0], this.center[1]);
  }

  /** Moves the mouse to the element's center point. */
  async hover() {
    const { mouse } = this.page;
    await mouse.move(this.center[0], this.center[1]);
  }

  /** Types `text` through the page keyboard (no focus change is made here). */
  async type(text) {
    const { keyboard } = this.page;
    await keyboard.type(text);
  }

  /** Presses `key` through the page keyboard. */
  async press(key) {
    const { keyboard } = this.page;
    await keyboard.press(key);
  }
};
100
+
101
+ // src/playwright/utils.ts
/**
 * Builds the page context handed to the insight layer: a JPEG screenshot of
 * `page` plus the element snapshot extracted from the page, with each
 * element's rectangle aligned against that screenshot.
 *
 * @param {object} page - Playwright page; must be truthy.
 * @param {object} [_opt] - Accepted for interface compatibility; not read.
 * @returns {Promise<{content: Array, size: *, screenshotBase64: *}>}
 *   `content` is the list returned by `alignElements`, `size` is whatever
 *   `imageInfoOfBase64` reports for the encoded screenshot.
 * @throws when `page` is falsy (assertion "page is required").
 */
async function parseContextFromPlaywrightPage(page, _opt) {
  (0, import_assert.default)(page, "page is required");
  // The screenshot goes to a temp file obtained from the shared helper (the
  // same one recordScreenshot uses). A fixed absolute path only exists on the
  // machine the package was built on and would also be shared by every
  // concurrent caller.
  const file = (0, import_utils.getTmpFile)("jpeg");
  await page.screenshot({ path: file, type: "jpeg", quality: 75 });
  const screenshotBuffer = (0, import_fs.readFileSync)(file);
  const screenshotBase64 = (0, import_image.base64Encoded)(file);
  const captureElementSnapshot = await getElementInfosFromPage(page);
  const elementsInfo = await alignElements(screenshotBuffer, captureElementSnapshot, page);
  const size = await (0, import_image.imageInfoOfBase64)(screenshotBase64);
  return {
    content: elementsInfo,
    size,
    screenshotBase64
  };
}
/**
 * Runs the bundled element-inspector script inside `page` and returns the
 * value produced by `midscene_element_inspector.extractTextWithPositionDFS()`.
 *
 * The script is read from `dist/script/htmlElement.js` under the nearest
 * directory (walking up from `__dirname`) that contains a `package.json`.
 *
 * @param {object} page - Object exposing `evaluate(script)`.
 * @returns {Promise<*>} Whatever `page.evaluate` resolves with.
 * @throws when no enclosing `package.json` directory can be found, or when
 *   the script file cannot be read.
 */
async function getElementInfosFromPage(page) {
  const pathDir = findNearestPackageJson(__dirname);
  (0, import_assert.default)(pathDir, `can't find pathDir, with ${__dirname}`);
  const scriptPath = import_path.default.join(pathDir, "./dist/script/htmlElement.js");
  const elementInfosScriptContent = (0, import_fs.readFileSync)(scriptPath, "utf-8");
  // Separate the script from the trailing call with a newline and a semicolon.
  // Without them, a script that ends in a `//` line comment (for example a
  // source-map annotation) with no final newline would comment the call out,
  // and a script lacking a final semicolon would depend on ASI.
  const extraReturnLogic = `${elementInfosScriptContent}\n;midscene_element_inspector.extractTextWithPositionDFS()`;
  const captureElementSnapshot = await page.evaluate(extraReturnLogic);
  return captureElementSnapshot;
}
/**
 * Aligns each captured element's rectangle against the screenshot and wraps
 * the result in a `WebElementInfo` bound to `page`.
 *
 * The input elements are left untouched: each wrapper is built from a copy of
 * the element with `rect` replaced by the aligned rectangle. No `center` is
 * precomputed here because the `WebElementInfo` constructor derives its own
 * from `rect`.
 *
 * Elements are processed one at a time, in input order.
 *
 * @param {*} screenshotBuffer - Screenshot data passed through to `alignCoordByTrim`.
 * @param {Array<object>} elements - Element snapshots, each carrying a `rect`.
 * @param {object} page - Page to attach to every resulting element.
 * @returns {Promise<Array>} One `WebElementInfo` per input element.
 */
async function alignElements(screenshotBuffer, elements, page) {
  const textsAligned = [];
  for (const item of elements) {
    const aligned = await (0, import_image.alignCoordByTrim)(screenshotBuffer, item.rect);
    const alignedItem = Object.assign({}, item, { rect: aligned, page });
    textsAligned.push(new WebElementInfo(alignedItem));
  }
  return textsAligned;
}
/**
 * Walks upward from `dir` and returns the first directory that contains a
 * `package.json`, or `null` once the filesystem root has been checked
 * without a match.
 *
 * @param {string} dir - Directory to start from.
 * @returns {string|null}
 */
function findNearestPackageJson(dir) {
  let current = dir;
  for (;;) {
    const manifest = import_path.default.join(current, "package.json");
    if (import_fs.default.existsSync(manifest)) {
      return current;
    }
    const parent = import_path.default.dirname(current);
    // dirname() of the root is the root itself: nothing left to climb.
    if (parent === current) {
      return null;
    }
    current = parent;
  }
}
155
+
156
+ // src/playwright/actions.ts
/**
 * Drives a Playwright page from natural-language prompts.
 *
 * The agent owns an insight object (constructed from `import_core.default`
 * with a callback that captures the current page context) and an executor
 * (constructed from `import_core.Executor`). Every task handed to the
 * executor is wrapped so that screenshots are recorded around it. After each
 * run the executor's dump is stored on `this.actionDump`, including when the
 * run fails.
 */
var PlayWrightActionAgent = class {
  /**
   * @param {object} page - Playwright page to act on.
   * @param {{taskName?: string}} [opt] - `taskName` becomes the executor
   *   name; defaults to "MidScene - PlayWrightAI".
   */
  constructor(page, opt) {
    this.page = page;
    this.insight = new import_core.default(async () => {
      return await parseContextFromPlaywrightPage(page);
    });
    this.executor = new import_core.Executor((opt == null ? void 0 : opt.taskName) || "MidScene - PlayWrightAI");
  }

  /**
   * Takes a screenshot of the page into a temp file and returns a recorder
   * entry for it.
   *
   * @param {string} timing - Label describing when the shot was taken.
   * @returns {Promise<{type: "screenshot", ts: number, screenshot: *, timing: string}>}
   */
  async recordScreenshot(timing) {
    const file = (0, import_utils.getTmpFile)("jpeg");
    const screenshotOptions = Object.assign({}, import_utils.commonScreenshotParam, { path: file });
    await this.page.screenshot(screenshotOptions);
    return {
      type: "screenshot",
      ts: Date.now(),
      screenshot: (0, import_image2.base64Encoded)(file),
      timing
    };
  }

  /**
   * Returns a copy of `taskApply` whose executor records a screenshot before
   * the original executor runs and, for tasks of type "Action", a second one
   * one second after it finishes. The screenshots are stored on
   * `context.task.recorder`.
   *
   * @param {object} taskApply - Task description with an `executor` function.
   * @returns {object} The wrapped task.
   */
  wrapExecutorWithScreenshot(taskApply) {
    const executor = async (param, context, ...args) => {
      const recorder = [];
      const { task } = context;
      task.recorder = recorder;
      recorder.push(await this.recordScreenshot(`before ${task.type}`));
      const result = await taskApply.executor(param, context, ...args);
      if (taskApply.type === "Action") {
        // Wait before capturing the "after" state of the action.
        await (0, import_utils.sleep)(1e3);
        recorder.push(await this.recordScreenshot("after Action"));
      }
      return result;
    };
    return Object.assign({}, taskApply, { executor });
  }

  /**
   * Converts planner output into executor tasks, each wrapped with
   * screenshot recording.
   *
   * @param {Array<object>} plans - Plan items with a `type` of "Locate",
   *   "Input", "KeyboardPress", "Tap", "Hover", "Scroll" or "Error".
   * @returns {Promise<Array<object>>} Executable tasks in plan order.
   * @throws {Error} For a plan of type "Error" or any unsupported type.
   */
  async convertPlanToExecutable(plans) {
    const tasks = plans.map((plan2) => {
      switch (plan2.type) {
        case "Locate":
          return {
            type: "Insight",
            subType: "Locate",
            param: {
              prompt: plan2.thought
            },
            executor: async (param) => {
              let insightDump;
              this.insight.onceDumpUpdatedFn = (dump) => {
                insightDump = dump;
              };
              const element = await this.insight.locate(param.prompt);
              (0, import_assert2.default)(element, `Element not found: ${param.prompt}`);
              return {
                output: {
                  element
                },
                log: {
                  dump: insightDump
                }
              };
            }
          };
        case "Input":
          return {
            type: "Action",
            subType: "Input",
            param: plan2.param,
            executor: async (taskParam) => {
              (0, import_assert2.default)(taskParam.value, "No value to input");
              await this.page.keyboard.type(taskParam.value);
            }
          };
        case "KeyboardPress":
          return {
            type: "Action",
            subType: "KeyboardPress",
            param: plan2.param,
            executor: async (taskParam) => {
              (0, import_assert2.default)(taskParam.value, "No key to press");
              await this.page.keyboard.press(taskParam.value);
            }
          };
        case "Tap":
          return {
            type: "Action",
            subType: "Tap",
            executor: async (param, { element }) => {
              (0, import_assert2.default)(element, "Element not found, cannot tap");
              await this.page.mouse.click(element.center[0], element.center[1]);
            }
          };
        case "Hover":
          return {
            type: "Action",
            subType: "Hover",
            executor: async (param, { element }) => {
              (0, import_assert2.default)(element, "Element not found, cannot hover");
              await this.page.mouse.move(element.center[0], element.center[1]);
            }
          };
        case "Scroll":
          return {
            type: "Action",
            subType: "Scroll",
            param: plan2.param,
            executor: async (taskParam) => {
              const scrollToEventName = taskParam.scrollType;
              const innerHeight = await this.page.evaluate(() => window.innerHeight);
              switch (scrollToEventName) {
                case "ScrollUntilTop":
                  await this.page.mouse.wheel(0, -9999999);
                  break;
                case "ScrollUntilBottom":
                  await this.page.mouse.wheel(0, 9999999);
                  break;
                case "ScrollUp":
                  await this.page.mouse.wheel(0, -innerHeight);
                  break;
                case "ScrollDown":
                  await this.page.mouse.wheel(0, innerHeight);
                  break;
                default:
                  console.error("Unknown scroll event type:", scrollToEventName);
              }
            }
          };
        case "Error":
          throw new Error(`Got a task plan with type Error: ${plan2.thought}`);
        default:
          throw new Error(`Unknown or Unsupported task type: ${plan2.type}`);
      }
    });
    return tasks.map((task) => this.wrapExecutorWithScreenshot(task));
  }

  /**
   * Plans and executes `userPrompt` against the page: a planning task runs
   * first, then the resulting plans are converted to tasks and executed.
   *
   * @param {string} userPrompt - Instruction to carry out.
   * @returns {Promise<void>}
   * @throws {Error} When planning or execution fails; the original error is
   *   attached as `cause`.
   */
  async action(userPrompt) {
    this.executor.description = userPrompt;
    const pageContext = await this.insight.contextRetrieverFn();
    let plans = [];
    const planningTask = {
      type: "Planning",
      param: {
        userPrompt
      },
      async executor(param) {
        const planResult = await (0, import_core.plan)(pageContext, param.userPrompt);
        (0, import_assert2.default)(planResult.plans.length > 0, "No plans found");
        plans = planResult.plans;
        return {
          output: planResult
        };
      }
    };
    try {
      await this.executor.append(this.wrapExecutorWithScreenshot(planningTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();
      const executables = await this.convertPlanToExecutable(plans);
      await this.executor.append(executables);
      await this.executor.flush();
      this.actionDump = this.executor.dump();
      (0, import_assert2.default)(
        this.executor.status !== "error",
        `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ""}`
      );
    } catch (e) {
      this.actionDump = this.executor.dump();
      throw new Error(e.message, { cause: e });
    }
  }

  /**
   * Extracts data from the page according to `demand`.
   *
   * @param {*} demand - Data demand forwarded to `insight.extract`.
   * @returns {Promise<*>} The extracted data.
   * @throws {Error} When execution fails or the executor ends in the "error"
   *   state; the original error is attached as `cause`.
   */
  async query(demand) {
    this.executor.description = JSON.stringify(demand);
    let data;
    const queryTask = {
      type: "Insight",
      subType: "Query",
      param: {
        dataDemand: demand
      },
      executor: async (param) => {
        let insightDump;
        this.insight.onceDumpUpdatedFn = (dump) => {
          insightDump = dump;
        };
        data = await this.insight.extract(param.dataDemand);
        return {
          output: data,
          log: { dump: insightDump }
        };
      }
    };
    try {
      await this.executor.append(this.wrapExecutorWithScreenshot(queryTask));
      await this.executor.flush();
      this.actionDump = this.executor.dump();
      // Same check action() performs: an executor that records a failure as
      // an "error" status instead of throwing must not surface as a silent
      // `undefined` result.
      (0, import_assert2.default)(
        this.executor.status !== "error",
        `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ""}`
      );
    } catch (e) {
      this.actionDump = this.executor.dump();
      throw new Error(e.message, { cause: e });
    }
    return data;
  }
};
374
+
375
+ // src/playwright/index.ts
/**
 * Creates the Playwright fixture definitions `ai`, `aiAction` and `aiQuery`.
 *
 * All three share one in-memory list of action dumps, grouped by test group
 * name. After every agent run that produced a dump, the whole list is written
 * out again under a name derived from the current process id.
 *
 * @returns {{ai: Function, aiAction: Function, aiQuery: Function}}
 */
var PlaywrightAiFixture = () => {
  const dumps = [];

  // Adds `execution` to the dump group named `groupName`, creating the group on first use.
  const appendDump = (groupName, execution) => {
    let group = dumps.find((dump) => dump.groupName === groupName);
    if (!group) {
      group = { groupName, executions: [] };
      dumps.push(group);
    }
    group.executions.push(execution);
  };

  // Serializes every dump collected so far into the grouped dump file.
  const writeOutActionDumps = () => {
    const { writeDumpFile, groupedActionDumpFileExt } = import_utils3;
    writeDumpFile(`playwright-${process.pid}`, groupedActionDumpFileExt, JSON.stringify(dumps));
  };

  // Derives { groupName, caseName } from the test's title path: the last
  // segment is the case, everything before it (joined by " > ") is the group.
  const groupAndCaseForTest = (testInfo) => {
    const segments = [...testInfo.titlePath];
    if (segments.length === 0) {
      return { groupName: "unnamed", caseName: "unnamed" };
    }
    if (segments.length === 1) {
      return { groupName: segments[0], caseName: segments[0] };
    }
    const caseName = segments.pop();
    return { groupName: segments.join(" > "), caseName };
  };

  // Shared driver: builds an agent for the test, runs `invoke` on it, records
  // the agent's dump (even after a failure), then rethrows any failure wrapped
  // with its cause.
  const runWithAgent = async (page, testInfo, invoke) => {
    const { groupName, caseName } = groupAndCaseForTest(testInfo);
    const actionAgent = new PlayWrightActionAgent(page, { taskName: caseName });
    let failure;
    let outcome;
    try {
      outcome = await invoke(actionAgent);
    } catch (e) {
      failure = e;
    }
    if (actionAgent.actionDump) {
      appendDump(groupName, actionAgent.actionDump);
      writeOutActionDumps();
    }
    if (failure) {
      console.error(failure);
      throw new Error(failure.message, { cause: failure });
    }
    return outcome;
  };

  const aiAction = async (page, testInfo, taskPrompt) => {
    // The action's own return value is intentionally not propagated.
    await runWithAgent(page, testInfo, (agent) => agent.action(taskPrompt));
  };

  const aiQuery = async (page, testInfo, demand) => {
    return runWithAgent(page, testInfo, (agent) => agent.query(demand));
  };

  return {
    // shortcut
    ai: async ({ page }, use, testInfo) => {
      await use(async (taskPrompt, type = "action") => {
        switch (type) {
          case "action":
            return aiAction(page, testInfo, taskPrompt);
          case "query":
            return aiQuery(page, testInfo, taskPrompt);
          default:
            throw new Error(`Unknown or Unsupported task type: ${type}, only support 'action' or 'query'`);
        }
      });
    },
    aiAction: async ({ page }, use, testInfo) => {
      await use(async (taskPrompt) => {
        await aiAction(page, testInfo, taskPrompt);
      });
    },
    aiQuery: async ({ page }, use, testInfo) => {
      await use(async (demand) => {
        return aiQuery(page, testInfo, demand);
      });
    }
  };
};
470
+ // Annotate the CommonJS export names for ESM import in node:
471
+ 0 && (module.exports = {
472
+ PlaywrightAiFixture
473
+ });