@midscene/web 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/index.js CHANGED
@@ -19,24 +19,21 @@ var __spreadValues = (a, b) => {
19
19
  var __spreadProps = (a, b) => __defProps(a, __getOwnPropDescs(b));
20
20
 
21
21
  // src/playwright/index.ts
22
- import { groupedActionDumpFileExt, writeDumpFile } from "@midscene/core/utils";
22
+ import { randomUUID } from "crypto";
23
23
 
24
- // src/playwright/actions.ts
25
- import assert2 from "assert";
26
- import Insight, {
27
- Executor,
28
- plan
29
- } from "@midscene/core";
30
- import { commonScreenshotParam, getTmpFile, sleep } from "@midscene/core/utils";
31
- import { base64Encoded as base64Encoded2 } from "@midscene/core/image";
24
+ // src/playwright/cache.ts
25
+ import path2, { join } from "path";
26
+ import fs2 from "fs";
27
+ import { writeDumpFile, getDumpDirPath } from "@midscene/core/utils";
32
28
 
33
- // src/playwright/utils.ts
29
+ // src/common/utils.ts
34
30
  import fs, { readFileSync } from "fs";
35
31
  import assert from "assert";
36
32
  import path from "path";
37
33
  import { alignCoordByTrim, base64Encoded, imageInfoOfBase64 } from "@midscene/core/image";
34
+ import { getTmpFile } from "@midscene/core/utils";
38
35
 
39
- // src/playwright/element.ts
36
+ // src/web-element.ts
40
37
  var WebElementInfo = class {
41
38
  constructor({
42
39
  content,
@@ -54,24 +51,13 @@ var WebElementInfo = class {
54
51
  this.id = id;
55
52
  this.attributes = attributes;
56
53
  }
57
- async tap() {
58
- await this.page.mouse.click(this.center[0], this.center[1]);
59
- }
60
- async hover() {
61
- await this.page.mouse.move(this.center[0], this.center[1]);
62
- }
63
- async type(text) {
64
- await this.page.keyboard.type(text);
65
- }
66
- async press(key) {
67
- await this.page.keyboard.press(key);
68
- }
69
54
  };
70
55
 
71
- // src/playwright/utils.ts
72
- async function parseContextFromPlaywrightPage(page, _opt) {
56
+ // src/common/utils.ts
57
+ async function parseContextFromWebPage(page, _opt) {
73
58
  assert(page, "page is required");
74
- const file = "/Users/bytedance/workspace/midscene/packages/midscene/tests/fixtures/heytea.jpeg";
59
+ const url = page.url();
60
+ const file = getTmpFile("jpeg");
75
61
  await page.screenshot({ path: file, type: "jpeg", quality: 75 });
76
62
  const screenshotBuffer = readFileSync(file);
77
63
  const screenshotBase64 = base64Encoded(file);
@@ -81,7 +67,8 @@ async function parseContextFromPlaywrightPage(page, _opt) {
81
67
  return {
82
68
  content: elementsInfo,
83
69
  size,
84
- screenshotBase64
70
+ screenshotBase64,
71
+ url
85
72
  };
86
73
  }
87
74
  async function getElementInfosFromPage(page) {
@@ -123,17 +110,138 @@ function findNearestPackageJson(dir) {
123
110
  return findNearestPackageJson(parentDir);
124
111
  }
125
112
 
126
- // src/playwright/actions.ts
127
- var PlayWrightActionAgent = class {
128
- constructor(page, opt) {
113
+ // src/playwright/cache.ts
114
+ function writeTestCache(taskFile, taskTitle, taskCacheJson) {
115
+ const packageJson = getPkgInfo();
116
+ writeDumpFile({
117
+ fileName: `${taskFile}(${taskTitle})`,
118
+ fileExt: "json",
119
+ fileContent: JSON.stringify(
120
+ __spreadValues({
121
+ pkgName: packageJson.name,
122
+ pkgVersion: packageJson.version,
123
+ taskFile,
124
+ taskTitle
125
+ }, taskCacheJson),
126
+ null,
127
+ 2
128
+ ),
129
+ type: "cache"
130
+ });
131
+ }
132
+ function readTestCache(taskFile, taskTitle) {
133
+ const cacheFile = join(getDumpDirPath("cache"), `${taskFile}(${taskTitle}).json`);
134
+ const pkgInfo = getPkgInfo();
135
+ if (process.env.MIDSCENE_CACHE === "true" && fs2.existsSync(cacheFile)) {
136
+ try {
137
+ const data = fs2.readFileSync(cacheFile, "utf8");
138
+ const jsonData = JSON.parse(data);
139
+ if (jsonData.pkgName !== pkgInfo.name || jsonData.pkgVersion !== pkgInfo.version) {
140
+ return void 0;
141
+ }
142
+ return jsonData;
143
+ } catch (err) {
144
+ return void 0;
145
+ }
146
+ }
147
+ return void 0;
148
+ }
149
+ function getPkgInfo() {
150
+ const packageJsonDir = findNearestPackageJson(__dirname);
151
+ if (!packageJsonDir) {
152
+ console.error("Cannot find package.json directory: ", __dirname);
153
+ return {
154
+ name: "@midscene/web",
155
+ version: "0.0.0"
156
+ };
157
+ }
158
+ const packageJsonPath = path2.join(packageJsonDir, "package.json");
159
+ const data = fs2.readFileSync(packageJsonPath, "utf8");
160
+ const packageJson = JSON.parse(data);
161
+ return {
162
+ name: packageJson.name,
163
+ version: packageJson.version
164
+ };
165
+ }
166
+
167
+ // src/common/agent.ts
168
+ import { groupedActionDumpFileExt, writeDumpFile as writeDumpFile2 } from "@midscene/core/utils";
169
+
170
+ // src/common/tasks.ts
171
+ import assert2 from "assert";
172
+ import Insight, {
173
+ Executor,
174
+ plan
175
+ } from "@midscene/core";
176
+ import { commonScreenshotParam, getTmpFile as getTmpFile2, sleep } from "@midscene/core/utils";
177
+ import { base64Encoded as base64Encoded2 } from "@midscene/core/image";
178
+
179
+ // src/common/task-cache.ts
180
+ var TaskCache = class {
181
+ constructor(opts) {
182
+ this.cache = opts == null ? void 0 : opts.cache;
183
+ this.newCache = {
184
+ aiTasks: []
185
+ };
186
+ }
187
+ readCache(pageContext, type, userPrompt) {
188
+ var _a;
189
+ if (this.cache) {
190
+ const { aiTasks } = this.cache;
191
+ const index = aiTasks.findIndex((item) => item.prompt === userPrompt);
192
+ if (index === -1) {
193
+ return false;
194
+ }
195
+ const taskRes = aiTasks.splice(index, 1)[0];
196
+ if ((taskRes == null ? void 0 : taskRes.type) === "locate" && !((_a = taskRes.response) == null ? void 0 : _a.elements.every((element) => {
197
+ const findIndex = pageContext.content.findIndex(
198
+ (contentElement) => contentElement.id === element.id
199
+ );
200
+ if (findIndex === -1) {
201
+ return false;
202
+ }
203
+ return true;
204
+ }))) {
205
+ return false;
206
+ }
207
+ if (taskRes && taskRes.type === type && taskRes.prompt === userPrompt && this.pageContextEqual(taskRes.pageContext, pageContext)) {
208
+ return taskRes.response;
209
+ }
210
+ }
211
+ return false;
212
+ }
213
+ saveCache(cache) {
214
+ var _a;
215
+ if (cache) {
216
+ (_a = this.newCache) == null ? void 0 : _a.aiTasks.push(cache);
217
+ }
218
+ }
219
+ pageContextEqual(taskPageContext, pageContext) {
220
+ return taskPageContext.size.width === pageContext.size.width && taskPageContext.size.height === pageContext.size.height;
221
+ }
222
+ /**
223
+ * Generate task cache data.
224
+ * This method is mainly used to create or obtain some cached data for tasks, and it returns a new cache object.
225
+ * In the cache object, it may contain task-related information, states, or other necessary data.
226
+ * It is assumed that the `newCache` property already exists in the current class or object and is a data structure used to store task cache.
227
+ * @returns {Object} Returns a new cache object, which may include task cache data.
228
+ */
229
+ generateTaskCache() {
230
+ return this.newCache;
231
+ }
232
+ };
233
+
234
+ // src/common/tasks.ts
235
+ var PageTaskExecutor = class {
236
+ constructor(page, opts) {
129
237
  this.page = page;
130
238
  this.insight = new Insight(async () => {
131
- return await parseContextFromPlaywrightPage(page);
239
+ return await parseContextFromWebPage(page);
132
240
  });
133
- this.executor = new Executor((opt == null ? void 0 : opt.taskName) || "MidScene - PlayWrightAI");
241
+ this.taskCache = new TaskCache(opts);
134
242
  }
135
243
  async recordScreenshot(timing) {
136
- const file = getTmpFile("jpeg");
244
+ const file = getTmpFile2("jpeg");
137
245
  await this.page.screenshot(__spreadProps(__spreadValues({}, commonScreenshotParam), {
138
246
  path: file
139
247
  }));
@@ -170,23 +278,48 @@ var PlayWrightActionAgent = class {
170
278
  const taskFind = {
171
279
  type: "Insight",
172
280
  subType: "Locate",
173
- param: {
174
- prompt: plan2.thought
175
- },
281
+ param: plan2.param,
176
282
  executor: async (param) => {
177
283
  let insightDump;
178
284
  const dumpCollector = (dump) => {
179
285
  insightDump = dump;
180
286
  };
181
287
  this.insight.onceDumpUpdatedFn = dumpCollector;
182
- const element = await this.insight.locate(param.prompt);
288
+ const pageContext = await this.insight.contextRetrieverFn();
289
+ const locateCache = this.taskCache.readCache(pageContext, "locate", param.prompt);
290
+ let locateResult;
291
+ const callAI = this.insight.aiVendorFn;
292
+ const element = await this.insight.locate(param.prompt, {
293
+ callAI: async (message) => {
294
+ if (locateCache) {
295
+ locateResult = locateCache;
296
+ return Promise.resolve(locateCache);
297
+ }
298
+ locateResult = await callAI(message);
299
+ return locateResult;
300
+ }
301
+ });
183
302
  assert2(element, `Element not found: ${param.prompt}`);
303
+ if (locateResult) {
304
+ this.taskCache.saveCache({
305
+ type: "locate",
306
+ pageContext: {
307
+ url: pageContext.url,
308
+ size: pageContext.size
309
+ },
310
+ prompt: param.prompt,
311
+ response: locateResult
312
+ });
313
+ }
184
314
  return {
185
315
  output: {
186
316
  element
187
317
  },
188
318
  log: {
189
319
  dump: insightDump
320
+ },
321
+ cache: {
322
+ hit: Boolean(locateResult)
190
323
  }
191
324
  };
192
325
  }
@@ -197,7 +330,10 @@ var PlayWrightActionAgent = class {
197
330
  type: "Action",
198
331
  subType: "Input",
199
332
  param: plan2.param,
200
- executor: async (taskParam) => {
333
+ executor: async (taskParam, { element }) => {
334
+ if (element) {
335
+ await this.page.mouse.click(element.center[0], element.center[1]);
336
+ }
201
337
  assert2(taskParam.value, "No value to input");
202
338
  await this.page.keyboard.type(taskParam.value);
203
339
  }
@@ -272,43 +408,66 @@ var PlayWrightActionAgent = class {
272
408
  return tasks;
273
409
  }
274
410
  async action(userPrompt) {
275
- this.executor.description = userPrompt;
276
- const pageContext = await this.insight.contextRetrieverFn();
411
+ const taskExecutor = new Executor(userPrompt);
412
+ taskExecutor.description = userPrompt;
277
413
  let plans = [];
278
414
  const planningTask = {
279
415
  type: "Planning",
280
416
  param: {
281
417
  userPrompt
282
418
  },
283
- async executor(param) {
284
- const planResult = await plan(pageContext, param.userPrompt);
419
+ executor: async (param) => {
420
+ const pageContext = await this.insight.contextRetrieverFn();
421
+ let planResult;
422
+ const planCache = this.taskCache.readCache(pageContext, "plan", userPrompt);
423
+ if (planCache) {
424
+ planResult = planCache;
425
+ } else {
426
+ planResult = await plan(param.userPrompt, {
427
+ context: pageContext
428
+ });
429
+ }
285
430
  assert2(planResult.plans.length > 0, "No plans found");
286
431
  plans = planResult.plans;
432
+ this.taskCache.saveCache({
433
+ type: "plan",
434
+ pageContext: {
435
+ url: pageContext.url,
436
+ size: pageContext.size
437
+ },
438
+ prompt: userPrompt,
439
+ response: planResult
440
+ });
287
441
  return {
288
- output: planResult
442
+ output: planResult,
443
+ cache: {
444
+ hint: Boolean(planCache)
445
+ }
289
446
  };
290
447
  }
291
448
  };
292
449
  try {
293
- await this.executor.append(this.wrapExecutorWithScreenshot(planningTask));
294
- await this.executor.flush();
295
- this.actionDump = this.executor.dump();
450
+ await taskExecutor.append(this.wrapExecutorWithScreenshot(planningTask));
451
+ await taskExecutor.flush();
452
+ this.executionDump = taskExecutor.dump();
296
453
  const executables = await this.convertPlanToExecutable(plans);
297
- await this.executor.append(executables);
298
- await this.executor.flush();
299
- this.actionDump = this.executor.dump();
454
+ await taskExecutor.append(executables);
455
+ await taskExecutor.flush();
456
+ this.executionDump = taskExecutor.dump();
300
457
  assert2(
301
- this.executor.status !== "error",
302
- `failed to execute tasks: ${this.executor.status}, msg: ${this.executor.errorMsg || ""}`
458
+ taskExecutor.status !== "error",
459
+ `failed to execute tasks: ${taskExecutor.status}, msg: ${taskExecutor.errorMsg || ""}`
303
460
  );
304
461
  } catch (e) {
305
- this.actionDump = this.executor.dump();
462
+ this.executionDump = taskExecutor.dump();
306
463
  const err = new Error(e.message, { cause: e });
307
464
  throw err;
308
465
  }
309
466
  }
310
467
  async query(demand) {
311
- this.executor.description = JSON.stringify(demand);
468
+ const description = JSON.stringify(demand);
469
+ const taskExecutor = new Executor(description);
470
+ taskExecutor.description = description;
312
471
  let data;
313
472
  const queryTask = {
314
473
  type: "Insight",
@@ -330,11 +489,11 @@ var PlayWrightActionAgent = class {
330
489
  }
331
490
  };
332
491
  try {
333
- await this.executor.append(this.wrapExecutorWithScreenshot(queryTask));
334
- await this.executor.flush();
335
- this.actionDump = this.executor.dump();
492
+ await taskExecutor.append(this.wrapExecutorWithScreenshot(queryTask));
493
+ await taskExecutor.flush();
494
+ this.executionDump = taskExecutor.dump();
336
495
  } catch (e) {
337
- this.actionDump = this.executor.dump();
496
+ this.executionDump = taskExecutor.dump();
338
497
  const err = new Error(e.message, { cause: e });
339
498
  throw err;
340
499
  }
@@ -342,101 +501,170 @@ var PlayWrightActionAgent = class {
342
501
  }
343
502
  };
344
503
 
345
- // src/playwright/index.ts
346
- var PlaywrightAiFixture = () => {
347
- const dumps = [];
348
- const appendDump = (groupName, execution) => {
349
- let currentDump = dumps.find((dump) => dump.groupName === groupName);
350
- if (!currentDump) {
351
- currentDump = {
352
- groupName,
504
+ // src/common/agent.ts
505
+ var PageAgent = class {
506
+ constructor(page, opts) {
507
+ this.page = page;
508
+ this.dumps = [
509
+ {
510
+ groupName: (opts == null ? void 0 : opts.taskFile) || "unnamed",
353
511
  executions: []
354
- };
355
- dumps.push(currentDump);
356
- }
512
+ }
513
+ ];
514
+ this.testId = (opts == null ? void 0 : opts.testId) || String(process.pid);
515
+ this.actionAgent = new PageTaskExecutor(this.page, {
516
+ cache: (opts == null ? void 0 : opts.cache) || { aiTasks: [] }
517
+ });
518
+ }
519
+ appendDump(execution) {
520
+ const currentDump = this.dumps[0];
357
521
  currentDump.executions.push(execution);
358
- };
359
- const writeOutActionDumps = () => {
360
- writeDumpFile(`playwright-${process.pid}`, groupedActionDumpFileExt, JSON.stringify(dumps));
361
- };
362
- const groupAndCaseForTest = (testInfo) => {
363
- let groupName;
364
- let caseName;
365
- const titlePath = [...testInfo.titlePath];
366
- if (titlePath.length > 1) {
367
- caseName = titlePath.pop();
368
- groupName = titlePath.join(" > ");
369
- } else if (titlePath.length === 1) {
370
- caseName = titlePath[0];
371
- groupName = caseName;
372
- } else {
373
- caseName = "unnamed";
374
- groupName = "unnamed";
375
- }
376
- return { groupName, caseName };
377
- };
378
- const aiAction = async (page, testInfo, taskPrompt) => {
379
- const { groupName, caseName } = groupAndCaseForTest(testInfo);
380
- const actionAgent = new PlayWrightActionAgent(page, { taskName: caseName });
522
+ }
523
+ writeOutActionDumps() {
524
+ this.dumpFile = writeDumpFile2({
525
+ fileName: `playwright-${this.testId}`,
526
+ fileExt: groupedActionDumpFileExt,
527
+ fileContent: JSON.stringify(this.dumps)
528
+ });
529
+ }
530
+ async aiAction(taskPrompt) {
381
531
  let error;
382
532
  try {
383
- await actionAgent.action(taskPrompt);
533
+ await this.actionAgent.action(taskPrompt);
384
534
  } catch (e) {
385
535
  error = e;
386
536
  }
387
- if (actionAgent.actionDump) {
388
- appendDump(groupName, actionAgent.actionDump);
389
- writeOutActionDumps();
537
+ if (this.actionAgent.executionDump) {
538
+ this.appendDump(this.actionAgent.executionDump);
539
+ this.writeOutActionDumps();
390
540
  }
391
541
  if (error) {
392
542
  console.error(error);
393
543
  throw new Error(error.message, { cause: error });
394
544
  }
395
- };
396
- const aiQuery = async (page, testInfo, demand) => {
397
- const { groupName, caseName } = groupAndCaseForTest(testInfo);
398
- const actionAgent = new PlayWrightActionAgent(page, { taskName: caseName });
545
+ }
546
+ async aiQuery(demand) {
399
547
  let error;
400
548
  let result;
401
549
  try {
402
- result = await actionAgent.query(demand);
550
+ result = await this.actionAgent.query(demand);
403
551
  } catch (e) {
404
552
  error = e;
405
553
  }
406
- if (actionAgent.actionDump) {
407
- appendDump(groupName, actionAgent.actionDump);
408
- writeOutActionDumps();
554
+ if (this.actionAgent.executionDump) {
555
+ this.appendDump(this.actionAgent.executionDump);
556
+ this.writeOutActionDumps();
409
557
  }
410
558
  if (error) {
411
559
  console.error(error);
412
560
  throw new Error(error.message, { cause: error });
413
561
  }
414
562
  return result;
563
+ }
564
+ async ai(taskPrompt, type = "action") {
565
+ if (type === "action") {
566
+ return this.aiAction(taskPrompt);
567
+ } else if (type === "query") {
568
+ return this.aiQuery(taskPrompt);
569
+ }
570
+ throw new Error(`Unknown or Unsupported task type: ${type}, only support 'action' or 'query'`);
571
+ }
572
+ };
573
+
574
+ // src/playwright/index.ts
575
+ var groupAndCaseForTest = (testInfo) => {
576
+ let taskFile;
577
+ let taskTitle;
578
+ const titlePath = [...testInfo.titlePath];
579
+ if (titlePath.length > 1) {
580
+ taskTitle = titlePath.pop();
581
+ taskFile = `${titlePath.join(" > ")}:${testInfo.line}`;
582
+ } else if (titlePath.length === 1) {
583
+ taskTitle = titlePath[0];
584
+ taskFile = `${taskTitle}:${testInfo.line}`;
585
+ } else {
586
+ taskTitle = "unnamed";
587
+ taskFile = "unnamed";
588
+ }
589
+ return { taskFile, taskTitle };
590
+ };
591
+ var midSceneAgentKeyId = "_midSceneAgentId";
592
+ var PlaywrightAiFixture = () => {
593
+ const pageAgentMap = {};
594
+ const agentForPage = (page, opts) => {
595
+ let idForPage = page[midSceneAgentKeyId];
596
+ if (!idForPage) {
597
+ idForPage = randomUUID();
598
+ page[midSceneAgentKeyId] = idForPage;
599
+ const testCase = readTestCache(opts.taskFile, opts.taskTitle) || { aiTasks: [] };
600
+ pageAgentMap[idForPage] = new PageAgent(page, {
601
+ testId: `${opts.testId}-${idForPage}`,
602
+ taskFile: opts.taskFile,
603
+ cache: testCase
604
+ });
605
+ }
606
+ return pageAgentMap[idForPage];
415
607
  };
416
608
  return {
417
- // shortcut
418
609
  ai: async ({ page }, use, testInfo) => {
419
- await use(async (taskPrompt, type = "action") => {
420
- if (type === "action") {
421
- return aiAction(page, testInfo, taskPrompt);
422
- } else if (type === "query") {
423
- return aiQuery(page, testInfo, taskPrompt);
424
- }
425
- throw new Error(`Unknown or Unsupported task type: ${type}, only support 'action' or 'query'`);
610
+ const { taskFile, taskTitle } = groupAndCaseForTest(testInfo);
611
+ const agent = agentForPage(page, { testId: testInfo.testId, taskFile, taskTitle });
612
+ await use(async (taskPrompt, opts) => {
613
+ await page.waitForLoadState("networkidle");
614
+ const actionType = (opts == null ? void 0 : opts.type) || "action";
615
+ const result = await agent.ai(taskPrompt, actionType);
616
+ return result;
426
617
  });
618
+ const taskCacheJson = agent.actionAgent.taskCache.generateTaskCache();
619
+ writeTestCache(taskFile, taskTitle, taskCacheJson);
620
+ if (agent.dumpFile) {
621
+ testInfo.annotations.push({
622
+ type: "MIDSCENE_AI_ACTION",
623
+ description: JSON.stringify({
624
+ testId: testInfo.testId,
625
+ dumpPath: agent.dumpFile
626
+ })
627
+ });
628
+ }
427
629
  },
428
630
  aiAction: async ({ page }, use, testInfo) => {
631
+ const { taskFile, taskTitle } = groupAndCaseForTest(testInfo);
632
+ const agent = agentForPage(page, { testId: testInfo.testId, taskFile, taskTitle });
429
633
  await use(async (taskPrompt) => {
430
- await aiAction(page, testInfo, taskPrompt);
634
+ await page.waitForLoadState("networkidle");
635
+ await agent.aiAction(taskPrompt);
431
636
  });
637
+ if (agent.dumpFile) {
638
+ testInfo.annotations.push({
639
+ type: "MIDSCENE_AI_ACTION",
640
+ description: JSON.stringify({
641
+ testId: testInfo.testId,
642
+ dumpPath: agent.dumpFile
643
+ })
644
+ });
645
+ }
432
646
  },
433
647
  aiQuery: async ({ page }, use, testInfo) => {
648
+ const { taskFile, taskTitle } = groupAndCaseForTest(testInfo);
649
+ const agent = agentForPage(page, { testId: testInfo.testId, taskFile, taskTitle });
434
650
  await use(async function(demand) {
435
- return aiQuery(page, testInfo, demand);
651
+ await page.waitForLoadState("networkidle");
652
+ const result = await agent.aiQuery(demand);
653
+ return result;
436
654
  });
655
+ if (agent.dumpFile) {
656
+ testInfo.annotations.push({
657
+ type: "MIDSCENE_AI_ACTION",
658
+ description: JSON.stringify({
659
+ testId: testInfo.testId,
660
+ dumpPath: agent.dumpFile
661
+ })
662
+ });
663
+ }
437
664
  }
438
665
  };
439
666
  };
440
667
  export {
441
- PlaywrightAiFixture
668
+ PlaywrightAiFixture,
669
+ PageAgent as PuppeteerAgent
442
670
  };