misoai-web 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/LICENSE +21 -0
  2. package/README.md +8 -8
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +167 -44
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js +64 -17
  7. package/dist/es/bridge-mode-browser.js.map +1 -1
  8. package/dist/es/bridge-mode.js +169 -46
  9. package/dist/es/bridge-mode.js.map +1 -1
  10. package/dist/es/chrome-extension.js +229 -59
  11. package/dist/es/chrome-extension.js.map +1 -1
  12. package/dist/es/index.js +183 -45
  13. package/dist/es/index.js.map +1 -1
  14. package/dist/es/midscene-playground.js +173 -44
  15. package/dist/es/midscene-playground.js.map +1 -1
  16. package/dist/es/midscene-server.js.map +1 -1
  17. package/dist/es/playground.js +173 -44
  18. package/dist/es/playground.js.map +1 -1
  19. package/dist/es/playwright-report.js.map +1 -1
  20. package/dist/es/playwright.js +183 -45
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +183 -45
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +183 -45
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/ui-utils.js.map +1 -1
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +21 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +167 -44
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +64 -17
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +169 -46
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +229 -59
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +181 -46
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +173 -44
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +173 -44
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +181 -46
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +181 -46
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +181 -46
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js.map +1 -1
  54. package/dist/lib/yaml.js +21 -3
  55. package/dist/lib/yaml.js.map +1 -1
  56. package/dist/types/agent.d.ts +16 -6
  57. package/dist/types/bridge-mode-browser.d.ts +2 -2
  58. package/dist/types/bridge-mode.d.ts +2 -2
  59. package/dist/types/{browser-d447695b.d.ts → browser-a1877d18.d.ts} +1 -1
  60. package/dist/types/chrome-extension.d.ts +2 -2
  61. package/dist/types/index.d.ts +1 -1
  62. package/dist/types/midscene-server.d.ts +1 -1
  63. package/dist/types/{page-b8ada1f3.d.ts → page-663ece08.d.ts} +41 -30
  64. package/dist/types/playground.d.ts +2 -2
  65. package/dist/types/playwright.d.ts +1 -1
  66. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  67. package/dist/types/puppeteer.d.ts +1 -1
  68. package/dist/types/utils.d.ts +1 -1
  69. package/dist/types/yaml.d.ts +1 -1
  70. package/iife-script/htmlElement.js +99 -37
  71. package/iife-script/htmlElementDebug.js +92 -9
  72. package/package.json +23 -24
@@ -264,7 +264,9 @@ var ScriptPlayer = class {
264
264
  typeof prompt === "string",
265
265
  "prompt for aiAction must be a string"
266
266
  );
267
- await agent.aiAction(prompt);
267
+ await agent.aiAction(prompt, {
268
+ cacheable: actionTask.cacheable
269
+ });
268
270
  } else if ("aiAssert" in flowItem) {
269
271
  const assertTask = flowItem;
270
272
  const prompt = assertTask.aiAssert;
@@ -466,8 +468,24 @@ function interpolateEnvVars(content) {
466
468
  });
467
469
  }
468
470
  function parseYamlScript(content, filePath, ignoreCheckingTarget) {
469
- const interpolatedContent = interpolateEnvVars(content);
470
- const obj = import_js_yaml2.default.load(interpolatedContent);
471
+ let processedContent = content;
472
+ if (content.indexOf("android") !== -1 && content.match(/deviceId:\s*(\d+)/)) {
473
+ let matchedDeviceId;
474
+ processedContent = content.replace(
475
+ /deviceId:\s*(\d+)/g,
476
+ (match, deviceId) => {
477
+ matchedDeviceId = deviceId;
478
+ return `deviceId: '${deviceId}'`;
479
+ }
480
+ );
481
+ console.warn(
482
+ `please use string-style deviceId in yaml script, for example: deviceId: "${matchedDeviceId}"`
483
+ );
484
+ }
485
+ const interpolatedContent = interpolateEnvVars(processedContent);
486
+ const obj = import_js_yaml2.default.load(interpolatedContent, {
487
+ schema: import_js_yaml2.default.JSON_SCHEMA
488
+ });
471
489
  const pathTip = filePath ? `, failed to load ${filePath}` : "";
472
490
  const android = typeof obj.android !== "undefined" ? Object.assign({}, obj.android || {}) : void 0;
473
491
  const webConfig = obj.web || obj.target;
@@ -508,7 +526,6 @@ var import_misoai_core = require("misoai-core");
508
526
  var import_ai_model2 = require("misoai-core/ai-model");
509
527
  var import_utils5 = require("misoai-core/utils");
510
528
  var import_constants = require("misoai-shared/constants");
511
- var import_fs = require("misoai-shared/fs");
512
529
  var import_logger = require("misoai-shared/logger");
513
530
  var import_utils6 = require("misoai-shared/utils");
514
531
 
@@ -649,16 +666,18 @@ var PageTaskExecutor = class {
649
666
  );
650
667
  if (info?.id) {
651
668
  elementId = info.id;
669
+ } else {
670
+ debug(
671
+ "no element id found for position node, will not update cache",
672
+ element
673
+ );
652
674
  }
653
675
  }
654
676
  if (!elementId) {
655
677
  return void 0;
656
678
  }
657
679
  try {
658
- const elementInfosScriptContent = (0, import_fs.getElementInfosScriptContent)();
659
- const result = await this.page.evaluateJavaScript?.(
660
- `${elementInfosScriptContent}midscene_element_inspector.getXpathsById('${elementId}')`
661
- );
680
+ const result = await this.page.getXpathsById(elementId);
662
681
  return result;
663
682
  } catch (error) {
664
683
  debug("getXpathsById error: ", error);
@@ -697,7 +716,7 @@ var PageTaskExecutor = class {
697
716
  };
698
717
  return taskWithScreenshot;
699
718
  }
700
- async convertPlanToExecutable(plans) {
719
+ async convertPlanToExecutable(plans, opts) {
701
720
  const tasks = [];
702
721
  plans.forEach((plan2) => {
703
722
  if (plan2.type === "Locate") {
@@ -707,7 +726,10 @@ var PageTaskExecutor = class {
707
726
  const taskFind = {
708
727
  type: "Insight",
709
728
  subType: "Locate",
710
- param: plan2.locate || void 0,
729
+ param: plan2.locate ? {
730
+ ...plan2.locate,
731
+ cacheable: opts?.cacheable
732
+ } : void 0,
711
733
  thought: plan2.thought,
712
734
  locate: plan2.locate,
713
735
  executor: async (param, taskContext) => {
@@ -744,19 +766,21 @@ var PageTaskExecutor = class {
744
766
  let elementFromCache = null;
745
767
  try {
746
768
  if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
747
- const elementInfosScriptContent = (0, import_fs.getElementInfosScriptContent)();
748
- const element2 = await this.page.evaluateJavaScript?.(
749
- `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('${xpaths[0]}')`
750
- );
751
- if (element2?.id) {
752
- elementFromCache = element2;
753
- debug("cache hit, prompt: %s", cachePrompt);
754
- cacheHitFlag = true;
755
- debug(
756
- "found a new new element with same xpath, xpath: %s, id: %s",
757
- xpaths[0],
758
- element2?.id
769
+ for (let i = 0; i < xpaths.length; i++) {
770
+ const element2 = await this.page.getElementInfoByXpath(
771
+ xpaths[i]
759
772
  );
773
+ if (element2?.id) {
774
+ elementFromCache = element2;
775
+ debug("cache hit, prompt: %s", cachePrompt);
776
+ cacheHitFlag = true;
777
+ debug(
778
+ "found a new new element with same xpath, xpath: %s, id: %s",
779
+ xpaths[i],
780
+ element2?.id
781
+ );
782
+ break;
783
+ }
760
784
  }
761
785
  }
762
786
  } catch (error) {
@@ -769,12 +793,14 @@ var PageTaskExecutor = class {
769
793
  context: pageContext
770
794
  })).element;
771
795
  const aiCost = Date.now() - startTime;
796
+ let currentXpaths;
772
797
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
773
798
  const elementXpaths = await this.getElementXpath(
774
799
  pageContext,
775
800
  element
776
801
  );
777
- if (elementXpaths) {
802
+ if (elementXpaths?.length) {
803
+ currentXpaths = elementXpaths;
778
804
  this.taskCache.updateOrAppendCacheRecord(
779
805
  {
780
806
  type: "locate",
@@ -784,7 +810,11 @@ var PageTaskExecutor = class {
784
810
  locateCacheRecord
785
811
  );
786
812
  } else {
787
- debug("no xpaths found, will not update cache", cachePrompt);
813
+ debug(
814
+ "no xpaths found, will not update cache",
815
+ cachePrompt,
816
+ elementXpaths
817
+ );
788
818
  }
789
819
  }
790
820
  if (!element) {
@@ -796,7 +826,9 @@ var PageTaskExecutor = class {
796
826
  },
797
827
  pageContext,
798
828
  cache: {
799
- hit: cacheHitFlag
829
+ hit: cacheHitFlag,
830
+ originalXpaths: xpaths,
831
+ currentXpaths
800
832
  },
801
833
  aiCost
802
834
  };
@@ -1164,6 +1196,7 @@ var PageTaskExecutor = class {
1164
1196
  sleep: sleep3
1165
1197
  } = planResult;
1166
1198
  executorContext.task.log = {
1199
+ ...executorContext.task.log || {},
1167
1200
  rawResponse
1168
1201
  };
1169
1202
  executorContext.task.usage = usage;
@@ -1288,11 +1321,11 @@ var PageTaskExecutor = class {
1288
1321
  };
1289
1322
  return task;
1290
1323
  }
1291
- async runPlans(title, plans) {
1324
+ async runPlans(title, plans, opts) {
1292
1325
  const taskExecutor = new import_misoai_core.Executor(title, {
1293
1326
  onTaskStart: this.onTaskStartCallback
1294
1327
  });
1295
- const { tasks } = await this.convertPlanToExecutable(plans);
1328
+ const { tasks } = await this.convertPlanToExecutable(plans, opts);
1296
1329
  await taskExecutor.append(tasks);
1297
1330
  const result = await taskExecutor.flush();
1298
1331
  return {
@@ -1300,7 +1333,7 @@ var PageTaskExecutor = class {
1300
1333
  executor: taskExecutor
1301
1334
  };
1302
1335
  }
1303
- async action(userPrompt, actionContext) {
1336
+ async action(userPrompt, actionContext, opts) {
1304
1337
  const taskExecutor = new import_misoai_core.Executor(taskTitleStr("Action", userPrompt), {
1305
1338
  onTaskStart: this.onTaskStartCallback
1306
1339
  });
@@ -1325,7 +1358,7 @@ var PageTaskExecutor = class {
1325
1358
  yamlFlow.push(...planResult.yamlFlow || []);
1326
1359
  let executables;
1327
1360
  try {
1328
- executables = await this.convertPlanToExecutable(plans);
1361
+ executables = await this.convertPlanToExecutable(plans, opts);
1329
1362
  taskExecutor.append(executables.tasks);
1330
1363
  } catch (error) {
1331
1364
  return this.appendErrorPlan(
@@ -1363,7 +1396,7 @@ var PageTaskExecutor = class {
1363
1396
  executor: taskExecutor
1364
1397
  };
1365
1398
  }
1366
- async actionToGoal(userPrompt) {
1399
+ async actionToGoal(userPrompt, opts) {
1367
1400
  const taskExecutor = new import_misoai_core.Executor(taskTitleStr("Action", userPrompt), {
1368
1401
  onTaskStart: this.onTaskStartCallback
1369
1402
  });
@@ -1387,7 +1420,7 @@ var PageTaskExecutor = class {
1387
1420
  yamlFlow.push(...output.yamlFlow || []);
1388
1421
  let executables;
1389
1422
  try {
1390
- executables = await this.convertPlanToExecutable(plans);
1423
+ executables = await this.convertPlanToExecutable(plans, opts);
1391
1424
  taskExecutor.append(executables.tasks);
1392
1425
  } catch (error) {
1393
1426
  return this.appendErrorPlan(
@@ -1692,7 +1725,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1692
1725
  var import_semver = __toESM(require("semver"));
1693
1726
 
1694
1727
  // package.json
1695
- var version = "1.0.2";
1728
+ var version = "1.0.4";
1696
1729
 
1697
1730
  // src/common/task-cache.ts
1698
1731
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -2035,9 +2068,9 @@ var PageAgent = class {
2035
2068
  buildDetailedLocateParam(locatePrompt, opt) {
2036
2069
  (0, import_utils12.assert)(locatePrompt, "missing locate prompt");
2037
2070
  if (typeof opt === "object") {
2038
- const prompt = opt.prompt || locatePrompt;
2039
- const deepThink = opt.deepThink || false;
2040
- const cacheable = opt.cacheable || true;
2071
+ const prompt = opt.prompt ?? locatePrompt;
2072
+ const deepThink = opt.deepThink ?? false;
2073
+ const cacheable = opt.cacheable ?? true;
2041
2074
  return {
2042
2075
  prompt,
2043
2076
  deepThink,
@@ -2056,7 +2089,8 @@ var PageAgent = class {
2056
2089
  const plans = buildPlans("Tap", detailedLocateParam);
2057
2090
  const { executor, output } = await this.taskExecutor.runPlans(
2058
2091
  taskTitleStr("Tap", locateParamStr(detailedLocateParam)),
2059
- plans
2092
+ plans,
2093
+ { cacheable: opt?.cacheable }
2060
2094
  );
2061
2095
  const metadata = this.afterTaskRunning(executor);
2062
2096
  return {
@@ -2072,7 +2106,8 @@ var PageAgent = class {
2072
2106
  const plans = buildPlans("Hover", detailedLocateParam);
2073
2107
  const { executor, output } = await this.taskExecutor.runPlans(
2074
2108
  taskTitleStr("Hover", locateParamStr(detailedLocateParam)),
2075
- plans
2109
+ plans,
2110
+ { cacheable: opt?.cacheable }
2076
2111
  );
2077
2112
  const metadata = this.afterTaskRunning(executor);
2078
2113
  return {
@@ -2095,7 +2130,8 @@ var PageAgent = class {
2095
2130
  });
2096
2131
  const { executor, output } = await this.taskExecutor.runPlans(
2097
2132
  taskTitleStr("Input", locateParamStr(detailedLocateParam)),
2098
- plans
2133
+ plans,
2134
+ { cacheable: opt?.cacheable }
2099
2135
  );
2100
2136
  const metadata = this.afterTaskRunning(executor);
2101
2137
  return {
@@ -2111,7 +2147,8 @@ var PageAgent = class {
2111
2147
  });
2112
2148
  const { executor, output } = await this.taskExecutor.runPlans(
2113
2149
  taskTitleStr("KeyboardPress", locateParamStr(detailedLocateParam)),
2114
- plans
2150
+ plans,
2151
+ { cacheable: opt?.cacheable }
2115
2152
  );
2116
2153
  const metadata = this.afterTaskRunning(executor);
2117
2154
  return {
@@ -2125,7 +2162,8 @@ var PageAgent = class {
2125
2162
  const paramInTitle = locatePrompt ? `${locateParamStr(detailedLocateParam)} - ${scrollParamStr(scrollParam)}` : scrollParamStr(scrollParam);
2126
2163
  const { executor, output } = await this.taskExecutor.runPlans(
2127
2164
  taskTitleStr("Scroll", paramInTitle),
2128
- plans
2165
+ plans,
2166
+ { cacheable: opt?.cacheable }
2129
2167
  );
2130
2168
  const metadata = this.afterTaskRunning(executor);
2131
2169
  return {
@@ -2134,6 +2172,19 @@ var PageAgent = class {
2134
2172
  };
2135
2173
  }
2136
2174
  async aiAction(taskPrompt, opt) {
2175
+ try {
2176
+ const aiModel = await import("misoai-core/ai-model");
2177
+ const contextStore = aiModel.getContextStore();
2178
+ const processedPrompt = contextStore.replaceAllReferences(taskPrompt, "action");
2179
+ contextStore.addStep({
2180
+ type: "action",
2181
+ summary: `Action: ${processedPrompt}`,
2182
+ prompt: processedPrompt
2183
+ });
2184
+ taskPrompt = processedPrompt;
2185
+ } catch (error) {
2186
+ debug4("Context store not available:", error);
2187
+ }
2137
2188
  const cacheable = opt?.cacheable;
2138
2189
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2139
2190
  const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
@@ -2151,7 +2202,9 @@ var PageAgent = class {
2151
2202
  metadata: metadata2
2152
2203
  };
2153
2204
  }
2154
- const { output, executor } = await (isVlmUiTars ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
2205
+ const { output, executor } = await (isVlmUiTars ? this.taskExecutor.actionToGoal(taskPrompt, { cacheable }) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext, {
2206
+ cacheable
2207
+ }));
2155
2208
  if (this.taskCache && output?.yamlFlow && cacheable !== false) {
2156
2209
  const yamlContent = {
2157
2210
  tasks: [
@@ -2178,7 +2231,63 @@ var PageAgent = class {
2178
2231
  };
2179
2232
  }
2180
2233
  async aiQuery(demand) {
2181
- const { output, executor } = await this.taskExecutor.query(demand);
2234
+ let processedDemand = demand;
2235
+ let storageKey;
2236
+ try {
2237
+ const aiModel = await import("misoai-core/ai-model");
2238
+ const contextStore = aiModel.getContextStore();
2239
+ if (typeof demand === "string") {
2240
+ const storageInstruction = contextStore.parseStorageInstruction(demand);
2241
+ if (storageInstruction) {
2242
+ storageKey = storageInstruction.key;
2243
+ processedDemand = storageInstruction.cleanText;
2244
+ contextStore._pendingAliases = storageInstruction.aliases;
2245
+ } else {
2246
+ const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2247
+ if (storageMatch) {
2248
+ storageKey = storageMatch[1];
2249
+ processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2250
+ }
2251
+ }
2252
+ }
2253
+ } catch (error) {
2254
+ debug4("Context store not available:", error);
2255
+ }
2256
+ const { output, executor } = await this.taskExecutor.query(processedDemand);
2257
+ if (storageKey && output) {
2258
+ try {
2259
+ const aiModel = await import("misoai-core/ai-model");
2260
+ const contextStore = aiModel.getContextStore();
2261
+ const pendingAliases = contextStore._pendingAliases;
2262
+ if (pendingAliases) {
2263
+ contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2264
+ delete contextStore._pendingAliases;
2265
+ } else {
2266
+ contextStore.storeData(storageKey, output);
2267
+ }
2268
+ contextStore.addStep({
2269
+ type: "query",
2270
+ summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2271
+ data: output,
2272
+ prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2273
+ });
2274
+ } catch (error) {
2275
+ debug4("Failed to store query result:", error);
2276
+ }
2277
+ } else {
2278
+ try {
2279
+ const aiModel = await import("misoai-core/ai-model");
2280
+ const contextStore = aiModel.getContextStore();
2281
+ contextStore.addStep({
2282
+ type: "query",
2283
+ summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2284
+ data: output,
2285
+ prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2286
+ });
2287
+ } catch (error) {
2288
+ debug4("Failed to add query step:", error);
2289
+ }
2290
+ }
2182
2291
  const metadata = this.afterTaskRunning(executor);
2183
2292
  return {
2184
2293
  result: output,
@@ -2273,7 +2382,8 @@ var PageAgent = class {
2273
2382
  const plans = buildPlans("Locate", detailedLocateParam);
2274
2383
  const { executor, output } = await this.taskExecutor.runPlans(
2275
2384
  taskTitleStr("Locate", locateParamStr(detailedLocateParam)),
2276
- plans
2385
+ plans,
2386
+ { cacheable: opt?.cacheable }
2277
2387
  );
2278
2388
  const metadata = this.afterTaskRunning(executor);
2279
2389
  const { element } = output;
@@ -2287,6 +2397,19 @@ var PageAgent = class {
2287
2397
  };
2288
2398
  }
2289
2399
  async aiAssert(assertion, msg, opt) {
2400
+ let processedAssertion = assertion;
2401
+ try {
2402
+ const aiModel = await import("misoai-core/ai-model");
2403
+ const contextStore = aiModel.getContextStore();
2404
+ processedAssertion = contextStore.replaceAllReferences(assertion, "assertion");
2405
+ contextStore.addStep({
2406
+ type: "assertion",
2407
+ summary: `Assertion: ${processedAssertion}`,
2408
+ prompt: processedAssertion
2409
+ });
2410
+ } catch (error) {
2411
+ debug4("Context store not available:", error);
2412
+ }
2290
2413
  let currentUrl = "";
2291
2414
  if (this.page.url) {
2292
2415
  try {
@@ -2294,7 +2417,7 @@ var PageAgent = class {
2294
2417
  } catch (e) {
2295
2418
  }
2296
2419
  }
2297
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2420
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2298
2421
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2299
2422
  const metadata = this.afterTaskRunning(executor, true);
2300
2423
  if (output && opt?.keepRawResponse) {
@@ -2725,30 +2848,54 @@ function sleep2(ms) {
2725
2848
  var ChromeExtensionProxyPage = class {
2726
2849
  constructor(forceSameTabNavigation) {
2727
2850
  this.pageType = "chrome-extension-proxy";
2728
- this.version = "1.0.2";
2851
+ this.version = "1.0.4";
2729
2852
  this.activeTabId = null;
2730
2853
  this.tabIdOfDebuggerAttached = null;
2731
2854
  this.attachingDebugger = null;
2732
2855
  this.destroyed = false;
2856
+ this.isMobileEmulation = null;
2733
2857
  this.latestMouseX = 100;
2734
2858
  this.latestMouseY = 100;
2735
2859
  this.mouse = {
2736
2860
  click: async (x, y) => {
2737
2861
  await this.mouse.move(x, y);
2738
- await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
2739
- type: "mousePressed",
2740
- x,
2741
- y,
2742
- button: "left",
2743
- clickCount: 1
2744
- });
2745
- await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
2746
- type: "mouseReleased",
2747
- x,
2748
- y,
2749
- button: "left",
2750
- clickCount: 1
2751
- });
2862
+ if (this.isMobileEmulation === null) {
2863
+ const result = await this.sendCommandToDebugger("Runtime.evaluate", {
2864
+ expression: `(() => {
2865
+ return /Android|iPhone|iPad|iPod|Mobile/i.test(navigator.userAgent);
2866
+ })()`,
2867
+ returnByValue: true
2868
+ });
2869
+ this.isMobileEmulation = result?.result?.value;
2870
+ }
2871
+ if (this.isMobileEmulation) {
2872
+ const touchPoints = [{ x: Math.round(x), y: Math.round(y) }];
2873
+ await this.sendCommandToDebugger("Input.dispatchTouchEvent", {
2874
+ type: "touchStart",
2875
+ touchPoints,
2876
+ modifiers: 0
2877
+ });
2878
+ await this.sendCommandToDebugger("Input.dispatchTouchEvent", {
2879
+ type: "touchEnd",
2880
+ touchPoints: [],
2881
+ modifiers: 0
2882
+ });
2883
+ } else {
2884
+ await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
2885
+ type: "mousePressed",
2886
+ x,
2887
+ y,
2888
+ button: "left",
2889
+ clickCount: 1
2890
+ });
2891
+ await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
2892
+ type: "mouseReleased",
2893
+ x,
2894
+ y,
2895
+ button: "left",
2896
+ clickCount: 1
2897
+ });
2898
+ }
2752
2899
  },
2753
2900
  wheel: async (deltaX, deltaY, startX, startY) => {
2754
2901
  const finalX = startX || this.latestMouseX;
@@ -2981,6 +3128,7 @@ var ChromeExtensionProxyPage = class {
2981
3128
  expression: script
2982
3129
  });
2983
3130
  const expression = () => {
3131
+ window.midscene_element_inspector.setNodeHashCacheListOnWindow();
2984
3132
  return {
2985
3133
  tree: window.midscene_element_inspector.webExtractNodeTree(),
2986
3134
  size: {
@@ -3034,6 +3182,28 @@ var ChromeExtensionProxyPage = class {
3034
3182
  const tree = await this.getElementsNodeTree();
3035
3183
  return (0, import_extractor2.treeToList)(tree);
3036
3184
  }
3185
+ async getXpathsById(id) {
3186
+ const script = await getHtmlElementScript();
3187
+ await this.sendCommandToDebugger("Runtime.evaluate", {
3188
+ expression: script
3189
+ });
3190
+ const result = await this.sendCommandToDebugger("Runtime.evaluate", {
3191
+ expression: `window.midscene_element_inspector.getXpathsById('${id}')`,
3192
+ returnByValue: true
3193
+ });
3194
+ return result.result.value;
3195
+ }
3196
+ async getElementInfoByXpath(xpath) {
3197
+ const script = await getHtmlElementScript();
3198
+ await this.sendCommandToDebugger("Runtime.evaluate", {
3199
+ expression: script
3200
+ });
3201
+ const result = await this.sendCommandToDebugger("Runtime.evaluate", {
3202
+ expression: `window.midscene_element_inspector.getElementInfoByXpath('${xpath}')`,
3203
+ returnByValue: true
3204
+ });
3205
+ return result.result.value;
3206
+ }
3037
3207
  async getElementsNodeTree() {
3038
3208
  await this.hideMousePointer();
3039
3209
  const content = await this.getPageContentByCDP();