@midscene/web 0.17.2 → 0.17.3-beta-20250526042111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/dist/es/agent.js +64 -40
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +26 -3
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +66 -42
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +88 -41
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +80 -41
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +70 -40
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/playground.js +70 -40
  14. package/dist/es/playground.js.map +1 -1
  15. package/dist/es/playwright.js +80 -41
  16. package/dist/es/playwright.js.map +1 -1
  17. package/dist/es/puppeteer-agent-launcher.js +80 -41
  18. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  19. package/dist/es/puppeteer.js +80 -41
  20. package/dist/es/puppeteer.js.map +1 -1
  21. package/dist/es/yaml.js +3 -1
  22. package/dist/es/yaml.js.map +1 -1
  23. package/dist/lib/agent.js +64 -40
  24. package/dist/lib/agent.js.map +1 -1
  25. package/dist/lib/bridge-mode-browser.js +26 -3
  26. package/dist/lib/bridge-mode-browser.js.map +1 -1
  27. package/dist/lib/bridge-mode.js +66 -42
  28. package/dist/lib/bridge-mode.js.map +1 -1
  29. package/dist/lib/chrome-extension.js +88 -41
  30. package/dist/lib/chrome-extension.js.map +1 -1
  31. package/dist/lib/index.js +78 -42
  32. package/dist/lib/index.js.map +1 -1
  33. package/dist/lib/midscene-playground.js +70 -40
  34. package/dist/lib/midscene-playground.js.map +1 -1
  35. package/dist/lib/playground.js +70 -40
  36. package/dist/lib/playground.js.map +1 -1
  37. package/dist/lib/playwright.js +78 -42
  38. package/dist/lib/playwright.js.map +1 -1
  39. package/dist/lib/puppeteer-agent-launcher.js +78 -42
  40. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  41. package/dist/lib/puppeteer.js +78 -42
  42. package/dist/lib/puppeteer.js.map +1 -1
  43. package/dist/lib/yaml.js +3 -1
  44. package/dist/lib/yaml.js.map +1 -1
  45. package/dist/types/agent.d.ts +16 -6
  46. package/dist/types/bridge-mode-browser.d.ts +2 -2
  47. package/dist/types/bridge-mode.d.ts +2 -2
  48. package/dist/types/{browser-afe2cee6.d.ts → browser-dd81e35b.d.ts} +1 -1
  49. package/dist/types/chrome-extension.d.ts +2 -2
  50. package/dist/types/index.d.ts +1 -1
  51. package/dist/types/midscene-server.d.ts +1 -1
  52. package/dist/types/{page-bbb72c29.d.ts → page-cd833f42.d.ts} +10 -0
  53. package/dist/types/playground.d.ts +2 -2
  54. package/dist/types/playwright.d.ts +1 -1
  55. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  56. package/dist/types/puppeteer.d.ts +1 -1
  57. package/dist/types/utils.d.ts +1 -1
  58. package/dist/types/yaml.d.ts +1 -1
  59. package/iife-script/htmlElement.js +97 -35
  60. package/iife-script/htmlElementDebug.js +90 -7
  61. package/package.json +4 -5
package/dist/es/agent.js CHANGED
@@ -90,7 +90,9 @@ var ScriptPlayer = class {
90
90
  typeof prompt === "string",
91
91
  "prompt for aiAction must be a string"
92
92
  );
93
- await agent.aiAction(prompt);
93
+ await agent.aiAction(prompt, {
94
+ cacheable: actionTask.cacheable
95
+ });
94
96
  } else if ("aiAssert" in flowItem) {
95
97
  const assertTask = flowItem;
96
98
  const prompt = assertTask.aiAssert;
@@ -365,7 +367,6 @@ import {
365
367
  } from "@midscene/core/ai-model";
366
368
  import { sleep } from "@midscene/core/utils";
367
369
  import { NodeType } from "@midscene/shared/constants";
368
- import { getElementInfosScriptContent } from "@midscene/shared/fs";
369
370
  import { getDebug } from "@midscene/shared/logger";
370
371
  import { assert as assert4 } from "@midscene/shared/utils";
371
372
 
@@ -602,16 +603,18 @@ var PageTaskExecutor = class {
602
603
  );
603
604
  if (info?.id) {
604
605
  elementId = info.id;
606
+ } else {
607
+ debug(
608
+ "no element id found for position node, will not update cache",
609
+ element
610
+ );
605
611
  }
606
612
  }
607
613
  if (!elementId) {
608
614
  return void 0;
609
615
  }
610
616
  try {
611
- const elementInfosScriptContent = getElementInfosScriptContent();
612
- const result = await this.page.evaluateJavaScript?.(
613
- `${elementInfosScriptContent}midscene_element_inspector.getXpathsById('${elementId}')`
614
- );
617
+ const result = await this.page.getXpathsById(elementId);
615
618
  return result;
616
619
  } catch (error) {
617
620
  debug("getXpathsById error: ", error);
@@ -650,7 +653,7 @@ var PageTaskExecutor = class {
650
653
  };
651
654
  return taskWithScreenshot;
652
655
  }
653
- async convertPlanToExecutable(plans) {
656
+ async convertPlanToExecutable(plans, opts) {
654
657
  const tasks = [];
655
658
  plans.forEach((plan2) => {
656
659
  if (plan2.type === "Locate") {
@@ -660,7 +663,10 @@ var PageTaskExecutor = class {
660
663
  const taskFind = {
661
664
  type: "Insight",
662
665
  subType: "Locate",
663
- param: plan2.locate || void 0,
666
+ param: plan2.locate ? {
667
+ ...plan2.locate,
668
+ cacheable: opts?.cacheable
669
+ } : void 0,
664
670
  thought: plan2.thought,
665
671
  locate: plan2.locate,
666
672
  executor: async (param, taskContext) => {
@@ -697,19 +703,21 @@ var PageTaskExecutor = class {
697
703
  let elementFromCache = null;
698
704
  try {
699
705
  if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
700
- const elementInfosScriptContent = getElementInfosScriptContent();
701
- const element2 = await this.page.evaluateJavaScript?.(
702
- `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('${xpaths[0]}')`
703
- );
704
- if (element2?.id) {
705
- elementFromCache = element2;
706
- debug("cache hit, prompt: %s", cachePrompt);
707
- cacheHitFlag = true;
708
- debug(
709
- "found a new new element with same xpath, xpath: %s, id: %s",
710
- xpaths[0],
711
- element2?.id
706
+ for (let i = 0; i < xpaths.length; i++) {
707
+ const element2 = await this.page.getElementInfoByXpath(
708
+ xpaths[i]
712
709
  );
710
+ if (element2?.id) {
711
+ elementFromCache = element2;
712
+ debug("cache hit, prompt: %s", cachePrompt);
713
+ cacheHitFlag = true;
714
+ debug(
715
+ "found a new new element with same xpath, xpath: %s, id: %s",
716
+ xpaths[i],
717
+ element2?.id
718
+ );
719
+ break;
720
+ }
713
721
  }
714
722
  }
715
723
  } catch (error) {
@@ -722,12 +730,14 @@ var PageTaskExecutor = class {
722
730
  context: pageContext
723
731
  })).element;
724
732
  const aiCost = Date.now() - startTime;
733
+ let currentXpaths;
725
734
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
726
735
  const elementXpaths = await this.getElementXpath(
727
736
  pageContext,
728
737
  element
729
738
  );
730
- if (elementXpaths) {
739
+ if (elementXpaths?.length) {
740
+ currentXpaths = elementXpaths;
731
741
  this.taskCache.updateOrAppendCacheRecord(
732
742
  {
733
743
  type: "locate",
@@ -737,7 +747,11 @@ var PageTaskExecutor = class {
737
747
  locateCacheRecord
738
748
  );
739
749
  } else {
740
- debug("no xpaths found, will not update cache", cachePrompt);
750
+ debug(
751
+ "no xpaths found, will not update cache",
752
+ cachePrompt,
753
+ elementXpaths
754
+ );
741
755
  }
742
756
  }
743
757
  if (!element) {
@@ -749,7 +763,9 @@ var PageTaskExecutor = class {
749
763
  },
750
764
  pageContext,
751
765
  cache: {
752
- hit: cacheHitFlag
766
+ hit: cacheHitFlag,
767
+ originalXpaths: xpaths,
768
+ currentXpaths
753
769
  },
754
770
  aiCost
755
771
  };
@@ -1241,11 +1257,11 @@ var PageTaskExecutor = class {
1241
1257
  };
1242
1258
  return task;
1243
1259
  }
1244
- async runPlans(title, plans) {
1260
+ async runPlans(title, plans, opts) {
1245
1261
  const taskExecutor = new Executor(title, {
1246
1262
  onTaskStart: this.onTaskStartCallback
1247
1263
  });
1248
- const { tasks } = await this.convertPlanToExecutable(plans);
1264
+ const { tasks } = await this.convertPlanToExecutable(plans, opts);
1249
1265
  await taskExecutor.append(tasks);
1250
1266
  const result = await taskExecutor.flush();
1251
1267
  return {
@@ -1253,7 +1269,7 @@ var PageTaskExecutor = class {
1253
1269
  executor: taskExecutor
1254
1270
  };
1255
1271
  }
1256
- async action(userPrompt, actionContext) {
1272
+ async action(userPrompt, actionContext, opts) {
1257
1273
  const taskExecutor = new Executor(taskTitleStr("Action", userPrompt), {
1258
1274
  onTaskStart: this.onTaskStartCallback
1259
1275
  });
@@ -1278,7 +1294,7 @@ var PageTaskExecutor = class {
1278
1294
  yamlFlow.push(...planResult.yamlFlow || []);
1279
1295
  let executables;
1280
1296
  try {
1281
- executables = await this.convertPlanToExecutable(plans);
1297
+ executables = await this.convertPlanToExecutable(plans, opts);
1282
1298
  taskExecutor.append(executables.tasks);
1283
1299
  } catch (error) {
1284
1300
  return this.appendErrorPlan(
@@ -1316,7 +1332,7 @@ var PageTaskExecutor = class {
1316
1332
  executor: taskExecutor
1317
1333
  };
1318
1334
  }
1319
- async actionToGoal(userPrompt) {
1335
+ async actionToGoal(userPrompt, opts) {
1320
1336
  const taskExecutor = new Executor(taskTitleStr("Action", userPrompt), {
1321
1337
  onTaskStart: this.onTaskStartCallback
1322
1338
  });
@@ -1340,7 +1356,7 @@ var PageTaskExecutor = class {
1340
1356
  yamlFlow.push(...output.yamlFlow || []);
1341
1357
  let executables;
1342
1358
  try {
1343
- executables = await this.convertPlanToExecutable(plans);
1359
+ executables = await this.convertPlanToExecutable(plans, opts);
1344
1360
  taskExecutor.append(executables.tasks);
1345
1361
  } catch (error) {
1346
1362
  return this.appendErrorPlan(
@@ -1645,7 +1661,7 @@ import yaml3 from "js-yaml";
1645
1661
  import semver from "semver";
1646
1662
 
1647
1663
  // package.json
1648
- var version = "0.17.2";
1664
+ var version = "0.17.3-beta-20250526042111.0";
1649
1665
 
1650
1666
  // src/common/task-cache.ts
1651
1667
  var debug3 = getDebug3("cache");
@@ -1923,9 +1939,9 @@ ${errorTask?.errorStack}`);
1923
1939
  buildDetailedLocateParam(locatePrompt, opt) {
1924
1940
  assert7(locatePrompt, "missing locate prompt");
1925
1941
  if (typeof opt === "object") {
1926
- const prompt = opt.prompt || locatePrompt;
1927
- const deepThink = opt.deepThink || false;
1928
- const cacheable = opt.cacheable || true;
1942
+ const prompt = opt.prompt ?? locatePrompt;
1943
+ const deepThink = opt.deepThink ?? false;
1944
+ const cacheable = opt.cacheable ?? true;
1929
1945
  return {
1930
1946
  prompt,
1931
1947
  deepThink,
@@ -1944,7 +1960,8 @@ ${errorTask?.errorStack}`);
1944
1960
  const plans = buildPlans("Tap", detailedLocateParam);
1945
1961
  const { executor, output } = await this.taskExecutor.runPlans(
1946
1962
  taskTitleStr("Tap", locateParamStr(detailedLocateParam)),
1947
- plans
1963
+ plans,
1964
+ { cacheable: opt?.cacheable }
1948
1965
  );
1949
1966
  this.afterTaskRunning(executor);
1950
1967
  return output;
@@ -1957,7 +1974,8 @@ ${errorTask?.errorStack}`);
1957
1974
  const plans = buildPlans("Hover", detailedLocateParam);
1958
1975
  const { executor, output } = await this.taskExecutor.runPlans(
1959
1976
  taskTitleStr("Hover", locateParamStr(detailedLocateParam)),
1960
- plans
1977
+ plans,
1978
+ { cacheable: opt?.cacheable }
1961
1979
  );
1962
1980
  this.afterTaskRunning(executor);
1963
1981
  return output;
@@ -1977,7 +1995,8 @@ ${errorTask?.errorStack}`);
1977
1995
  });
1978
1996
  const { executor, output } = await this.taskExecutor.runPlans(
1979
1997
  taskTitleStr("Input", locateParamStr(detailedLocateParam)),
1980
- plans
1998
+ plans,
1999
+ { cacheable: opt?.cacheable }
1981
2000
  );
1982
2001
  this.afterTaskRunning(executor);
1983
2002
  return output;
@@ -1990,7 +2009,8 @@ ${errorTask?.errorStack}`);
1990
2009
  });
1991
2010
  const { executor, output } = await this.taskExecutor.runPlans(
1992
2011
  taskTitleStr("KeyboardPress", locateParamStr(detailedLocateParam)),
1993
- plans
2012
+ plans,
2013
+ { cacheable: opt?.cacheable }
1994
2014
  );
1995
2015
  this.afterTaskRunning(executor);
1996
2016
  return output;
@@ -2001,7 +2021,8 @@ ${errorTask?.errorStack}`);
2001
2021
  const paramInTitle = locatePrompt ? `${locateParamStr(detailedLocateParam)} - ${scrollParamStr(scrollParam)}` : scrollParamStr(scrollParam);
2002
2022
  const { executor, output } = await this.taskExecutor.runPlans(
2003
2023
  taskTitleStr("Scroll", paramInTitle),
2004
- plans
2024
+ plans,
2025
+ { cacheable: opt?.cacheable }
2005
2026
  );
2006
2027
  this.afterTaskRunning(executor);
2007
2028
  return output;
@@ -2020,7 +2041,9 @@ ${errorTask?.errorStack}`);
2020
2041
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2021
2042
  return this.runYaml(yaml5);
2022
2043
  }
2023
- const { output, executor } = await (isVlmUiTars ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
2044
+ const { output, executor } = await (isVlmUiTars ? this.taskExecutor.actionToGoal(taskPrompt, { cacheable }) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext, {
2045
+ cacheable
2046
+ }));
2024
2047
  if (this.taskCache && output?.yamlFlow && cacheable !== false) {
2025
2048
  const yamlContent = {
2026
2049
  tasks: [
@@ -2129,7 +2152,8 @@ ${errorTask?.errorStack}`);
2129
2152
  const plans = buildPlans("Locate", detailedLocateParam);
2130
2153
  const { executor, output } = await this.taskExecutor.runPlans(
2131
2154
  taskTitleStr("Locate", locateParamStr(detailedLocateParam)),
2132
- plans
2155
+ plans,
2156
+ { cacheable: opt?.cacheable }
2133
2157
  );
2134
2158
  this.afterTaskRunning(executor);
2135
2159
  const { element } = output;