@midscene/core 0.14.3 → 0.14.4-beta-20250415065130.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/dist/es/ai-model.d.ts +3 -3
  2. package/dist/es/ai-model.js +2 -2
  3. package/dist/es/{chunk-SAXMKI7Z.js → chunk-6MPYG6WR.js} +3 -3
  4. package/dist/es/{chunk-UBGEKXK7.js → chunk-6SYVFZ6C.js} +4 -1
  5. package/dist/es/chunk-6SYVFZ6C.js.map +1 -0
  6. package/dist/es/{chunk-5EO33FHK.js → chunk-QX6K65KH.js} +2 -2
  7. package/dist/es/env.d.ts +1 -1
  8. package/dist/es/env.js +1 -1
  9. package/dist/es/index.d.ts +4 -4
  10. package/dist/es/index.js +3 -3
  11. package/dist/es/{llm-planning-bd80e99e.d.ts → llm-planning-d084250f.d.ts} +1 -1
  12. package/dist/es/tree.d.ts +1 -1
  13. package/dist/{lib/types-e2a418c3.d.ts → es/types-d2831105.d.ts} +12 -3
  14. package/dist/es/utils.d.ts +1 -1
  15. package/dist/es/utils.js +2 -2
  16. package/dist/lib/ai-model.d.ts +3 -3
  17. package/dist/lib/ai-model.js +3 -3
  18. package/dist/lib/{chunk-SAXMKI7Z.js → chunk-6MPYG6WR.js} +5 -5
  19. package/dist/lib/{chunk-UBGEKXK7.js → chunk-6SYVFZ6C.js} +4 -1
  20. package/dist/lib/chunk-6SYVFZ6C.js.map +1 -0
  21. package/dist/lib/{chunk-5EO33FHK.js → chunk-QX6K65KH.js} +48 -48
  22. package/dist/lib/env.d.ts +1 -1
  23. package/dist/lib/env.js +2 -2
  24. package/dist/lib/index.d.ts +4 -4
  25. package/dist/lib/index.js +20 -20
  26. package/dist/lib/{llm-planning-bd80e99e.d.ts → llm-planning-d084250f.d.ts} +1 -1
  27. package/dist/lib/tree.d.ts +1 -1
  28. package/dist/{es/types-e2a418c3.d.ts → lib/types-d2831105.d.ts} +12 -3
  29. package/dist/lib/utils.d.ts +1 -1
  30. package/dist/lib/utils.js +3 -3
  31. package/dist/types/ai-model.d.ts +3 -3
  32. package/dist/types/env.d.ts +1 -1
  33. package/dist/types/index.d.ts +4 -4
  34. package/dist/types/{llm-planning-bd80e99e.d.ts → llm-planning-d084250f.d.ts} +1 -1
  35. package/dist/types/tree.d.ts +1 -1
  36. package/dist/types/{types-e2a418c3.d.ts → types-d2831105.d.ts} +12 -3
  37. package/dist/types/utils.d.ts +1 -1
  38. package/package.json +2 -2
  39. package/report/index.html +19 -19
  40. package/dist/es/chunk-UBGEKXK7.js.map +0 -1
  41. package/dist/lib/chunk-UBGEKXK7.js.map +0 -1
  42. /package/dist/es/{chunk-SAXMKI7Z.js.map → chunk-6MPYG6WR.js.map} +0 -0
  43. /package/dist/es/{chunk-5EO33FHK.js.map → chunk-QX6K65KH.js.map} +0 -0
  44. /package/dist/lib/{chunk-SAXMKI7Z.js.map → chunk-6MPYG6WR.js.map} +0 -0
  45. /package/dist/lib/{chunk-5EO33FHK.js.map → chunk-QX6K65KH.js.map} +0 -0
@@ -25,7 +25,7 @@
25
25
 
26
26
 
27
27
 
28
- var _chunkUBGEKXK7js = require('./chunk-UBGEKXK7.js');
28
+ var _chunk6SYVFZ6Cjs = require('./chunk-6SYVFZ6C.js');
29
29
 
30
30
  // src/ai-model/service-caller/index.ts
31
31
  var _sdk = require('@anthropic-ai/sdk');
@@ -118,7 +118,7 @@ function adaptDoubaoBbox(bbox, width, height, errorMsg) {
118
118
  throw new Error(msg);
119
119
  }
120
120
  function adaptBbox(bbox, width, height, errorMsg) {
121
- if (_chunkUBGEKXK7js.vlLocateMode.call(void 0, ) === "doubao-vision") {
121
+ if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) === "doubao-vision") {
122
122
  return adaptDoubaoBbox(bbox, width, height, errorMsg);
123
123
  }
124
124
  return adaptQwenBbox(bbox, errorMsg);
@@ -598,7 +598,7 @@ async function describeUserPage(context, opt) {
598
598
  _optionalChain([opt, 'optionalAccess', _10 => _10.filterNonTextContent])
599
599
  );
600
600
  const sizeDescription = describeSize({ width, height });
601
- const pageDescription = _chunkUBGEKXK7js.vlLocateMode.call(void 0, ) ? "" : `The size of the page: ${sizeDescription}
601
+ const pageDescription = _chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) ? "" : `The size of the page: ${sizeDescription}
602
602
  Some of the elements are marked with a rectangle in the screenshot, some are not.
603
603
  The page elements tree:
604
604
  ${contentTree}`;
@@ -819,7 +819,7 @@ Reason:
819
819
  * Since the option button is not shown in the screenshot, there are still more actions to be done, so the \`more_actions_needed_by_instruction\` field should be true
820
820
  `;
821
821
  async function systemPromptToTaskPlanning() {
822
- if (_chunkUBGEKXK7js.vlLocateMode.call(void 0, )) {
822
+ if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
823
823
  return systemTemplateOfVLPlanning;
824
824
  }
825
825
  const promptTemplate = new (0, _prompts.PromptTemplate)({
@@ -961,7 +961,7 @@ Here is the user's instruction:
961
961
  `;
962
962
  };
963
963
  var automationUserPrompt = () => {
964
- if (_chunkUBGEKXK7js.vlLocateMode.call(void 0, )) {
964
+ if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
965
965
  return new (0, _prompts.PromptTemplate)({
966
966
  template: "{taskBackgroundContext}",
967
967
  inputVariables: ["taskBackgroundContext"]
@@ -981,18 +981,18 @@ pageDescription:
981
981
 
982
982
  // src/ai-model/service-caller/index.ts
983
983
  function checkAIConfig() {
984
- if (_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_API_KEY))
984
+ if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_API_KEY))
985
985
  return true;
986
- if (_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_AZURE_OPENAI))
986
+ if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_AZURE_OPENAI))
987
987
  return true;
988
- if (_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.ANTHROPIC_API_KEY))
988
+ if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.ANTHROPIC_API_KEY))
989
989
  return true;
990
- return Boolean(_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_OPENAI_INIT_CONFIG_JSON));
990
+ return Boolean(_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON));
991
991
  }
992
992
  var debugProfileStats = _logger.getDebug.call(void 0, "ai:profile:stats");
993
993
  var debugProfileDetail = _logger.getDebug.call(void 0, "ai:profile:detail");
994
994
  var debugCall = _logger.getDebug.call(void 0, "ai:call");
995
- var shouldPrintTiming = _chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_DEBUG_AI_PROFILE);
995
+ var shouldPrintTiming = _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_PROFILE);
996
996
  var debugConfig = "";
997
997
  if (shouldPrintTiming) {
998
998
  console.warn(
@@ -1000,7 +1000,7 @@ if (shouldPrintTiming) {
1000
1000
  );
1001
1001
  debugConfig = "ai:profile";
1002
1002
  }
1003
- var shouldPrintAIResponse = _chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_DEBUG_AI_RESPONSE);
1003
+ var shouldPrintAIResponse = _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_RESPONSE);
1004
1004
  if (shouldPrintAIResponse) {
1005
1005
  console.warn(
1006
1006
  "MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead"
@@ -1017,7 +1017,7 @@ if (debugConfig) {
1017
1017
  var defaultModel = "gpt-4o";
1018
1018
  function getModelName() {
1019
1019
  let modelName = defaultModel;
1020
- const nameInConfig = _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_MODEL_NAME);
1020
+ const nameInConfig = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME);
1021
1021
  if (nameInConfig) {
1022
1022
  modelName = nameInConfig;
1023
1023
  }
@@ -1027,22 +1027,22 @@ async function createChatClient({
1027
1027
  AIActionTypeValue
1028
1028
  }) {
1029
1029
  let openai;
1030
- const extraConfig = _chunkUBGEKXK7js.getAIConfigInJson.call(void 0, _chunkUBGEKXK7js.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
1031
- const socksProxy = _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_OPENAI_SOCKS_PROXY);
1030
+ const extraConfig = _chunk6SYVFZ6Cjs.getAIConfigInJson.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
1031
+ const socksProxy = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_SOCKS_PROXY);
1032
1032
  const socksAgent = socksProxy ? new (0, _socksproxyagent.SocksProxyAgent)(socksProxy) : void 0;
1033
- if (_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_USE_AZURE)) {
1033
+ if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_USE_AZURE)) {
1034
1034
  openai = new (0, _openai.AzureOpenAI)({
1035
- baseURL: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_BASE_URL),
1036
- apiKey: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_API_KEY),
1035
+ baseURL: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_BASE_URL),
1036
+ apiKey: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_API_KEY),
1037
1037
  httpAgent: socksAgent,
1038
1038
  ...extraConfig,
1039
1039
  dangerouslyAllowBrowser: true
1040
1040
  });
1041
- } else if (_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_AZURE_OPENAI)) {
1042
- const extraAzureConfig = _chunkUBGEKXK7js.getAIConfigInJson.call(void 0,
1043
- _chunkUBGEKXK7js.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
1041
+ } else if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_AZURE_OPENAI)) {
1042
+ const extraAzureConfig = _chunk6SYVFZ6Cjs.getAIConfigInJson.call(void 0,
1043
+ _chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
1044
1044
  );
1045
- const scope = _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_AZURE_OPENAI_SCOPE);
1045
+ const scope = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_SCOPE);
1046
1046
  let tokenProvider = void 0;
1047
1047
  if (scope) {
1048
1048
  _utils.assert.call(void 0,
@@ -1054,25 +1054,25 @@ async function createChatClient({
1054
1054
  tokenProvider = _identity.getBearerTokenProvider.call(void 0, credential, scope);
1055
1055
  openai = new (0, _openai.AzureOpenAI)({
1056
1056
  azureADTokenProvider: tokenProvider,
1057
- endpoint: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.AZURE_OPENAI_ENDPOINT),
1058
- apiVersion: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.AZURE_OPENAI_API_VERSION),
1059
- deployment: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.AZURE_OPENAI_DEPLOYMENT),
1057
+ endpoint: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_ENDPOINT),
1058
+ apiVersion: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_API_VERSION),
1059
+ deployment: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_DEPLOYMENT),
1060
1060
  ...extraConfig,
1061
1061
  ...extraAzureConfig
1062
1062
  });
1063
1063
  } else {
1064
1064
  openai = new (0, _openai.AzureOpenAI)({
1065
- apiKey: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.AZURE_OPENAI_KEY),
1066
- endpoint: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.AZURE_OPENAI_ENDPOINT),
1067
- apiVersion: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.AZURE_OPENAI_API_VERSION),
1068
- deployment: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.AZURE_OPENAI_DEPLOYMENT),
1065
+ apiKey: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_KEY),
1066
+ endpoint: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_ENDPOINT),
1067
+ apiVersion: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_API_VERSION),
1068
+ deployment: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.AZURE_OPENAI_DEPLOYMENT),
1069
1069
  dangerouslyAllowBrowser: true,
1070
1070
  ...extraConfig,
1071
1071
  ...extraAzureConfig
1072
1072
  });
1073
1073
  }
1074
- } else if (!_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_ANTHROPIC_SDK)) {
1075
- const baseURL = _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_BASE_URL);
1074
+ } else if (!_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_ANTHROPIC_SDK)) {
1075
+ const baseURL = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_BASE_URL);
1076
1076
  if (typeof baseURL === "string") {
1077
1077
  if (!/^https?:\/\//.test(baseURL)) {
1078
1078
  throw new Error(
@@ -1082,18 +1082,18 @@ Please check your config.`
1082
1082
  }
1083
1083
  }
1084
1084
  openai = new (0, _openai2.default)({
1085
- baseURL: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_BASE_URL),
1086
- apiKey: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_API_KEY),
1085
+ baseURL: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_BASE_URL),
1086
+ apiKey: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_API_KEY),
1087
1087
  httpAgent: socksAgent,
1088
1088
  ...extraConfig,
1089
1089
  defaultHeaders: {
1090
1090
  ..._optionalChain([extraConfig, 'optionalAccess', _11 => _11.defaultHeaders]) || {},
1091
- [_chunkUBGEKXK7js.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
1091
+ [_chunk6SYVFZ6Cjs.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
1092
1092
  },
1093
1093
  dangerouslyAllowBrowser: true
1094
1094
  });
1095
1095
  }
1096
- if (openai && _chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_LANGSMITH_DEBUG)) {
1096
+ if (openai && _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_LANGSMITH_DEBUG)) {
1097
1097
  if (_utils.ifInBrowser) {
1098
1098
  throw new Error("langsmith is not supported in browser");
1099
1099
  }
@@ -1107,8 +1107,8 @@ Please check your config.`
1107
1107
  style: "openai"
1108
1108
  };
1109
1109
  }
1110
- if (_chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_ANTHROPIC_SDK)) {
1111
- const apiKey = _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.ANTHROPIC_API_KEY);
1110
+ if (_chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_ANTHROPIC_SDK)) {
1111
+ const apiKey = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.ANTHROPIC_API_KEY);
1112
1112
  _utils.assert.call(void 0, apiKey, "ANTHROPIC_API_KEY is required");
1113
1113
  openai = new (0, _sdk.Anthropic)({
1114
1114
  apiKey,
@@ -1127,16 +1127,16 @@ async function call(messages, AIActionTypeValue, responseFormat) {
1127
1127
  const { completion, style } = await createChatClient({
1128
1128
  AIActionTypeValue
1129
1129
  });
1130
- const maxTokens = _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.OPENAI_MAX_TOKENS);
1130
+ const maxTokens = _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.OPENAI_MAX_TOKENS);
1131
1131
  const startTime = Date.now();
1132
1132
  const model = getModelName();
1133
1133
  let content;
1134
1134
  let usage;
1135
1135
  const commonConfig = {
1136
- temperature: _chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_VLM_UI_TARS) ? 0 : 0.1,
1136
+ temperature: _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS) ? 0 : 0.1,
1137
1137
  stream: false,
1138
1138
  max_tokens: typeof maxTokens === "number" ? maxTokens : Number.parseInt(maxTokens || "2048", 10),
1139
- ..._chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_QWEN_VL) ? {
1139
+ ..._chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL) ? {
1140
1140
  vl_high_resolution_images: true
1141
1141
  } : {}
1142
1142
  };
@@ -1160,7 +1160,7 @@ async function call(messages, AIActionTypeValue, responseFormat) {
1160
1160
  throw newError;
1161
1161
  }
1162
1162
  debugProfileStats(
1163
- `model, ${model}, mode, ${_chunkUBGEKXK7js.vlLocateMode.call(void 0, ) || "default"}, prompt-tokens, ${_optionalChain([result, 'access', _12 => _12.usage, 'optionalAccess', _13 => _13.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _14 => _14.usage, 'optionalAccess', _15 => _15.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _16 => _16.usage, 'optionalAccess', _17 => _17.total_tokens]) || ""}, cost-ms, ${Date.now() - startTime}, requestId, ${result._request_id || ""}`
1163
+ `model, ${model}, mode, ${_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) || "default"}, prompt-tokens, ${_optionalChain([result, 'access', _12 => _12.usage, 'optionalAccess', _13 => _13.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _14 => _14.usage, 'optionalAccess', _15 => _15.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _16 => _16.usage, 'optionalAccess', _17 => _17.total_tokens]) || ""}, cost-ms, ${Date.now() - startTime}, requestId, ${result._request_id || ""}`
1164
1164
  );
1165
1165
  debugProfileDetail("model usage detail: %s", JSON.stringify(result.usage));
1166
1166
  _utils.assert.call(void 0,
@@ -1271,7 +1271,7 @@ function safeParseJson(input) {
1271
1271
  return _dirtyjson2.default.parse(cleanJsonString);
1272
1272
  } catch (e) {
1273
1273
  }
1274
- if (_chunkUBGEKXK7js.vlLocateMode.call(void 0, ) === "doubao-vision") {
1274
+ if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) === "doubao-vision") {
1275
1275
  const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
1276
1276
  return _dirtyjson2.default.parse(jsonString);
1277
1277
  }
@@ -1411,7 +1411,7 @@ async function AiLocateElement(options) {
1411
1411
  pageDescription: description,
1412
1412
  targetElementDescription
1413
1413
  });
1414
- const systemPrompt = systemPromptToLocateElement(!!_chunkUBGEKXK7js.vlLocateMode.call(void 0, ));
1414
+ const systemPrompt = systemPromptToLocateElement(!!_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ));
1415
1415
  let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
1416
1416
  if (options.searchConfig) {
1417
1417
  _utils.assert.call(void 0,
@@ -1423,7 +1423,7 @@ async function AiLocateElement(options) {
1423
1423
  "searchArea is provided but its imageBase64 cannot be found. Failed to locate element"
1424
1424
  );
1425
1425
  imagePayload = options.searchConfig.imageBase64;
1426
- } else if (_chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_QWEN_VL)) {
1426
+ } else if (_chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL)) {
1427
1427
  imagePayload = await _img.paddingToMatchBlockByBase64.call(void 0, imagePayload);
1428
1428
  }
1429
1429
  const msgs = [
@@ -1542,7 +1542,7 @@ async function AiLocateSection(options) {
1542
1542
  imageBase64 = await _img.cropByRect.call(void 0,
1543
1543
  screenshotBase64,
1544
1544
  sectionRect,
1545
- _chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_QWEN_VL)
1545
+ _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL)
1546
1546
  );
1547
1547
  }
1548
1548
  return {
@@ -1609,7 +1609,7 @@ async function AiAssert(options) {
1609
1609
  _utils.assert.call(void 0, assertion, "assertion should be a string");
1610
1610
  const { screenshotBase64 } = context;
1611
1611
  const systemPrompt = systemPromptToAssert({
1612
- isUITars: _chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_VLM_UI_TARS)
1612
+ isUITars: _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS)
1613
1613
  });
1614
1614
  const msgs = [
1615
1615
  { role: "system", content: systemPrompt },
@@ -1663,7 +1663,7 @@ async function plan(userInstruction, opts) {
1663
1663
  taskBackgroundContext: taskBackgroundContextText
1664
1664
  });
1665
1665
  let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
1666
- if (_chunkUBGEKXK7js.vlLocateMode.call(void 0, ) === "qwen-vl") {
1666
+ if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, ) === "qwen-vl") {
1667
1667
  imagePayload = await _img.paddingToMatchBlockByBase64.call(void 0, imagePayload);
1668
1668
  }
1669
1669
  warnGPT4oSizeLimit(size);
@@ -1698,7 +1698,7 @@ async function plan(userInstruction, opts) {
1698
1698
  usage
1699
1699
  };
1700
1700
  _utils.assert.call(void 0, planFromAI, "can't get plans from AI");
1701
- if (_chunkUBGEKXK7js.vlLocateMode.call(void 0, )) {
1701
+ if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
1702
1702
  actions.forEach((action) => {
1703
1703
  if (action.locate) {
1704
1704
  action.locate = fillLocateParam(
@@ -1888,4 +1888,4 @@ function getPoint(startBox, size) {
1888
1888
 
1889
1889
  exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.describeUserPage = describeUserPage; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning;
1890
1890
 
1891
- //# sourceMappingURL=chunk-5EO33FHK.js.map
1891
+ //# sourceMappingURL=chunk-QX6K65KH.js.map
package/dist/lib/env.d.ts CHANGED
@@ -60,7 +60,7 @@ declare const allConfigFromEnv: () => {
60
60
  AZURE_OPENAI_DEPLOYMENT: string | undefined;
61
61
  };
62
62
  declare let userConfig: Partial<ReturnType<typeof allConfigFromEnv>>;
63
- declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | false;
63
+ declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | "vlm-ui-tars" | false;
64
64
  declare const getAIConfig: (configKey: keyof typeof userConfig) => string | undefined;
65
65
  declare const getAIConfigInBoolean: (configKey: keyof typeof userConfig) => boolean;
66
66
  declare const getAIConfigInJson: (configKey: keyof typeof userConfig) => any;
package/dist/lib/env.js CHANGED
@@ -35,7 +35,7 @@
35
35
 
36
36
 
37
37
 
38
- var _chunkUBGEKXK7js = require('./chunk-UBGEKXK7.js');
38
+ var _chunk6SYVFZ6Cjs = require('./chunk-6SYVFZ6C.js');
39
39
 
40
40
 
41
41
 
@@ -73,4 +73,4 @@ var _chunkUBGEKXK7js = require('./chunk-UBGEKXK7.js');
73
73
 
74
74
 
75
75
 
76
- exports.ANTHROPIC_API_KEY = _chunkUBGEKXK7js.ANTHROPIC_API_KEY; exports.AZURE_OPENAI_API_VERSION = _chunkUBGEKXK7js.AZURE_OPENAI_API_VERSION; exports.AZURE_OPENAI_DEPLOYMENT = _chunkUBGEKXK7js.AZURE_OPENAI_DEPLOYMENT; exports.AZURE_OPENAI_ENDPOINT = _chunkUBGEKXK7js.AZURE_OPENAI_ENDPOINT; exports.AZURE_OPENAI_KEY = _chunkUBGEKXK7js.AZURE_OPENAI_KEY; exports.MATCH_BY_POSITION = _chunkUBGEKXK7js.MATCH_BY_POSITION; exports.MIDSCENE_API_TYPE = _chunkUBGEKXK7js.MIDSCENE_API_TYPE; exports.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = _chunkUBGEKXK7js.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON; exports.MIDSCENE_AZURE_OPENAI_SCOPE = _chunkUBGEKXK7js.MIDSCENE_AZURE_OPENAI_SCOPE; exports.MIDSCENE_CACHE = _chunkUBGEKXK7js.MIDSCENE_CACHE; exports.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = _chunkUBGEKXK7js.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG; exports.MIDSCENE_DEBUG_AI_PROFILE = _chunkUBGEKXK7js.MIDSCENE_DEBUG_AI_PROFILE; exports.MIDSCENE_DEBUG_AI_RESPONSE = _chunkUBGEKXK7js.MIDSCENE_DEBUG_AI_RESPONSE; exports.MIDSCENE_DEBUG_MODE = _chunkUBGEKXK7js.MIDSCENE_DEBUG_MODE; exports.MIDSCENE_FORCE_DEEP_THINK = _chunkUBGEKXK7js.MIDSCENE_FORCE_DEEP_THINK; exports.MIDSCENE_LANGSMITH_DEBUG = _chunkUBGEKXK7js.MIDSCENE_LANGSMITH_DEBUG; exports.MIDSCENE_MODEL_NAME = _chunkUBGEKXK7js.MIDSCENE_MODEL_NAME; exports.MIDSCENE_OPENAI_INIT_CONFIG_JSON = _chunkUBGEKXK7js.MIDSCENE_OPENAI_INIT_CONFIG_JSON; exports.MIDSCENE_OPENAI_SOCKS_PROXY = _chunkUBGEKXK7js.MIDSCENE_OPENAI_SOCKS_PROXY; exports.MIDSCENE_REPORT_TAG_NAME = _chunkUBGEKXK7js.MIDSCENE_REPORT_TAG_NAME; exports.MIDSCENE_USE_ANTHROPIC_SDK = _chunkUBGEKXK7js.MIDSCENE_USE_ANTHROPIC_SDK; exports.MIDSCENE_USE_AZURE_OPENAI = _chunkUBGEKXK7js.MIDSCENE_USE_AZURE_OPENAI; exports.MIDSCENE_USE_DOUBAO_VISION = _chunkUBGEKXK7js.MIDSCENE_USE_DOUBAO_VISION; exports.MIDSCENE_USE_QWEN_VL = _chunkUBGEKXK7js.MIDSCENE_USE_QWEN_VL; exports.MIDSCENE_USE_VLM_UI_TARS = _chunkUBGEKXK7js.MIDSCENE_USE_VLM_UI_TARS; exports.MIDSCENE_USE_VL_MODEL = _chunkUBGEKXK7js.MIDSCENE_USE_VL_MODEL; exports.OPENAI_API_KEY = _chunkUBGEKXK7js.OPENAI_API_KEY; exports.OPENAI_BASE_URL = _chunkUBGEKXK7js.OPENAI_BASE_URL; exports.OPENAI_MAX_TOKENS = _chunkUBGEKXK7js.OPENAI_MAX_TOKENS; exports.OPENAI_USE_AZURE = _chunkUBGEKXK7js.OPENAI_USE_AZURE; exports.allAIConfig = _chunkUBGEKXK7js.allAIConfig; exports.getAIConfig = _chunkUBGEKXK7js.getAIConfig; exports.getAIConfigInBoolean = _chunkUBGEKXK7js.getAIConfigInBoolean; exports.getAIConfigInJson = _chunkUBGEKXK7js.getAIConfigInJson; exports.overrideAIConfig = _chunkUBGEKXK7js.overrideAIConfig; exports.vlLocateMode = _chunkUBGEKXK7js.vlLocateMode;
76
+ exports.ANTHROPIC_API_KEY = _chunk6SYVFZ6Cjs.ANTHROPIC_API_KEY; exports.AZURE_OPENAI_API_VERSION = _chunk6SYVFZ6Cjs.AZURE_OPENAI_API_VERSION; exports.AZURE_OPENAI_DEPLOYMENT = _chunk6SYVFZ6Cjs.AZURE_OPENAI_DEPLOYMENT; exports.AZURE_OPENAI_ENDPOINT = _chunk6SYVFZ6Cjs.AZURE_OPENAI_ENDPOINT; exports.AZURE_OPENAI_KEY = _chunk6SYVFZ6Cjs.AZURE_OPENAI_KEY; exports.MATCH_BY_POSITION = _chunk6SYVFZ6Cjs.MATCH_BY_POSITION; exports.MIDSCENE_API_TYPE = _chunk6SYVFZ6Cjs.MIDSCENE_API_TYPE; exports.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON = _chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON; exports.MIDSCENE_AZURE_OPENAI_SCOPE = _chunk6SYVFZ6Cjs.MIDSCENE_AZURE_OPENAI_SCOPE; exports.MIDSCENE_CACHE = _chunk6SYVFZ6Cjs.MIDSCENE_CACHE; exports.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = _chunk6SYVFZ6Cjs.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG; exports.MIDSCENE_DEBUG_AI_PROFILE = _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_PROFILE; exports.MIDSCENE_DEBUG_AI_RESPONSE = _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_AI_RESPONSE; exports.MIDSCENE_DEBUG_MODE = _chunk6SYVFZ6Cjs.MIDSCENE_DEBUG_MODE; exports.MIDSCENE_FORCE_DEEP_THINK = _chunk6SYVFZ6Cjs.MIDSCENE_FORCE_DEEP_THINK; exports.MIDSCENE_LANGSMITH_DEBUG = _chunk6SYVFZ6Cjs.MIDSCENE_LANGSMITH_DEBUG; exports.MIDSCENE_MODEL_NAME = _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME; exports.MIDSCENE_OPENAI_INIT_CONFIG_JSON = _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_INIT_CONFIG_JSON; exports.MIDSCENE_OPENAI_SOCKS_PROXY = _chunk6SYVFZ6Cjs.MIDSCENE_OPENAI_SOCKS_PROXY; exports.MIDSCENE_REPORT_TAG_NAME = _chunk6SYVFZ6Cjs.MIDSCENE_REPORT_TAG_NAME; exports.MIDSCENE_USE_ANTHROPIC_SDK = _chunk6SYVFZ6Cjs.MIDSCENE_USE_ANTHROPIC_SDK; exports.MIDSCENE_USE_AZURE_OPENAI = _chunk6SYVFZ6Cjs.MIDSCENE_USE_AZURE_OPENAI; exports.MIDSCENE_USE_DOUBAO_VISION = _chunk6SYVFZ6Cjs.MIDSCENE_USE_DOUBAO_VISION; exports.MIDSCENE_USE_QWEN_VL = _chunk6SYVFZ6Cjs.MIDSCENE_USE_QWEN_VL; exports.MIDSCENE_USE_VLM_UI_TARS = _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS; exports.MIDSCENE_USE_VL_MODEL = _chunk6SYVFZ6Cjs.MIDSCENE_USE_VL_MODEL; exports.OPENAI_API_KEY = _chunk6SYVFZ6Cjs.OPENAI_API_KEY; exports.OPENAI_BASE_URL = _chunk6SYVFZ6Cjs.OPENAI_BASE_URL; exports.OPENAI_MAX_TOKENS = _chunk6SYVFZ6Cjs.OPENAI_MAX_TOKENS; exports.OPENAI_USE_AZURE = _chunk6SYVFZ6Cjs.OPENAI_USE_AZURE; exports.allAIConfig = _chunk6SYVFZ6Cjs.allAIConfig; exports.getAIConfig = _chunk6SYVFZ6Cjs.getAIConfig; exports.getAIConfigInBoolean = _chunk6SYVFZ6Cjs.getAIConfigInBoolean; exports.getAIConfigInJson = _chunk6SYVFZ6Cjs.getAIConfigInJson; exports.overrideAIConfig = _chunk6SYVFZ6Cjs.overrideAIConfig; exports.vlLocateMode = _chunk6SYVFZ6Cjs.vlLocateMode;
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-e2a418c3.js';
2
- export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext, aE as FreeFn, aq as GroupedActionDump, z as InsightDump, v as InsightExtractParam, G as LiteUISection, ar as LocateOption, w as LocateResultElement, i as MidsceneYamlFlowItem, au as MidsceneYamlFlowItemAIAction, av as MidsceneYamlFlowItemAIAssert, az as MidsceneYamlFlowItemAIHover, aA as MidsceneYamlFlowItemAIInput, aB as MidsceneYamlFlowItemAIKeyboardPress, aw as MidsceneYamlFlowItemAIQuery, aC as MidsceneYamlFlowItemAIScroll, ay as MidsceneYamlFlowItemAITap, ax as MidsceneYamlFlowItemAIWaitFor, aD as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, at as MidsceneYamlScriptEnv, h as MidsceneYamlTask, O as OnTaskStartTip, F as PartialInsightDumpFromSDK, T as PlanningAIResponse, Q as PlanningAction, Z as PlanningActionParamAssert, $ as PlanningActionParamError, W as PlanningActionParamHover, X as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamScroll, _ as PlanningActionParamSleep, V as PlanningActionParamTap, a0 as PlanningActionParamWaitFor, N as PlanningLocateParam, a4 as PlaywrightParserOpt, P as Point, a3 as PuppeteerParserOpt, R as Rect, y as ReportDumpWithAttributes, aG as ScriptPlayerStatusValue, aF as ScriptPlayerTaskStatus, S as Size, a8 as TaskCacheInfo, as as scrollParam } from './types-e2a418c3.js';
3
- import { c as callAiFn } from './llm-planning-bd80e99e.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-bd80e99e.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-d2831105.js';
2
+ export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext, aH as FreeFn, aq as GroupedActionDump, z as InsightDump, v as InsightExtractParam, G as LiteUISection, ar as LocateOption, w as LocateResultElement, i as MidsceneYamlFlowItem, ax as MidsceneYamlFlowItemAIAction, ay as MidsceneYamlFlowItemAIAssert, aC as MidsceneYamlFlowItemAIHover, aD as MidsceneYamlFlowItemAIInput, aE as MidsceneYamlFlowItemAIKeyboardPress, az as MidsceneYamlFlowItemAIQuery, aF as MidsceneYamlFlowItemAIScroll, aB as MidsceneYamlFlowItemAITap, aA as MidsceneYamlFlowItemAIWaitFor, aG as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, av as MidsceneYamlScriptAndroidEnv, aw as MidsceneYamlScriptEnv, at as MidsceneYamlScriptEnvBase, au as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, O as OnTaskStartTip, F as PartialInsightDumpFromSDK, T as PlanningAIResponse, Q as PlanningAction, Z as PlanningActionParamAssert, $ as PlanningActionParamError, W as PlanningActionParamHover, X as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamScroll, _ as PlanningActionParamSleep, V as PlanningActionParamTap, a0 as PlanningActionParamWaitFor, N as PlanningLocateParam, a4 as PlaywrightParserOpt, P as Point, a3 as PuppeteerParserOpt, R as Rect, y as ReportDumpWithAttributes, aJ as ScriptPlayerStatusValue, aI as ScriptPlayerTaskStatus, S as Size, a8 as TaskCacheInfo, as as scrollParam } from './types-d2831105.js';
3
+ import { c as callAiFn } from './llm-planning-d084250f.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-d084250f.js';
5
5
  export { getLogDirByType, getVersion, setLogDir } from './utils.js';
6
6
  export { MIDSCENE_MODEL_NAME, getAIConfig } from './env.js';
7
7
  import '@midscene/shared/constants';
package/dist/lib/index.js CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
 
5
5
 
6
- var _chunkSAXMKI7Zjs = require('./chunk-SAXMKI7Z.js');
6
+ var _chunk6MPYG6WRjs = require('./chunk-6MPYG6WR.js');
7
7
 
8
8
 
9
9
 
@@ -12,7 +12,7 @@ var _chunkSAXMKI7Zjs = require('./chunk-SAXMKI7Z.js');
12
12
 
13
13
 
14
14
 
15
- var _chunk5EO33FHKjs = require('./chunk-5EO33FHK.js');
15
+ var _chunkQX6K65KHjs = require('./chunk-QX6K65KH.js');
16
16
 
17
17
 
18
18
 
@@ -20,7 +20,7 @@ var _chunk5EO33FHKjs = require('./chunk-5EO33FHK.js');
20
20
 
21
21
 
22
22
 
23
- var _chunkUBGEKXK7js = require('./chunk-UBGEKXK7.js');
23
+ var _chunk6SYVFZ6Cjs = require('./chunk-6SYVFZ6C.js');
24
24
 
25
25
  // src/ai-model/action-executor.ts
26
26
  var _utils = require('@midscene/shared/utils');
@@ -166,8 +166,8 @@ ${_optionalChain([this, 'access', _7 => _7.latestErrorTask, 'call', _8 => _8(),
166
166
  }
167
167
  dump() {
168
168
  const dumpData = {
169
- sdkVersion: _chunkSAXMKI7Zjs.getVersion.call(void 0, ),
170
- model_name: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_MODEL_NAME) || "",
169
+ sdkVersion: _chunk6MPYG6WRjs.getVersion.call(void 0, ),
170
+ model_name: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME) || "",
171
171
  logTime: Date.now(),
172
172
  name: this.name,
173
173
  tasks: this.tasks
@@ -184,18 +184,18 @@ var _logger = require('@midscene/shared/logger');
184
184
 
185
185
 
186
186
  function emitInsightDump(data, dumpSubscriber) {
187
- const logDir = _chunkSAXMKI7Zjs.getLogDir.call(void 0, );
187
+ const logDir = _chunk6MPYG6WRjs.getLogDir.call(void 0, );
188
188
  _utils.assert.call(void 0, logDir, "logDir should be set before writing dump file");
189
189
  let modelDescription = "";
190
- if (_chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0, _chunkUBGEKXK7js.MIDSCENE_USE_VLM_UI_TARS)) {
190
+ if (_chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_USE_VLM_UI_TARS)) {
191
191
  modelDescription = "vlm-ui-tars mode";
192
- } else if (_chunkUBGEKXK7js.vlLocateMode.call(void 0, )) {
193
- modelDescription = `${_chunkUBGEKXK7js.vlLocateMode.call(void 0, )} mode`;
192
+ } else if (_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
193
+ modelDescription = `${_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )} mode`;
194
194
  }
195
195
  const baseData = {
196
- sdkVersion: _chunkSAXMKI7Zjs.getVersion.call(void 0, ),
196
+ sdkVersion: _chunk6MPYG6WRjs.getVersion.call(void 0, ),
197
197
  logTime: Date.now(),
198
- model_name: _chunkUBGEKXK7js.getAIConfig.call(void 0, _chunkUBGEKXK7js.MIDSCENE_MODEL_NAME) || "",
198
+ model_name: _chunk6SYVFZ6Cjs.getAIConfig.call(void 0, _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME) || "",
199
199
  model_description: modelDescription
200
200
  };
201
201
  const finalData = {
@@ -210,7 +210,7 @@ function emitInsightDump(data, dumpSubscriber) {
210
210
  var debug = _logger.getDebug.call(void 0, "ai:insight");
211
211
  var Insight = class {
212
212
  constructor(context, opt) {
213
- this.aiVendorFn = _chunk5EO33FHKjs.callAiFn;
213
+ this.aiVendorFn = _chunkQX6K65KHjs.callAiFn;
214
214
  _utils.assert.call(void 0, context, "context is required for Insight");
215
215
  if (typeof context === "function") {
216
216
  this.contextRetrieverFn = context;
@@ -234,8 +234,8 @@ var Insight = class {
234
234
  const dumpSubscriber = this.onceDumpUpdatedFn;
235
235
  this.onceDumpUpdatedFn = void 0;
236
236
  _utils.assert.call(void 0, typeof query === "object", "query should be an object for locate");
237
- const globalDeepThinkSwitch = _chunkUBGEKXK7js.getAIConfigInBoolean.call(void 0,
238
- _chunkUBGEKXK7js.MIDSCENE_FORCE_DEEP_THINK
237
+ const globalDeepThinkSwitch = _chunk6SYVFZ6Cjs.getAIConfigInBoolean.call(void 0,
238
+ _chunk6SYVFZ6Cjs.MIDSCENE_FORCE_DEEP_THINK
239
239
  );
240
240
  if (globalDeepThinkSwitch) {
241
241
  debug("globalDeepThinkSwitch", globalDeepThinkSwitch);
@@ -244,7 +244,7 @@ var Insight = class {
244
244
  if (query.deepThink || globalDeepThinkSwitch) {
245
245
  searchAreaPrompt = query.prompt;
246
246
  }
247
- if (searchAreaPrompt && !_chunkUBGEKXK7js.vlLocateMode.call(void 0, )) {
247
+ if (searchAreaPrompt && !_chunk6SYVFZ6Cjs.vlLocateMode.call(void 0, )) {
248
248
  console.warn(
249
249
  'The "deepThink" feature is not supported with general purposed LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model'
250
250
  );
@@ -256,7 +256,7 @@ var Insight = class {
256
256
  let searchAreaUsage = void 0;
257
257
  let searchAreaResponse = void 0;
258
258
  if (searchAreaPrompt) {
259
- searchAreaResponse = await _chunk5EO33FHKjs.AiLocateSection.call(void 0, {
259
+ searchAreaResponse = await _chunkQX6K65KHjs.AiLocateSection.call(void 0, {
260
260
  context,
261
261
  sectionDescription: searchAreaPrompt
262
262
  });
@@ -269,7 +269,7 @@ var Insight = class {
269
269
  searchArea = searchAreaResponse.rect;
270
270
  }
271
271
  const startTime = Date.now();
272
- const { parseResult, rect, elementById, rawResponse, usage } = await _chunk5EO33FHKjs.AiLocateElement.call(void 0, {
272
+ const { parseResult, rect, elementById, rawResponse, usage } = await _chunkQX6K65KHjs.AiLocateElement.call(void 0, {
273
273
  callAI: callAI || this.aiVendorFn,
274
274
  context,
275
275
  targetElementDescription: queryPrompt,
@@ -358,7 +358,7 @@ ${parseResult.errors.join("\n")}`;
358
358
  this.onceDumpUpdatedFn = void 0;
359
359
  const context = await this.contextRetrieverFn("extract");
360
360
  const startTime = Date.now();
361
- const { parseResult, usage } = await _chunk5EO33FHKjs.AiExtractElementInfo.call(void 0, {
361
+ const { parseResult, usage } = await _chunkQX6K65KHjs.AiExtractElementInfo.call(void 0, {
362
362
  context,
363
363
  dataQuery: dataDemand
364
364
  });
@@ -410,7 +410,7 @@ ${parseResult.errors.join("\n")}`;
410
410
  this.onceDumpUpdatedFn = void 0;
411
411
  const context = await this.contextRetrieverFn("assert");
412
412
  const startTime = Date.now();
413
- const assertResult = await _chunk5EO33FHKjs.AiAssert.call(void 0, {
413
+ const assertResult = await _chunkQX6K65KHjs.AiAssert.call(void 0, {
414
414
  assertion,
415
415
  context
416
416
  });
@@ -458,6 +458,6 @@ var src_default = Insight;
458
458
 
459
459
 
460
460
 
461
- exports.AiAssert = _chunk5EO33FHKjs.AiAssert; exports.AiLocateElement = _chunk5EO33FHKjs.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _chunkUBGEKXK7js.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunk5EO33FHKjs.describeUserPage; exports.getAIConfig = _chunkUBGEKXK7js.getAIConfig; exports.getLogDirByType = _chunkSAXMKI7Zjs.getLogDirByType; exports.getVersion = _chunkSAXMKI7Zjs.getVersion; exports.plan = _chunk5EO33FHKjs.plan; exports.setLogDir = _chunkSAXMKI7Zjs.setLogDir;
461
+ exports.AiAssert = _chunkQX6K65KHjs.AiAssert; exports.AiLocateElement = _chunkQX6K65KHjs.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _chunk6SYVFZ6Cjs.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkQX6K65KHjs.describeUserPage; exports.getAIConfig = _chunk6SYVFZ6Cjs.getAIConfig; exports.getLogDirByType = _chunk6MPYG6WRjs.getLogDirByType; exports.getVersion = _chunk6MPYG6WRjs.getVersion; exports.plan = _chunkQX6K65KHjs.plan; exports.setLogDir = _chunk6MPYG6WRjs.setLogDir;
462
462
 
463
463
  //# sourceMappingURL=index.js.map
@@ -1,4 +1,4 @@
1
- import { k as AIUsageInfo, R as Rect, B as BaseElement, U as UIContext, A as AISingleElementResponse, n as AISingleElementResponseByPosition, o as AIElementLocatorResponse, H as ElementById, r as AIDataExtractionResponse, t as AIAssertionResponse, T as PlanningAIResponse } from './types-e2a418c3.js';
1
+ import { k as AIUsageInfo, R as Rect, B as BaseElement, U as UIContext, A as AISingleElementResponse, n as AISingleElementResponseByPosition, o as AIElementLocatorResponse, H as ElementById, r as AIDataExtractionResponse, t as AIAssertionResponse, T as PlanningAIResponse } from './types-d2831105.js';
2
2
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
3
 
4
4
  type AIArgs = [
@@ -1,5 +1,5 @@
1
1
  import * as _midscene_shared_constants from '@midscene/shared/constants';
2
- import { B as BaseElement, j as ElementTreeNode } from './types-e2a418c3.js';
2
+ import { B as BaseElement, j as ElementTreeNode } from './types-d2831105.js';
3
3
  import 'openai/resources';
4
4
 
5
5
  declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
@@ -14,7 +14,9 @@ interface scrollParam {
14
14
  distance?: null | number;
15
15
  }
16
16
  interface MidsceneYamlScript {
17
- target: MidsceneYamlScriptEnv;
17
+ target?: MidsceneYamlScriptWebEnv;
18
+ web?: MidsceneYamlScriptWebEnv;
19
+ android?: MidsceneYamlScriptAndroidEnv;
18
20
  tasks: MidsceneYamlTask[];
19
21
  }
20
22
  interface MidsceneYamlTask {
@@ -22,9 +24,11 @@ interface MidsceneYamlTask {
22
24
  flow: MidsceneYamlFlowItem[];
23
25
  continueOnError?: boolean;
24
26
  }
25
- interface MidsceneYamlScriptEnv {
27
+ interface MidsceneYamlScriptEnvBase {
26
28
  output?: string;
27
29
  aiActionContext?: string;
30
+ }
31
+ interface MidsceneYamlScriptWebEnv extends MidsceneYamlScriptEnvBase {
28
32
  serve?: string;
29
33
  url: string;
30
34
  userAgent?: string;
@@ -41,6 +45,11 @@ interface MidsceneYamlScriptEnv {
41
45
  bridgeMode?: false | 'newTabWithUrl' | 'currentTab';
42
46
  closeNewTabsAfterDisconnect?: boolean;
43
47
  }
48
+ interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptEnvBase {
49
+ deviceId?: string;
50
+ launch?: string;
51
+ }
52
+ type MidsceneYamlScriptEnv = MidsceneYamlScriptWebEnv | MidsceneYamlScriptAndroidEnv;
44
53
  interface MidsceneYamlFlowItemAIAction {
45
54
  ai?: string;
46
55
  aiAction?: string;
@@ -410,4 +419,4 @@ interface GroupedActionDump {
410
419
  executions: ExecutionDump[];
411
420
  }
412
421
 
413
- export { type PlanningActionParamError as $, type AISingleElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PartialInsightDumpFromSDK as F, type LiteUISection as G, type ElementById as H, type InsightAction as I, type AgentWaitForOpt as J, type AgentAssertOpt as K, type LocateResult as L, type MidsceneYamlScript as M, type PlanningLocateParam as N, type OnTaskStartTip as O, type Point as P, type PlanningAction as Q, type Rect as R, type Size as S, type PlanningAIResponse as T, UIContext as U, type PlanningActionParamTap as V, type PlanningActionParamHover as W, type PlanningActionParamInputOrKeyPress as X, type PlanningActionParamScroll as Y, type PlanningActionParamAssert as Z, type PlanningActionParamSleep as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamWaitFor as a0, type Color as a1, type BaseAgentParserOpt as a2, type PuppeteerParserOpt as a3, type PlaywrightParserOpt as a4, type ExecutionRecorderItem as a5, type ExecutionTaskType as a6, type ExecutorContext as a7, type TaskCacheInfo as a8, type ExecutionTaskReturn as a9, type MidsceneYamlFlowItemAIInput as aA, type MidsceneYamlFlowItemAIKeyboardPress as aB, type MidsceneYamlFlowItemAIScroll as aC, type MidsceneYamlFlowItemSleep as aD, type FreeFn as aE, type ScriptPlayerTaskStatus as aF, type ScriptPlayerStatusValue as aG, type ExecutionTaskInsightLocateParam as aa, type ExecutionTaskInsightLocateOutput as ab, type ExecutionTaskInsightDumpLog as ac, type ExecutionTaskInsightLocateApply as ad, type ExecutionTaskInsightLocate as ae, type ExecutionTaskInsightQueryParam as af, type ExecutionTaskInsightQueryOutput as ag, type ExecutionTaskInsightQueryApply as ah, type ExecutionTaskInsightQuery as ai, type ExecutionTaskInsightAssertionParam as aj, type ExecutionTaskInsightAssertionApply as ak, type ExecutionTaskInsightAssertion as al, type ExecutionTaskActionApply as am, type ExecutionTaskAction as an, type ExecutionTaskPlanningApply as ao, type ExecutionTaskPlanning as ap, type GroupedActionDump as aq, type LocateOption as ar, type scrollParam as as, type MidsceneYamlScriptEnv as at, type MidsceneYamlFlowItemAIAction as au, type MidsceneYamlFlowItemAIAssert as av, type MidsceneYamlFlowItemAIQuery as aw, type MidsceneYamlFlowItemAIWaitFor as ax, type MidsceneYamlFlowItemAITap as ay, type MidsceneYamlFlowItemAIHover as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, type AIUsageInfo as k, AIResponseFormat as l, type AISingleElementResponseById as m, type AISingleElementResponseByPosition as n, type AIElementLocatorResponse as o, type AIElementCoordinatesResponse as p, type AIElementResponse as q, type AIDataExtractionResponse as r, type AISectionLocatorResponse as s, type AIAssertionResponse as t, type EnsureObject as u, type InsightExtractParam as v, type LocateResultElement as w, type DumpMeta as x, type ReportDumpWithAttributes as y, type InsightDump as z };
422
+ export { type PlanningActionParamError as $, type AISingleElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PartialInsightDumpFromSDK as F, type LiteUISection as G, type ElementById as H, type InsightAction as I, type AgentWaitForOpt as J, type AgentAssertOpt as K, type LocateResult as L, type MidsceneYamlScript as M, type PlanningLocateParam as N, type OnTaskStartTip as O, type Point as P, type PlanningAction as Q, type Rect as R, type Size as S, type PlanningAIResponse as T, UIContext as U, type PlanningActionParamTap as V, type PlanningActionParamHover as W, type PlanningActionParamInputOrKeyPress as X, type PlanningActionParamScroll as Y, type PlanningActionParamAssert as Z, type PlanningActionParamSleep as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamWaitFor as a0, type Color as a1, type BaseAgentParserOpt as a2, type PuppeteerParserOpt as a3, type PlaywrightParserOpt as a4, type ExecutionRecorderItem as a5, type ExecutionTaskType as a6, type ExecutorContext as a7, type TaskCacheInfo as a8, type ExecutionTaskReturn as a9, type MidsceneYamlFlowItemAIWaitFor as aA, type MidsceneYamlFlowItemAITap as aB, type MidsceneYamlFlowItemAIHover as aC, type MidsceneYamlFlowItemAIInput as aD, type MidsceneYamlFlowItemAIKeyboardPress as aE, type MidsceneYamlFlowItemAIScroll as aF, type MidsceneYamlFlowItemSleep as aG, type FreeFn as aH, type ScriptPlayerTaskStatus as aI, type ScriptPlayerStatusValue as aJ, type ExecutionTaskInsightLocateParam as aa, type ExecutionTaskInsightLocateOutput as ab, type ExecutionTaskInsightDumpLog as ac, type ExecutionTaskInsightLocateApply as ad, type ExecutionTaskInsightLocate as ae, type ExecutionTaskInsightQueryParam as af, type ExecutionTaskInsightQueryOutput as ag, type ExecutionTaskInsightQueryApply as ah, type ExecutionTaskInsightQuery as ai, type ExecutionTaskInsightAssertionParam as aj, type ExecutionTaskInsightAssertionApply as ak, type ExecutionTaskInsightAssertion as al, type ExecutionTaskActionApply as am, type ExecutionTaskAction as an, type ExecutionTaskPlanningApply as ao, type ExecutionTaskPlanning as ap, type GroupedActionDump as aq, type LocateOption as ar, type scrollParam as as, type MidsceneYamlScriptEnvBase as at, type MidsceneYamlScriptWebEnv as au, type MidsceneYamlScriptAndroidEnv as av, type MidsceneYamlScriptEnv as aw, type MidsceneYamlFlowItemAIAction as ax, type MidsceneYamlFlowItemAIAssert as ay, type MidsceneYamlFlowItemAIQuery as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, type AIUsageInfo as k, AIResponseFormat as l, type AISingleElementResponseById as m, type AISingleElementResponseByPosition as n, type AIElementLocatorResponse as o, type AIElementCoordinatesResponse as p, type AIElementResponse as q, type AIDataExtractionResponse as r, type AISectionLocatorResponse as s, type AIAssertionResponse as t, type EnsureObject as u, type InsightExtractParam as v, type LocateResultElement as w, type DumpMeta as x, type ReportDumpWithAttributes as y, type InsightDump as z };
@@ -1,4 +1,4 @@
1
- import { y as ReportDumpWithAttributes, R as Rect } from './types-e2a418c3.js';
1
+ import { y as ReportDumpWithAttributes, R as Rect } from './types-d2831105.js';
2
2
  import '@midscene/shared/constants';
3
3
  import 'openai/resources';
4
4
 
package/dist/lib/utils.js CHANGED
@@ -16,8 +16,8 @@
16
16
 
17
17
 
18
18
 
19
- var _chunkSAXMKI7Zjs = require('./chunk-SAXMKI7Z.js');
20
- require('./chunk-UBGEKXK7.js');
19
+ var _chunk6MPYG6WRjs = require('./chunk-6MPYG6WR.js');
20
+ require('./chunk-6SYVFZ6C.js');
21
21
 
22
22
 
23
23
 
@@ -36,4 +36,4 @@ require('./chunk-UBGEKXK7.js');
36
36
 
37
37
 
38
38
 
39
- exports.getLogDir = _chunkSAXMKI7Zjs.getLogDir; exports.getLogDirByType = _chunkSAXMKI7Zjs.getLogDirByType; exports.getTmpDir = _chunkSAXMKI7Zjs.getTmpDir; exports.getTmpFile = _chunkSAXMKI7Zjs.getTmpFile; exports.getVersion = _chunkSAXMKI7Zjs.getVersion; exports.groupedActionDumpFileExt = _chunkSAXMKI7Zjs.groupedActionDumpFileExt; exports.overlapped = _chunkSAXMKI7Zjs.overlapped; exports.replaceStringWithFirstAppearance = _chunkSAXMKI7Zjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunkSAXMKI7Zjs.replacerForPageObject; exports.reportHTMLContent = _chunkSAXMKI7Zjs.reportHTMLContent; exports.setLogDir = _chunkSAXMKI7Zjs.setLogDir; exports.setReportTpl = _chunkSAXMKI7Zjs.setReportTpl; exports.sleep = _chunkSAXMKI7Zjs.sleep; exports.stringifyDumpData = _chunkSAXMKI7Zjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkSAXMKI7Zjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkSAXMKI7Zjs.writeDumpReport; exports.writeLogFile = _chunkSAXMKI7Zjs.writeLogFile;
39
+ exports.getLogDir = _chunk6MPYG6WRjs.getLogDir; exports.getLogDirByType = _chunk6MPYG6WRjs.getLogDirByType; exports.getTmpDir = _chunk6MPYG6WRjs.getTmpDir; exports.getTmpFile = _chunk6MPYG6WRjs.getTmpFile; exports.getVersion = _chunk6MPYG6WRjs.getVersion; exports.groupedActionDumpFileExt = _chunk6MPYG6WRjs.groupedActionDumpFileExt; exports.overlapped = _chunk6MPYG6WRjs.overlapped; exports.replaceStringWithFirstAppearance = _chunk6MPYG6WRjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunk6MPYG6WRjs.replacerForPageObject; exports.reportHTMLContent = _chunk6MPYG6WRjs.reportHTMLContent; exports.setLogDir = _chunk6MPYG6WRjs.setLogDir; exports.setReportTpl = _chunk6MPYG6WRjs.setReportTpl; exports.sleep = _chunk6MPYG6WRjs.sleep; exports.stringifyDumpData = _chunk6MPYG6WRjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunk6MPYG6WRjs.uploadTestInfoToServer; exports.writeDumpReport = _chunk6MPYG6WRjs.writeDumpReport; exports.writeLogFile = _chunk6MPYG6WRjs.writeLogFile;
@@ -1,8 +1,8 @@
1
- import { k as AIUsageInfo, Q as PlanningAction } from './types-e2a418c3.js';
1
+ import { k as AIUsageInfo, Q as PlanningAction } from './types-d2831105.js';
2
2
  import { ChatCompletionMessageParam } from 'openai/resources';
3
3
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { b as AIActionType } from './llm-planning-bd80e99e.js';
5
- export { a as AiAssert, e as AiExtractElementInfo, A as AiLocateElement, f as AiLocateSection, g as adaptBboxToRect, c as callAiFn, d as describeUserPage, p as plan } from './llm-planning-bd80e99e.js';
4
+ import { b as AIActionType } from './llm-planning-d084250f.js';
5
+ export { a as AiAssert, e as AiExtractElementInfo, A as AiLocateElement, f as AiLocateSection, g as adaptBboxToRect, c as callAiFn, d as describeUserPage, p as plan } from './llm-planning-d084250f.js';
6
6
  import { actionParser } from '@ui-tars/action-parser';
7
7
  import '@midscene/shared/constants';
8
8
 
@@ -60,7 +60,7 @@ declare const allConfigFromEnv: () => {
60
60
  AZURE_OPENAI_DEPLOYMENT: string | undefined;
61
61
  };
62
62
  declare let userConfig: Partial<ReturnType<typeof allConfigFromEnv>>;
63
- declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | false;
63
+ declare const vlLocateMode: () => "qwen-vl" | "doubao-vision" | "vl-model" | "vlm-ui-tars" | false;
64
64
  declare const getAIConfig: (configKey: keyof typeof userConfig) => string | undefined;
65
65
  declare const getAIConfigInBoolean: (configKey: keyof typeof userConfig) => boolean;
66
66
  declare const getAIConfigInJson: (configKey: keyof typeof userConfig) => any;
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-e2a418c3.js';
2
- export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext, aE as FreeFn, aq as GroupedActionDump, z as InsightDump, v as InsightExtractParam, G as LiteUISection, ar as LocateOption, w as LocateResultElement, i as MidsceneYamlFlowItem, au as MidsceneYamlFlowItemAIAction, av as MidsceneYamlFlowItemAIAssert, az as MidsceneYamlFlowItemAIHover, aA as MidsceneYamlFlowItemAIInput, aB as MidsceneYamlFlowItemAIKeyboardPress, aw as MidsceneYamlFlowItemAIQuery, aC as MidsceneYamlFlowItemAIScroll, ay as MidsceneYamlFlowItemAITap, ax as MidsceneYamlFlowItemAIWaitFor, aD as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, at as MidsceneYamlScriptEnv, h as MidsceneYamlTask, O as OnTaskStartTip, F as PartialInsightDumpFromSDK, T as PlanningAIResponse, Q as PlanningAction, Z as PlanningActionParamAssert, $ as PlanningActionParamError, W as PlanningActionParamHover, X as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamScroll, _ as PlanningActionParamSleep, V as PlanningActionParamTap, a0 as PlanningActionParamWaitFor, N as PlanningLocateParam, a4 as PlaywrightParserOpt, P as Point, a3 as PuppeteerParserOpt, R as Rect, y as ReportDumpWithAttributes, aG as ScriptPlayerStatusValue, aF as ScriptPlayerTaskStatus, S as Size, a8 as TaskCacheInfo, as as scrollParam } from './types-e2a418c3.js';
3
- import { c as callAiFn } from './llm-planning-bd80e99e.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-bd80e99e.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, A as AISingleElementResponse } from './types-d2831105.js';
2
+ export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, l as AIResponseFormat, s as AISectionLocatorResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext, aH as FreeFn, aq as GroupedActionDump, z as InsightDump, v as InsightExtractParam, G as LiteUISection, ar as LocateOption, w as LocateResultElement, i as MidsceneYamlFlowItem, ax as MidsceneYamlFlowItemAIAction, ay as MidsceneYamlFlowItemAIAssert, aC as MidsceneYamlFlowItemAIHover, aD as MidsceneYamlFlowItemAIInput, aE as MidsceneYamlFlowItemAIKeyboardPress, az as MidsceneYamlFlowItemAIQuery, aF as MidsceneYamlFlowItemAIScroll, aB as MidsceneYamlFlowItemAITap, aA as MidsceneYamlFlowItemAIWaitFor, aG as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, av as MidsceneYamlScriptAndroidEnv, aw as MidsceneYamlScriptEnv, at as MidsceneYamlScriptEnvBase, au as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, O as OnTaskStartTip, F as PartialInsightDumpFromSDK, T as PlanningAIResponse, Q as PlanningAction, Z as PlanningActionParamAssert, $ as PlanningActionParamError, W as PlanningActionParamHover, X as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamScroll, _ as PlanningActionParamSleep, V as PlanningActionParamTap, a0 as PlanningActionParamWaitFor, N as PlanningLocateParam, a4 as PlaywrightParserOpt, P as Point, a3 as PuppeteerParserOpt, R as Rect, y as ReportDumpWithAttributes, aJ as ScriptPlayerStatusValue, aI as ScriptPlayerTaskStatus, S as Size, a8 as TaskCacheInfo, as as scrollParam } from './types-d2831105.js';
3
+ import { c as callAiFn } from './llm-planning-d084250f.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-d084250f.js';
5
5
  export { getLogDirByType, getVersion, setLogDir } from './utils.js';
6
6
  export { MIDSCENE_MODEL_NAME, getAIConfig } from './env.js';
7
7
  import '@midscene/shared/constants';