@midscene/shared 1.0.1-beta-20251110055007.0 → 1.0.1-beta-20251110115555.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,12 @@
1
- import { MIDSCENE_GROUNDING_LOCATOR_MODE, MIDSCENE_GROUNDING_MODEL_API_KEY, MIDSCENE_GROUNDING_MODEL_BASE_URL, MIDSCENE_GROUNDING_MODEL_HTTP_PROXY, MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_MODEL_NAME, MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY, MIDSCENE_LOCATOR_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_VQA_LOCATOR_MODE, MIDSCENE_VQA_MODEL_API_KEY, MIDSCENE_VQA_MODEL_BASE_URL, MIDSCENE_VQA_MODEL_HTTP_PROXY, MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON, MIDSCENE_VQA_MODEL_NAME, MIDSCENE_VQA_MODEL_SOCKS_PROXY, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
2
- const VQA_MODEL_CONFIG_KEYS = {
3
- modelName: MIDSCENE_VQA_MODEL_NAME,
4
- socksProxy: MIDSCENE_VQA_MODEL_SOCKS_PROXY,
5
- httpProxy: MIDSCENE_VQA_MODEL_HTTP_PROXY,
6
- openaiBaseURL: MIDSCENE_VQA_MODEL_BASE_URL,
7
- openaiApiKey: MIDSCENE_VQA_MODEL_API_KEY,
8
- openaiExtraConfig: MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
9
- vlMode: MIDSCENE_VQA_LOCATOR_MODE
10
- };
11
- const GROUNDING_MODEL_CONFIG_KEYS = {
12
- modelName: MIDSCENE_GROUNDING_MODEL_NAME,
13
- socksProxy: MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
14
- httpProxy: MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
15
- openaiBaseURL: MIDSCENE_GROUNDING_MODEL_BASE_URL,
16
- openaiApiKey: MIDSCENE_GROUNDING_MODEL_API_KEY,
17
- openaiExtraConfig: MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
18
- vlMode: MIDSCENE_GROUNDING_LOCATOR_MODE
1
+ import { MIDSCENE_INSIGHT_LOCATOR_MODE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_LOCATOR_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
2
+ const INSIGHT_MODEL_CONFIG_KEYS = {
3
+ modelName: MIDSCENE_INSIGHT_MODEL_NAME,
4
+ socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
5
+ httpProxy: MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
6
+ openaiBaseURL: MIDSCENE_INSIGHT_MODEL_BASE_URL,
7
+ openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
8
+ openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
9
+ vlMode: MIDSCENE_INSIGHT_LOCATOR_MODE
19
10
  };
20
11
  const PLANNING_MODEL_CONFIG_KEYS = {
21
12
  modelName: MIDSCENE_PLANNING_MODEL_NAME,
@@ -44,4 +35,4 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
44
35
  openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
45
36
  vlMode: 'DEFAULT_MODEL_CONFIG_KEYS has no vlMode key'
46
37
  };
47
- export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, GROUNDING_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS, VQA_MODEL_CONFIG_KEYS };
38
+ export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS };
@@ -1,4 +1,4 @@
1
- import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, GROUNDING_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS, VQA_MODEL_CONFIG_KEYS } from "./constants.mjs";
1
+ import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from "./constants.mjs";
2
2
  import { MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MODEL_API_KEY, MODEL_BASE_URL, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
3
3
  import { getDebug } from "../logger.mjs";
4
4
  import { assert } from "../utils.mjs";
@@ -6,8 +6,7 @@ import { createAssert, maskConfig, parseJson } from "./helper.mjs";
6
6
  import { initDebugConfig } from "./init-debug.mjs";
7
7
  import { parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue } from "./parse.mjs";
8
8
  const KEYS_MAP = {
9
- VQA: VQA_MODEL_CONFIG_KEYS,
10
- grounding: GROUNDING_MODEL_CONFIG_KEYS,
9
+ insight: INSIGHT_MODEL_CONFIG_KEYS,
11
10
  planning: PLANNING_MODEL_CONFIG_KEYS,
12
11
  default: DEFAULT_MODEL_CONFIG_KEYS
13
12
  };
@@ -11,17 +11,15 @@ function _define_property(obj, key, value) {
11
11
  return obj;
12
12
  }
13
13
  const ALL_INTENTS = [
14
- 'VQA',
14
+ 'insight',
15
15
  'default',
16
- 'grounding',
17
16
  'planning'
18
17
  ];
19
18
  class ModelConfigManager {
20
19
  calcIntentConfigMap(modelConfigFn) {
21
20
  const intentConfigMap = {
22
- VQA: void 0,
21
+ insight: void 0,
23
22
  default: void 0,
24
- grounding: void 0,
25
23
  planning: void 0
26
24
  };
27
25
  for (const i of ALL_INTENTS){
@@ -35,9 +33,8 @@ class ModelConfigManager {
35
33
  }
36
34
  calcModelConfigMapBaseOnIntent(intentConfigMap) {
37
35
  const modelConfigMap = {
38
- VQA: void 0,
36
+ insight: void 0,
39
37
  default: void 0,
40
- grounding: void 0,
41
38
  planning: void 0
42
39
  };
43
40
  for (const i of ALL_INTENTS){
@@ -51,9 +48,8 @@ class ModelConfigManager {
51
48
  }
52
49
  calcModelConfigMapBaseOnEnv(allEnvConfig) {
53
50
  const modelConfigMap = {
54
- VQA: void 0,
51
+ insight: void 0,
55
52
  default: void 0,
56
- grounding: void 0,
57
53
  planning: void 0
58
54
  };
59
55
  for (const i of ALL_INTENTS){
@@ -101,7 +97,7 @@ Learn more: https://midscenejs.com/choose-a-model`);
101
97
  registerGlobalConfigManager(globalConfigManager) {
102
98
  this.globalConfigManager = globalConfigManager;
103
99
  }
104
- throwErrorIfNonVLModel(intent = 'grounding') {
100
+ throwErrorIfNonVLModel(intent = 'insight') {
105
101
  const modelConfig = this.getModelConfig(intent);
106
102
  if (!modelConfig.vlMode) throw new Error('No visual language model (VL model) detected for the current scenario. Element localization may be inaccurate. Please verify your model configuration. Learn more: https://midscenejs.com/choose-a-model');
107
103
  }
@@ -42,13 +42,13 @@ const MIDSCENE_PREFERRED_LANGUAGE = 'MIDSCENE_PREFERRED_LANGUAGE';
42
42
  const MIDSCENE_CACHE_MAX_FILENAME_LENGTH = 'MIDSCENE_CACHE_MAX_FILENAME_LENGTH';
43
43
  const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
44
44
  const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
45
- const MIDSCENE_VQA_MODEL_NAME = 'MIDSCENE_VQA_MODEL_NAME';
46
- const MIDSCENE_VQA_MODEL_SOCKS_PROXY = 'MIDSCENE_VQA_MODEL_SOCKS_PROXY';
47
- const MIDSCENE_VQA_MODEL_HTTP_PROXY = 'MIDSCENE_VQA_MODEL_HTTP_PROXY';
48
- const MIDSCENE_VQA_MODEL_BASE_URL = 'MIDSCENE_VQA_MODEL_BASE_URL';
49
- const MIDSCENE_VQA_MODEL_API_KEY = 'MIDSCENE_VQA_MODEL_API_KEY';
50
- const MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON';
51
- const MIDSCENE_VQA_LOCATOR_MODE = 'MIDSCENE_VQA_LOCATOR_MODE';
45
+ const MIDSCENE_INSIGHT_MODEL_NAME = 'MIDSCENE_INSIGHT_MODEL_NAME';
46
+ const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = 'MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY';
47
+ const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = 'MIDSCENE_INSIGHT_MODEL_HTTP_PROXY';
48
+ const MIDSCENE_INSIGHT_MODEL_BASE_URL = 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
49
+ const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
50
+ const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
51
+ const MIDSCENE_INSIGHT_LOCATOR_MODE = 'MIDSCENE_INSIGHT_LOCATOR_MODE';
52
52
  const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
53
53
  const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY = 'MIDSCENE_PLANNING_MODEL_SOCKS_PROXY';
54
54
  const MIDSCENE_PLANNING_MODEL_HTTP_PROXY = 'MIDSCENE_PLANNING_MODEL_HTTP_PROXY';
@@ -56,13 +56,6 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
56
56
  const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
57
57
  const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
58
58
  const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
59
- const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
60
- const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY = 'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
61
- const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY = 'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
62
- const MIDSCENE_GROUNDING_MODEL_BASE_URL = 'MIDSCENE_GROUNDING_MODEL_BASE_URL';
63
- const MIDSCENE_GROUNDING_MODEL_API_KEY = 'MIDSCENE_GROUNDING_MODEL_API_KEY';
64
- const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
65
- const MIDSCENE_GROUNDING_LOCATOR_MODE = 'MIDSCENE_GROUNDING_LOCATOR_MODE';
66
59
  const UNUSED_ENV_KEYS = [
67
60
  MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
68
61
  ];
@@ -123,27 +116,20 @@ const MODEL_ENV_KEYS = [
123
116
  MIDSCENE_OPENAI_SOCKS_PROXY,
124
117
  MODEL_API_KEY,
125
118
  MODEL_BASE_URL,
126
- MIDSCENE_VQA_MODEL_NAME,
127
- MIDSCENE_VQA_MODEL_SOCKS_PROXY,
128
- MIDSCENE_VQA_MODEL_HTTP_PROXY,
129
- MIDSCENE_VQA_MODEL_BASE_URL,
130
- MIDSCENE_VQA_MODEL_API_KEY,
131
- MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
132
- MIDSCENE_VQA_LOCATOR_MODE,
119
+ MIDSCENE_INSIGHT_MODEL_NAME,
120
+ MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
121
+ MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
122
+ MIDSCENE_INSIGHT_MODEL_BASE_URL,
123
+ MIDSCENE_INSIGHT_MODEL_API_KEY,
124
+ MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
125
+ MIDSCENE_INSIGHT_LOCATOR_MODE,
133
126
  MIDSCENE_PLANNING_MODEL_NAME,
134
127
  MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
135
128
  MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
136
129
  MIDSCENE_PLANNING_MODEL_BASE_URL,
137
130
  MIDSCENE_PLANNING_MODEL_API_KEY,
138
131
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
139
- MIDSCENE_PLANNING_LOCATOR_MODE,
140
- MIDSCENE_GROUNDING_MODEL_NAME,
141
- MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
142
- MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
143
- MIDSCENE_GROUNDING_MODEL_BASE_URL,
144
- MIDSCENE_GROUNDING_MODEL_API_KEY,
145
- MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
146
- MIDSCENE_GROUNDING_LOCATOR_MODE
132
+ MIDSCENE_PLANNING_LOCATOR_MODE
147
133
  ];
148
134
  const ALL_ENV_KEYS = [
149
135
  ...UNUSED_ENV_KEYS,
@@ -167,4 +153,4 @@ const VL_MODE_RAW_VALID_VALUES = [
167
153
  'vlm-ui-tars-doubao',
168
154
  'vlm-ui-tars-doubao-1.5'
169
155
  ];
170
- export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_GROUNDING_LOCATOR_MODE, MIDSCENE_GROUNDING_MODEL_API_KEY, MIDSCENE_GROUNDING_MODEL_BASE_URL, MIDSCENE_GROUNDING_MODEL_HTTP_PROXY, MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_MODEL_NAME, MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LOCATOR_MODE, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MIDSCENE_VQA_LOCATOR_MODE, MIDSCENE_VQA_MODEL_API_KEY, MIDSCENE_VQA_MODEL_BASE_URL, MIDSCENE_VQA_MODEL_HTTP_PROXY, MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON, MIDSCENE_VQA_MODEL_NAME, MIDSCENE_VQA_MODEL_SOCKS_PROXY, MODEL_API_KEY, MODEL_BASE_URL, MODEL_ENV_KEYS, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
156
+ export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_INSIGHT_LOCATOR_MODE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LOCATOR_MODE, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_API_KEY, MODEL_BASE_URL, MODEL_ENV_KEYS, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
@@ -25,29 +25,19 @@ var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
27
  DEFAULT_MODEL_CONFIG_KEYS_LEGACY: ()=>DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
28
- VQA_MODEL_CONFIG_KEYS: ()=>VQA_MODEL_CONFIG_KEYS,
28
+ INSIGHT_MODEL_CONFIG_KEYS: ()=>INSIGHT_MODEL_CONFIG_KEYS,
29
29
  DEFAULT_MODEL_CONFIG_KEYS: ()=>DEFAULT_MODEL_CONFIG_KEYS,
30
- PLANNING_MODEL_CONFIG_KEYS: ()=>PLANNING_MODEL_CONFIG_KEYS,
31
- GROUNDING_MODEL_CONFIG_KEYS: ()=>GROUNDING_MODEL_CONFIG_KEYS
30
+ PLANNING_MODEL_CONFIG_KEYS: ()=>PLANNING_MODEL_CONFIG_KEYS
32
31
  });
33
32
  const external_types_js_namespaceObject = require("./types.js");
34
- const VQA_MODEL_CONFIG_KEYS = {
35
- modelName: external_types_js_namespaceObject.MIDSCENE_VQA_MODEL_NAME,
36
- socksProxy: external_types_js_namespaceObject.MIDSCENE_VQA_MODEL_SOCKS_PROXY,
37
- httpProxy: external_types_js_namespaceObject.MIDSCENE_VQA_MODEL_HTTP_PROXY,
38
- openaiBaseURL: external_types_js_namespaceObject.MIDSCENE_VQA_MODEL_BASE_URL,
39
- openaiApiKey: external_types_js_namespaceObject.MIDSCENE_VQA_MODEL_API_KEY,
40
- openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
41
- vlMode: external_types_js_namespaceObject.MIDSCENE_VQA_LOCATOR_MODE
42
- };
43
- const GROUNDING_MODEL_CONFIG_KEYS = {
44
- modelName: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_NAME,
45
- socksProxy: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
46
- httpProxy: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
47
- openaiBaseURL: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_BASE_URL,
48
- openaiApiKey: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_API_KEY,
49
- openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
50
- vlMode: external_types_js_namespaceObject.MIDSCENE_GROUNDING_LOCATOR_MODE
33
+ const INSIGHT_MODEL_CONFIG_KEYS = {
34
+ modelName: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_NAME,
35
+ socksProxy: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
36
+ httpProxy: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
37
+ openaiBaseURL: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_BASE_URL,
38
+ openaiApiKey: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_API_KEY,
39
+ openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
40
+ vlMode: external_types_js_namespaceObject.MIDSCENE_INSIGHT_LOCATOR_MODE
51
41
  };
52
42
  const PLANNING_MODEL_CONFIG_KEYS = {
53
43
  modelName: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_NAME,
@@ -78,15 +68,13 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
78
68
  };
79
69
  exports.DEFAULT_MODEL_CONFIG_KEYS = __webpack_exports__.DEFAULT_MODEL_CONFIG_KEYS;
80
70
  exports.DEFAULT_MODEL_CONFIG_KEYS_LEGACY = __webpack_exports__.DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
81
- exports.GROUNDING_MODEL_CONFIG_KEYS = __webpack_exports__.GROUNDING_MODEL_CONFIG_KEYS;
71
+ exports.INSIGHT_MODEL_CONFIG_KEYS = __webpack_exports__.INSIGHT_MODEL_CONFIG_KEYS;
82
72
  exports.PLANNING_MODEL_CONFIG_KEYS = __webpack_exports__.PLANNING_MODEL_CONFIG_KEYS;
83
- exports.VQA_MODEL_CONFIG_KEYS = __webpack_exports__.VQA_MODEL_CONFIG_KEYS;
84
73
  for(var __webpack_i__ in __webpack_exports__)if (-1 === [
85
74
  "DEFAULT_MODEL_CONFIG_KEYS",
86
75
  "DEFAULT_MODEL_CONFIG_KEYS_LEGACY",
87
- "GROUNDING_MODEL_CONFIG_KEYS",
88
- "PLANNING_MODEL_CONFIG_KEYS",
89
- "VQA_MODEL_CONFIG_KEYS"
76
+ "INSIGHT_MODEL_CONFIG_KEYS",
77
+ "PLANNING_MODEL_CONFIG_KEYS"
90
78
  ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
91
79
  Object.defineProperty(exports, '__esModule', {
92
80
  value: true
@@ -36,8 +36,7 @@ const external_helper_js_namespaceObject = require("./helper.js");
36
36
  const external_init_debug_js_namespaceObject = require("./init-debug.js");
37
37
  const external_parse_js_namespaceObject = require("./parse.js");
38
38
  const KEYS_MAP = {
39
- VQA: external_constants_js_namespaceObject.VQA_MODEL_CONFIG_KEYS,
40
- grounding: external_constants_js_namespaceObject.GROUNDING_MODEL_CONFIG_KEYS,
39
+ insight: external_constants_js_namespaceObject.INSIGHT_MODEL_CONFIG_KEYS,
41
40
  planning: external_constants_js_namespaceObject.PLANNING_MODEL_CONFIG_KEYS,
42
41
  default: external_constants_js_namespaceObject.DEFAULT_MODEL_CONFIG_KEYS
43
42
  };
@@ -39,17 +39,15 @@ function _define_property(obj, key, value) {
39
39
  return obj;
40
40
  }
41
41
  const ALL_INTENTS = [
42
- 'VQA',
42
+ 'insight',
43
43
  'default',
44
- 'grounding',
45
44
  'planning'
46
45
  ];
47
46
  class ModelConfigManager {
48
47
  calcIntentConfigMap(modelConfigFn) {
49
48
  const intentConfigMap = {
50
- VQA: void 0,
49
+ insight: void 0,
51
50
  default: void 0,
52
- grounding: void 0,
53
51
  planning: void 0
54
52
  };
55
53
  for (const i of ALL_INTENTS){
@@ -63,9 +61,8 @@ class ModelConfigManager {
63
61
  }
64
62
  calcModelConfigMapBaseOnIntent(intentConfigMap) {
65
63
  const modelConfigMap = {
66
- VQA: void 0,
64
+ insight: void 0,
67
65
  default: void 0,
68
- grounding: void 0,
69
66
  planning: void 0
70
67
  };
71
68
  for (const i of ALL_INTENTS){
@@ -79,9 +76,8 @@ class ModelConfigManager {
79
76
  }
80
77
  calcModelConfigMapBaseOnEnv(allEnvConfig) {
81
78
  const modelConfigMap = {
82
- VQA: void 0,
79
+ insight: void 0,
83
80
  default: void 0,
84
- grounding: void 0,
85
81
  planning: void 0
86
82
  };
87
83
  for (const i of ALL_INTENTS){
@@ -129,7 +125,7 @@ Learn more: https://midscenejs.com/choose-a-model`);
129
125
  registerGlobalConfigManager(globalConfigManager) {
130
126
  this.globalConfigManager = globalConfigManager;
131
127
  }
132
- throwErrorIfNonVLModel(intent = 'grounding') {
128
+ throwErrorIfNonVLModel(intent = 'insight') {
133
129
  const modelConfig = this.getModelConfig(intent);
134
130
  if (!modelConfig.vlMode) throw new Error('No visual language model (VL model) detected for the current scenario. Element localization may be inaccurate. Please verify your model configuration. Learn more: https://midscenejs.com/choose-a-model');
135
131
  }
@@ -41,13 +41,13 @@ __webpack_require__.d(__webpack_exports__, {
41
41
  MIDSCENE_DEBUG_MODEL_PROFILE: ()=>MIDSCENE_DEBUG_MODEL_PROFILE,
42
42
  MIDSCENE_DEBUG_MODEL_RESPONSE: ()=>MIDSCENE_DEBUG_MODEL_RESPONSE,
43
43
  MIDSCENE_FORCE_DEEP_THINK: ()=>MIDSCENE_FORCE_DEEP_THINK,
44
- MIDSCENE_GROUNDING_LOCATOR_MODE: ()=>MIDSCENE_GROUNDING_LOCATOR_MODE,
45
- MIDSCENE_GROUNDING_MODEL_API_KEY: ()=>MIDSCENE_GROUNDING_MODEL_API_KEY,
46
- MIDSCENE_GROUNDING_MODEL_BASE_URL: ()=>MIDSCENE_GROUNDING_MODEL_BASE_URL,
47
- MIDSCENE_GROUNDING_MODEL_HTTP_PROXY: ()=>MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
48
- MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
49
- MIDSCENE_GROUNDING_MODEL_NAME: ()=>MIDSCENE_GROUNDING_MODEL_NAME,
50
- MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY: ()=>MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
44
+ MIDSCENE_INSIGHT_LOCATOR_MODE: ()=>MIDSCENE_INSIGHT_LOCATOR_MODE,
45
+ MIDSCENE_INSIGHT_MODEL_API_KEY: ()=>MIDSCENE_INSIGHT_MODEL_API_KEY,
46
+ MIDSCENE_INSIGHT_MODEL_BASE_URL: ()=>MIDSCENE_INSIGHT_MODEL_BASE_URL,
47
+ MIDSCENE_INSIGHT_MODEL_HTTP_PROXY: ()=>MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
48
+ MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
49
+ MIDSCENE_INSIGHT_MODEL_NAME: ()=>MIDSCENE_INSIGHT_MODEL_NAME,
50
+ MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY: ()=>MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
51
51
  MIDSCENE_IOS_DEVICE_UDID: ()=>MIDSCENE_IOS_DEVICE_UDID,
52
52
  MIDSCENE_IOS_SIMULATOR_UDID: ()=>MIDSCENE_IOS_SIMULATOR_UDID,
53
53
  MIDSCENE_LOCATOR_MODE: ()=>MIDSCENE_LOCATOR_MODE,
@@ -81,13 +81,6 @@ __webpack_require__.d(__webpack_exports__, {
81
81
  MIDSCENE_USE_QWEN_VL: ()=>MIDSCENE_USE_QWEN_VL,
82
82
  MIDSCENE_USE_VLM_UI_TARS: ()=>MIDSCENE_USE_VLM_UI_TARS,
83
83
  MIDSCENE_USE_VL_MODEL: ()=>MIDSCENE_USE_VL_MODEL,
84
- MIDSCENE_VQA_LOCATOR_MODE: ()=>MIDSCENE_VQA_LOCATOR_MODE,
85
- MIDSCENE_VQA_MODEL_API_KEY: ()=>MIDSCENE_VQA_MODEL_API_KEY,
86
- MIDSCENE_VQA_MODEL_BASE_URL: ()=>MIDSCENE_VQA_MODEL_BASE_URL,
87
- MIDSCENE_VQA_MODEL_HTTP_PROXY: ()=>MIDSCENE_VQA_MODEL_HTTP_PROXY,
88
- MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
89
- MIDSCENE_VQA_MODEL_NAME: ()=>MIDSCENE_VQA_MODEL_NAME,
90
- MIDSCENE_VQA_MODEL_SOCKS_PROXY: ()=>MIDSCENE_VQA_MODEL_SOCKS_PROXY,
91
84
  MODEL_API_KEY: ()=>MODEL_API_KEY,
92
85
  MODEL_BASE_URL: ()=>MODEL_BASE_URL,
93
86
  MODEL_ENV_KEYS: ()=>MODEL_ENV_KEYS,
@@ -144,13 +137,13 @@ const MIDSCENE_PREFERRED_LANGUAGE = 'MIDSCENE_PREFERRED_LANGUAGE';
144
137
  const MIDSCENE_CACHE_MAX_FILENAME_LENGTH = 'MIDSCENE_CACHE_MAX_FILENAME_LENGTH';
145
138
  const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
146
139
  const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
147
- const MIDSCENE_VQA_MODEL_NAME = 'MIDSCENE_VQA_MODEL_NAME';
148
- const MIDSCENE_VQA_MODEL_SOCKS_PROXY = 'MIDSCENE_VQA_MODEL_SOCKS_PROXY';
149
- const MIDSCENE_VQA_MODEL_HTTP_PROXY = 'MIDSCENE_VQA_MODEL_HTTP_PROXY';
150
- const MIDSCENE_VQA_MODEL_BASE_URL = 'MIDSCENE_VQA_MODEL_BASE_URL';
151
- const MIDSCENE_VQA_MODEL_API_KEY = 'MIDSCENE_VQA_MODEL_API_KEY';
152
- const MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON';
153
- const MIDSCENE_VQA_LOCATOR_MODE = 'MIDSCENE_VQA_LOCATOR_MODE';
140
+ const MIDSCENE_INSIGHT_MODEL_NAME = 'MIDSCENE_INSIGHT_MODEL_NAME';
141
+ const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = 'MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY';
142
+ const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = 'MIDSCENE_INSIGHT_MODEL_HTTP_PROXY';
143
+ const MIDSCENE_INSIGHT_MODEL_BASE_URL = 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
144
+ const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
145
+ const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
146
+ const MIDSCENE_INSIGHT_LOCATOR_MODE = 'MIDSCENE_INSIGHT_LOCATOR_MODE';
154
147
  const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
155
148
  const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY = 'MIDSCENE_PLANNING_MODEL_SOCKS_PROXY';
156
149
  const MIDSCENE_PLANNING_MODEL_HTTP_PROXY = 'MIDSCENE_PLANNING_MODEL_HTTP_PROXY';
@@ -158,13 +151,6 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
158
151
  const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
159
152
  const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
160
153
  const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
161
- const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
162
- const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY = 'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
163
- const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY = 'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
164
- const MIDSCENE_GROUNDING_MODEL_BASE_URL = 'MIDSCENE_GROUNDING_MODEL_BASE_URL';
165
- const MIDSCENE_GROUNDING_MODEL_API_KEY = 'MIDSCENE_GROUNDING_MODEL_API_KEY';
166
- const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
167
- const MIDSCENE_GROUNDING_LOCATOR_MODE = 'MIDSCENE_GROUNDING_LOCATOR_MODE';
168
154
  const UNUSED_ENV_KEYS = [
169
155
  MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
170
156
  ];
@@ -225,27 +211,20 @@ const MODEL_ENV_KEYS = [
225
211
  MIDSCENE_OPENAI_SOCKS_PROXY,
226
212
  MODEL_API_KEY,
227
213
  MODEL_BASE_URL,
228
- MIDSCENE_VQA_MODEL_NAME,
229
- MIDSCENE_VQA_MODEL_SOCKS_PROXY,
230
- MIDSCENE_VQA_MODEL_HTTP_PROXY,
231
- MIDSCENE_VQA_MODEL_BASE_URL,
232
- MIDSCENE_VQA_MODEL_API_KEY,
233
- MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
234
- MIDSCENE_VQA_LOCATOR_MODE,
214
+ MIDSCENE_INSIGHT_MODEL_NAME,
215
+ MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
216
+ MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
217
+ MIDSCENE_INSIGHT_MODEL_BASE_URL,
218
+ MIDSCENE_INSIGHT_MODEL_API_KEY,
219
+ MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
220
+ MIDSCENE_INSIGHT_LOCATOR_MODE,
235
221
  MIDSCENE_PLANNING_MODEL_NAME,
236
222
  MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
237
223
  MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
238
224
  MIDSCENE_PLANNING_MODEL_BASE_URL,
239
225
  MIDSCENE_PLANNING_MODEL_API_KEY,
240
226
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
241
- MIDSCENE_PLANNING_LOCATOR_MODE,
242
- MIDSCENE_GROUNDING_MODEL_NAME,
243
- MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
244
- MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
245
- MIDSCENE_GROUNDING_MODEL_BASE_URL,
246
- MIDSCENE_GROUNDING_MODEL_API_KEY,
247
- MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
248
- MIDSCENE_GROUNDING_LOCATOR_MODE
227
+ MIDSCENE_PLANNING_LOCATOR_MODE
249
228
  ];
250
229
  const ALL_ENV_KEYS = [
251
230
  ...UNUSED_ENV_KEYS,
@@ -286,13 +265,13 @@ exports.MIDSCENE_DEBUG_MODE = __webpack_exports__.MIDSCENE_DEBUG_MODE;
286
265
  exports.MIDSCENE_DEBUG_MODEL_PROFILE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_PROFILE;
287
266
  exports.MIDSCENE_DEBUG_MODEL_RESPONSE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_RESPONSE;
288
267
  exports.MIDSCENE_FORCE_DEEP_THINK = __webpack_exports__.MIDSCENE_FORCE_DEEP_THINK;
289
- exports.MIDSCENE_GROUNDING_LOCATOR_MODE = __webpack_exports__.MIDSCENE_GROUNDING_LOCATOR_MODE;
290
- exports.MIDSCENE_GROUNDING_MODEL_API_KEY = __webpack_exports__.MIDSCENE_GROUNDING_MODEL_API_KEY;
291
- exports.MIDSCENE_GROUNDING_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_GROUNDING_MODEL_BASE_URL;
292
- exports.MIDSCENE_GROUNDING_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_GROUNDING_MODEL_HTTP_PROXY;
293
- exports.MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON;
294
- exports.MIDSCENE_GROUNDING_MODEL_NAME = __webpack_exports__.MIDSCENE_GROUNDING_MODEL_NAME;
295
- exports.MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY = __webpack_exports__.MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY;
268
+ exports.MIDSCENE_INSIGHT_LOCATOR_MODE = __webpack_exports__.MIDSCENE_INSIGHT_LOCATOR_MODE;
269
+ exports.MIDSCENE_INSIGHT_MODEL_API_KEY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_API_KEY;
270
+ exports.MIDSCENE_INSIGHT_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_BASE_URL;
271
+ exports.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY;
272
+ exports.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON;
273
+ exports.MIDSCENE_INSIGHT_MODEL_NAME = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_NAME;
274
+ exports.MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY;
296
275
  exports.MIDSCENE_IOS_DEVICE_UDID = __webpack_exports__.MIDSCENE_IOS_DEVICE_UDID;
297
276
  exports.MIDSCENE_IOS_SIMULATOR_UDID = __webpack_exports__.MIDSCENE_IOS_SIMULATOR_UDID;
298
277
  exports.MIDSCENE_LOCATOR_MODE = __webpack_exports__.MIDSCENE_LOCATOR_MODE;
@@ -326,13 +305,6 @@ exports.MIDSCENE_USE_QWEN3_VL = __webpack_exports__.MIDSCENE_USE_QWEN3_VL;
326
305
  exports.MIDSCENE_USE_QWEN_VL = __webpack_exports__.MIDSCENE_USE_QWEN_VL;
327
306
  exports.MIDSCENE_USE_VLM_UI_TARS = __webpack_exports__.MIDSCENE_USE_VLM_UI_TARS;
328
307
  exports.MIDSCENE_USE_VL_MODEL = __webpack_exports__.MIDSCENE_USE_VL_MODEL;
329
- exports.MIDSCENE_VQA_LOCATOR_MODE = __webpack_exports__.MIDSCENE_VQA_LOCATOR_MODE;
330
- exports.MIDSCENE_VQA_MODEL_API_KEY = __webpack_exports__.MIDSCENE_VQA_MODEL_API_KEY;
331
- exports.MIDSCENE_VQA_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_VQA_MODEL_BASE_URL;
332
- exports.MIDSCENE_VQA_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_VQA_MODEL_HTTP_PROXY;
333
- exports.MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON;
334
- exports.MIDSCENE_VQA_MODEL_NAME = __webpack_exports__.MIDSCENE_VQA_MODEL_NAME;
335
- exports.MIDSCENE_VQA_MODEL_SOCKS_PROXY = __webpack_exports__.MIDSCENE_VQA_MODEL_SOCKS_PROXY;
336
308
  exports.MODEL_API_KEY = __webpack_exports__.MODEL_API_KEY;
337
309
  exports.MODEL_BASE_URL = __webpack_exports__.MODEL_BASE_URL;
338
310
  exports.MODEL_ENV_KEYS = __webpack_exports__.MODEL_ENV_KEYS;
@@ -362,13 +334,13 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
362
334
  "MIDSCENE_DEBUG_MODEL_PROFILE",
363
335
  "MIDSCENE_DEBUG_MODEL_RESPONSE",
364
336
  "MIDSCENE_FORCE_DEEP_THINK",
365
- "MIDSCENE_GROUNDING_LOCATOR_MODE",
366
- "MIDSCENE_GROUNDING_MODEL_API_KEY",
367
- "MIDSCENE_GROUNDING_MODEL_BASE_URL",
368
- "MIDSCENE_GROUNDING_MODEL_HTTP_PROXY",
369
- "MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON",
370
- "MIDSCENE_GROUNDING_MODEL_NAME",
371
- "MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY",
337
+ "MIDSCENE_INSIGHT_LOCATOR_MODE",
338
+ "MIDSCENE_INSIGHT_MODEL_API_KEY",
339
+ "MIDSCENE_INSIGHT_MODEL_BASE_URL",
340
+ "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY",
341
+ "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON",
342
+ "MIDSCENE_INSIGHT_MODEL_NAME",
343
+ "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY",
372
344
  "MIDSCENE_IOS_DEVICE_UDID",
373
345
  "MIDSCENE_IOS_SIMULATOR_UDID",
374
346
  "MIDSCENE_LOCATOR_MODE",
@@ -402,13 +374,6 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
402
374
  "MIDSCENE_USE_QWEN_VL",
403
375
  "MIDSCENE_USE_VLM_UI_TARS",
404
376
  "MIDSCENE_USE_VL_MODEL",
405
- "MIDSCENE_VQA_LOCATOR_MODE",
406
- "MIDSCENE_VQA_MODEL_API_KEY",
407
- "MIDSCENE_VQA_MODEL_BASE_URL",
408
- "MIDSCENE_VQA_MODEL_HTTP_PROXY",
409
- "MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON",
410
- "MIDSCENE_VQA_MODEL_NAME",
411
- "MIDSCENE_VQA_MODEL_SOCKS_PROXY",
412
377
  "MODEL_API_KEY",
413
378
  "MODEL_BASE_URL",
414
379
  "MODEL_ENV_KEYS",
@@ -16,8 +16,7 @@ interface IModelConfigKeys {
16
16
  */
17
17
  vlMode: string;
18
18
  }
19
- export declare const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys;
20
- export declare const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys;
19
+ export declare const INSIGHT_MODEL_CONFIG_KEYS: IModelConfigKeys;
21
20
  export declare const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys;
22
21
  export declare const DEFAULT_MODEL_CONFIG_KEYS: IModelConfigKeys;
23
22
  export declare const DEFAULT_MODEL_CONFIG_KEYS_LEGACY: IModelConfigKeys;
@@ -1,6 +1,6 @@
1
1
  import type { IModelConfig, TIntent } from './types';
2
- import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, GROUNDING_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS, VQA_MODEL_CONFIG_KEYS } from './constants';
3
- type TModelConfigKeys = typeof VQA_MODEL_CONFIG_KEYS | typeof GROUNDING_MODEL_CONFIG_KEYS | typeof PLANNING_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
2
+ import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from './constants';
3
+ type TModelConfigKeys = typeof INSIGHT_MODEL_CONFIG_KEYS | typeof PLANNING_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
4
4
  /**
5
5
  * Choose OpenAI SDK config
6
6
  */
@@ -66,13 +66,13 @@ export declare const MIDSCENE_PREFERRED_LANGUAGE = "MIDSCENE_PREFERRED_LANGUAGE"
66
66
  export declare const MIDSCENE_CACHE_MAX_FILENAME_LENGTH = "MIDSCENE_CACHE_MAX_FILENAME_LENGTH";
67
67
  export declare const MIDSCENE_RUN_DIR = "MIDSCENE_RUN_DIR";
68
68
  export declare const MIDSCENE_LOCATOR_MODE = "MIDSCENE_LOCATOR_MODE";
69
- export declare const MIDSCENE_VQA_MODEL_NAME = "MIDSCENE_VQA_MODEL_NAME";
70
- export declare const MIDSCENE_VQA_MODEL_SOCKS_PROXY = "MIDSCENE_VQA_MODEL_SOCKS_PROXY";
71
- export declare const MIDSCENE_VQA_MODEL_HTTP_PROXY = "MIDSCENE_VQA_MODEL_HTTP_PROXY";
72
- export declare const MIDSCENE_VQA_MODEL_BASE_URL = "MIDSCENE_VQA_MODEL_BASE_URL";
73
- export declare const MIDSCENE_VQA_MODEL_API_KEY = "MIDSCENE_VQA_MODEL_API_KEY";
74
- export declare const MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON = "MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON";
75
- export declare const MIDSCENE_VQA_LOCATOR_MODE = "MIDSCENE_VQA_LOCATOR_MODE";
69
+ export declare const MIDSCENE_INSIGHT_MODEL_NAME = "MIDSCENE_INSIGHT_MODEL_NAME";
70
+ export declare const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY";
71
+ export declare const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY";
72
+ export declare const MIDSCENE_INSIGHT_MODEL_BASE_URL = "MIDSCENE_INSIGHT_MODEL_BASE_URL";
73
+ export declare const MIDSCENE_INSIGHT_MODEL_API_KEY = "MIDSCENE_INSIGHT_MODEL_API_KEY";
74
+ export declare const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON";
75
+ export declare const MIDSCENE_INSIGHT_LOCATOR_MODE = "MIDSCENE_INSIGHT_LOCATOR_MODE";
76
76
  export declare const MIDSCENE_PLANNING_MODEL_NAME = "MIDSCENE_PLANNING_MODEL_NAME";
77
77
  export declare const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY = "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY";
78
78
  export declare const MIDSCENE_PLANNING_MODEL_HTTP_PROXY = "MIDSCENE_PLANNING_MODEL_HTTP_PROXY";
@@ -80,13 +80,6 @@ export declare const MIDSCENE_PLANNING_MODEL_BASE_URL = "MIDSCENE_PLANNING_MODEL
80
80
  export declare const MIDSCENE_PLANNING_MODEL_API_KEY = "MIDSCENE_PLANNING_MODEL_API_KEY";
81
81
  export declare const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON";
82
82
  export declare const MIDSCENE_PLANNING_LOCATOR_MODE = "MIDSCENE_PLANNING_LOCATOR_MODE";
83
- export declare const MIDSCENE_GROUNDING_MODEL_NAME = "MIDSCENE_GROUNDING_MODEL_NAME";
84
- export declare const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY = "MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY";
85
- export declare const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY = "MIDSCENE_GROUNDING_MODEL_HTTP_PROXY";
86
- export declare const MIDSCENE_GROUNDING_MODEL_BASE_URL = "MIDSCENE_GROUNDING_MODEL_BASE_URL";
87
- export declare const MIDSCENE_GROUNDING_MODEL_API_KEY = "MIDSCENE_GROUNDING_MODEL_API_KEY";
88
- export declare const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON = "MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON";
89
- export declare const MIDSCENE_GROUNDING_LOCATOR_MODE = "MIDSCENE_GROUNDING_LOCATOR_MODE";
90
83
  /**
91
84
  * env keys declared but unused
92
85
  */
@@ -110,20 +103,20 @@ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORC
110
103
  * Can be override by both agent.modelConfig and overrideAIConfig
111
104
  * Can only be access after agent.constructor
112
105
  */
113
- export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_MODEL_SOCKS_PROXY", "MIDSCENE_VQA_MODEL_HTTP_PROXY", "MIDSCENE_VQA_MODEL_BASE_URL", "MIDSCENE_VQA_MODEL_API_KEY", "MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON", "MIDSCENE_VQA_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY", "MIDSCENE_GROUNDING_MODEL_HTTP_PROXY", "MIDSCENE_GROUNDING_MODEL_BASE_URL", "MIDSCENE_GROUNDING_MODEL_API_KEY", "MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_LOCATOR_MODE"];
114
- export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_MODEL_SOCKS_PROXY", "MIDSCENE_VQA_MODEL_HTTP_PROXY", "MIDSCENE_VQA_MODEL_BASE_URL", "MIDSCENE_VQA_MODEL_API_KEY", "MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON", "MIDSCENE_VQA_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY", "MIDSCENE_GROUNDING_MODEL_HTTP_PROXY", "MIDSCENE_GROUNDING_MODEL_BASE_URL", "MIDSCENE_GROUNDING_MODEL_API_KEY", "MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_LOCATOR_MODE"];
106
+ export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
107
+ export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
115
108
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
116
109
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
117
110
  export type TVlModeValues = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
118
111
  export type TVlModeTypes = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars';
119
- export interface IModelConfigForVQA {
120
- [MIDSCENE_VQA_MODEL_NAME]: string;
121
- [MIDSCENE_VQA_MODEL_SOCKS_PROXY]?: string;
122
- [MIDSCENE_VQA_MODEL_HTTP_PROXY]?: string;
123
- [MIDSCENE_VQA_MODEL_BASE_URL]?: string;
124
- [MIDSCENE_VQA_MODEL_API_KEY]?: string;
125
- [MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON]?: string;
126
- [MIDSCENE_VQA_LOCATOR_MODE]?: TVlModeValues;
112
+ export interface IModelConfigForInsight {
113
+ [MIDSCENE_INSIGHT_MODEL_NAME]: string;
114
+ [MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY]?: string;
115
+ [MIDSCENE_INSIGHT_MODEL_HTTP_PROXY]?: string;
116
+ [MIDSCENE_INSIGHT_MODEL_BASE_URL]?: string;
117
+ [MIDSCENE_INSIGHT_MODEL_API_KEY]?: string;
118
+ [MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON]?: string;
119
+ [MIDSCENE_INSIGHT_LOCATOR_MODE]?: TVlModeValues;
127
120
  }
128
121
  /**
129
122
  * Model configuration for Planning intent.
@@ -149,15 +142,6 @@ export interface IModelConfigForPlanning {
149
142
  [MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON]?: string;
150
143
  [MIDSCENE_PLANNING_LOCATOR_MODE]?: TVlModeValues;
151
144
  }
152
- export interface IModeConfigForGrounding {
153
- [MIDSCENE_GROUNDING_MODEL_NAME]: string;
154
- [MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY]?: string;
155
- [MIDSCENE_GROUNDING_MODEL_HTTP_PROXY]?: string;
156
- [MIDSCENE_GROUNDING_MODEL_BASE_URL]?: string;
157
- [MIDSCENE_GROUNDING_MODEL_API_KEY]?: string;
158
- [MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON]?: string;
159
- [MIDSCENE_GROUNDING_LOCATOR_MODE]?: TVlModeValues;
160
- }
161
145
  export interface IModelConfigForDefault {
162
146
  [MIDSCENE_MODEL_NAME]: string;
163
147
  [MIDSCENE_MODEL_SOCKS_PROXY]?: string;
@@ -177,24 +161,23 @@ export interface IModelConfigForDefaultLegacy {
177
161
  [MIDSCENE_LOCATOR_MODE]?: TVlModeValues;
178
162
  }
179
163
  /**
180
- * - VQA: Visual Question Answering
181
- * - grounding:short for Visual Grounding
164
+ * - insight: Visual Question Answering and Visual Grounding (unified)
182
165
  * - planning: planning
183
- * - default: all except VQAgrounding、planning
166
+ * - default: all except insight、planning
184
167
  */
185
- export type TIntent = 'VQA' | 'planning' | 'grounding' | 'default';
168
+ export type TIntent = 'insight' | 'planning' | 'default';
186
169
  /**
187
170
  * Internal type with intent parameter for ModelConfigManager
188
171
  * @internal
189
172
  */
190
173
  export type TModelConfigFnInternal = (options: {
191
174
  intent: TIntent;
192
- }) => IModelConfigForVQA | IModelConfigForPlanning | IModeConfigForGrounding | IModelConfigForDefault;
175
+ }) => IModelConfigForInsight | IModelConfigForPlanning | IModelConfigForDefault;
193
176
  /**
194
177
  * User-facing model config function type
195
178
  * Users return config objects without needing to know about intent parameter
196
179
  */
197
- export type TModelConfigFn = () => IModelConfigForVQA | IModelConfigForPlanning | IModeConfigForGrounding | IModelConfigForDefault;
180
+ export type TModelConfigFn = () => IModelConfigForInsight | IModelConfigForPlanning | IModelConfigForDefault;
198
181
  export declare enum UITarsModelVersion {
199
182
  V1_0 = "1.0",
200
183
  V1_5 = "1.5",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.0.1-beta-20251110055007.0",
3
+ "version": "1.0.1-beta-20251110115555.0",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -1,11 +1,11 @@
1
1
  import {
2
- MIDSCENE_GROUNDING_LOCATOR_MODE,
3
- MIDSCENE_GROUNDING_MODEL_API_KEY,
4
- MIDSCENE_GROUNDING_MODEL_BASE_URL,
5
- MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
6
- MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
7
- MIDSCENE_GROUNDING_MODEL_NAME,
8
- MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
2
+ MIDSCENE_INSIGHT_LOCATOR_MODE,
3
+ MIDSCENE_INSIGHT_MODEL_API_KEY,
4
+ MIDSCENE_INSIGHT_MODEL_BASE_URL,
5
+ MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
6
+ MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
7
+ MIDSCENE_INSIGHT_MODEL_NAME,
8
+ MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
9
9
  MIDSCENE_LOCATOR_MODE,
10
10
  MIDSCENE_MODEL_API_KEY,
11
11
  MIDSCENE_MODEL_BASE_URL,
@@ -23,14 +23,6 @@ import {
23
23
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
24
24
  MIDSCENE_PLANNING_MODEL_NAME,
25
25
  MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
26
- MIDSCENE_VQA_LOCATOR_MODE,
27
- MIDSCENE_VQA_MODEL_API_KEY,
28
- MIDSCENE_VQA_MODEL_BASE_URL,
29
- MIDSCENE_VQA_MODEL_HTTP_PROXY,
30
- MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
31
- // VQA
32
- MIDSCENE_VQA_MODEL_NAME,
33
- MIDSCENE_VQA_MODEL_SOCKS_PROXY,
34
26
  OPENAI_API_KEY,
35
27
  OPENAI_BASE_URL,
36
28
  } from './types';
@@ -54,42 +46,23 @@ interface IModelConfigKeys {
54
46
  vlMode: string;
55
47
  }
56
48
 
57
- export const VQA_MODEL_CONFIG_KEYS: IModelConfigKeys = {
58
- modelName: MIDSCENE_VQA_MODEL_NAME,
49
+ export const INSIGHT_MODEL_CONFIG_KEYS: IModelConfigKeys = {
50
+ modelName: MIDSCENE_INSIGHT_MODEL_NAME,
59
51
  /**
60
52
  * proxy
61
53
  */
62
- socksProxy: MIDSCENE_VQA_MODEL_SOCKS_PROXY,
63
- httpProxy: MIDSCENE_VQA_MODEL_HTTP_PROXY,
54
+ socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
55
+ httpProxy: MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
64
56
  /**
65
57
  * OpenAI
66
58
  */
67
- openaiBaseURL: MIDSCENE_VQA_MODEL_BASE_URL,
68
- openaiApiKey: MIDSCENE_VQA_MODEL_API_KEY,
69
- openaiExtraConfig: MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
59
+ openaiBaseURL: MIDSCENE_INSIGHT_MODEL_BASE_URL,
60
+ openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
61
+ openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
70
62
  /**
71
63
  * Extra
72
64
  */
73
- vlMode: MIDSCENE_VQA_LOCATOR_MODE,
74
- } as const;
75
-
76
- export const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
77
- modelName: MIDSCENE_GROUNDING_MODEL_NAME,
78
- /**
79
- * proxy
80
- */
81
- socksProxy: MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
82
- httpProxy: MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
83
- /**
84
- * OpenAI
85
- */
86
- openaiBaseURL: MIDSCENE_GROUNDING_MODEL_BASE_URL,
87
- openaiApiKey: MIDSCENE_GROUNDING_MODEL_API_KEY,
88
- openaiExtraConfig: MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
89
- /**
90
- * Extra
91
- */
92
- vlMode: MIDSCENE_GROUNDING_LOCATOR_MODE,
65
+ vlMode: MIDSCENE_INSIGHT_LOCATOR_MODE,
93
66
  } as const;
94
67
 
95
68
  export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
@@ -8,9 +8,8 @@ import type {
8
8
  import {
9
9
  DEFAULT_MODEL_CONFIG_KEYS,
10
10
  DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
11
- GROUNDING_MODEL_CONFIG_KEYS,
11
+ INSIGHT_MODEL_CONFIG_KEYS,
12
12
  PLANNING_MODEL_CONFIG_KEYS,
13
- VQA_MODEL_CONFIG_KEYS,
14
13
  } from './constants';
15
14
  import {
16
15
  MIDSCENE_MODEL_API_KEY,
@@ -37,15 +36,13 @@ import {
37
36
  } from './parse';
38
37
 
39
38
  type TModelConfigKeys =
40
- | typeof VQA_MODEL_CONFIG_KEYS
41
- | typeof GROUNDING_MODEL_CONFIG_KEYS
39
+ | typeof INSIGHT_MODEL_CONFIG_KEYS
42
40
  | typeof PLANNING_MODEL_CONFIG_KEYS
43
41
  | typeof DEFAULT_MODEL_CONFIG_KEYS
44
42
  | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
45
43
 
46
44
  const KEYS_MAP: Record<TIntent, TModelConfigKeys> = {
47
- VQA: VQA_MODEL_CONFIG_KEYS,
48
- grounding: GROUNDING_MODEL_CONFIG_KEYS,
45
+ insight: INSIGHT_MODEL_CONFIG_KEYS,
49
46
  planning: PLANNING_MODEL_CONFIG_KEYS,
50
47
  default: DEFAULT_MODEL_CONFIG_KEYS,
51
48
  } as const;
@@ -13,7 +13,7 @@ import type {
13
13
  } from './types';
14
14
  import { VL_MODE_RAW_VALID_VALUES as VL_MODES } from './types';
15
15
 
16
- const ALL_INTENTS: TIntent[] = ['VQA', 'default', 'grounding', 'planning'];
16
+ const ALL_INTENTS: TIntent[] = ['insight', 'default', 'planning'];
17
17
 
18
18
  export type TIntentConfigMap = Record<
19
19
  TIntent,
@@ -51,9 +51,8 @@ export class ModelConfigManager {
51
51
  modelConfigFn: TModelConfigFnInternal,
52
52
  ): TIntentConfigMap {
53
53
  const intentConfigMap: TIntentConfigMap = {
54
- VQA: undefined,
54
+ insight: undefined,
55
55
  default: undefined,
56
- grounding: undefined,
57
56
  planning: undefined,
58
57
  };
59
58
 
@@ -71,9 +70,8 @@ export class ModelConfigManager {
71
70
 
72
71
  private calcModelConfigMapBaseOnIntent(intentConfigMap: TIntentConfigMap) {
73
72
  const modelConfigMap: Record<TIntent, IModelConfig | undefined> = {
74
- VQA: undefined,
73
+ insight: undefined,
75
74
  default: undefined,
76
- grounding: undefined,
77
75
  planning: undefined,
78
76
  };
79
77
  for (const i of ALL_INTENTS) {
@@ -93,9 +91,8 @@ export class ModelConfigManager {
93
91
  allEnvConfig: Record<string, string | undefined>,
94
92
  ) {
95
93
  const modelConfigMap: Record<TIntent, IModelConfig | undefined> = {
96
- VQA: undefined,
94
+ insight: undefined,
97
95
  default: undefined,
98
- grounding: undefined,
99
96
  planning: undefined,
100
97
  };
101
98
  for (const i of ALL_INTENTS) {
@@ -177,7 +174,7 @@ Learn more: https://midscenejs.com/choose-a-model`,
177
174
  this.globalConfigManager = globalConfigManager;
178
175
  }
179
176
 
180
- throwErrorIfNonVLModel(intent: TIntent = 'grounding') {
177
+ throwErrorIfNonVLModel(intent: TIntent = 'insight') {
181
178
  const modelConfig = this.getModelConfig(intent);
182
179
 
183
180
  if (!modelConfig.vlMode) {
package/src/env/types.ts CHANGED
@@ -88,15 +88,18 @@ export const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
88
88
  // default new
89
89
  export const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
90
90
 
91
- // VQA
92
- export const MIDSCENE_VQA_MODEL_NAME = 'MIDSCENE_VQA_MODEL_NAME';
93
- export const MIDSCENE_VQA_MODEL_SOCKS_PROXY = 'MIDSCENE_VQA_MODEL_SOCKS_PROXY';
94
- export const MIDSCENE_VQA_MODEL_HTTP_PROXY = 'MIDSCENE_VQA_MODEL_HTTP_PROXY';
95
- export const MIDSCENE_VQA_MODEL_BASE_URL = 'MIDSCENE_VQA_MODEL_BASE_URL';
96
- export const MIDSCENE_VQA_MODEL_API_KEY = 'MIDSCENE_VQA_MODEL_API_KEY';
97
- export const MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON =
98
- 'MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON';
99
- export const MIDSCENE_VQA_LOCATOR_MODE = 'MIDSCENE_VQA_LOCATOR_MODE';
91
+ // INSIGHT (unified VQA and Grounding)
92
+ export const MIDSCENE_INSIGHT_MODEL_NAME = 'MIDSCENE_INSIGHT_MODEL_NAME';
93
+ export const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY =
94
+ 'MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY';
95
+ export const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY =
96
+ 'MIDSCENE_INSIGHT_MODEL_HTTP_PROXY';
97
+ export const MIDSCENE_INSIGHT_MODEL_BASE_URL =
98
+ 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
99
+ export const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
100
+ export const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON =
101
+ 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
102
+ export const MIDSCENE_INSIGHT_LOCATOR_MODE = 'MIDSCENE_INSIGHT_LOCATOR_MODE';
100
103
 
101
104
  // PLANNING
102
105
  export const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
@@ -112,21 +115,6 @@ export const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON =
112
115
  'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
113
116
  export const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
114
117
 
115
- // GROUNDING
116
- export const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
117
- export const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY =
118
- 'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
119
- export const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY =
120
- 'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
121
- export const MIDSCENE_GROUNDING_MODEL_BASE_URL =
122
- 'MIDSCENE_GROUNDING_MODEL_BASE_URL';
123
- export const MIDSCENE_GROUNDING_MODEL_API_KEY =
124
- 'MIDSCENE_GROUNDING_MODEL_API_KEY';
125
- export const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON =
126
- 'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
127
- export const MIDSCENE_GROUNDING_LOCATOR_MODE =
128
- 'MIDSCENE_GROUNDING_LOCATOR_MODE';
129
-
130
118
  /**
131
119
  * env keys declared but unused
132
120
  */
@@ -210,14 +198,14 @@ export const MODEL_ENV_KEYS = [
210
198
  MIDSCENE_OPENAI_SOCKS_PROXY,
211
199
  MODEL_API_KEY,
212
200
  MODEL_BASE_URL,
213
- // VQA
214
- MIDSCENE_VQA_MODEL_NAME,
215
- MIDSCENE_VQA_MODEL_SOCKS_PROXY,
216
- MIDSCENE_VQA_MODEL_HTTP_PROXY,
217
- MIDSCENE_VQA_MODEL_BASE_URL,
218
- MIDSCENE_VQA_MODEL_API_KEY,
219
- MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
220
- MIDSCENE_VQA_LOCATOR_MODE,
201
+ // INSIGHT (unified VQA and Grounding)
202
+ MIDSCENE_INSIGHT_MODEL_NAME,
203
+ MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
204
+ MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
205
+ MIDSCENE_INSIGHT_MODEL_BASE_URL,
206
+ MIDSCENE_INSIGHT_MODEL_API_KEY,
207
+ MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
208
+ MIDSCENE_INSIGHT_LOCATOR_MODE,
221
209
  // PLANNING
222
210
  MIDSCENE_PLANNING_MODEL_NAME,
223
211
  MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
@@ -226,14 +214,6 @@ export const MODEL_ENV_KEYS = [
226
214
  MIDSCENE_PLANNING_MODEL_API_KEY,
227
215
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
228
216
  MIDSCENE_PLANNING_LOCATOR_MODE,
229
- // GROUNDING
230
- MIDSCENE_GROUNDING_MODEL_NAME,
231
- MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
232
- MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
233
- MIDSCENE_GROUNDING_MODEL_BASE_URL,
234
- MIDSCENE_GROUNDING_MODEL_API_KEY,
235
- MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
236
- MIDSCENE_GROUNDING_LOCATOR_MODE,
237
217
  ] as const;
238
218
 
239
219
  export const ALL_ENV_KEYS = [
@@ -262,18 +242,18 @@ export type TVlModeTypes =
262
242
  | 'gemini'
263
243
  | 'vlm-ui-tars';
264
244
 
265
- export interface IModelConfigForVQA {
245
+ export interface IModelConfigForInsight {
266
246
  // model name
267
- [MIDSCENE_VQA_MODEL_NAME]: string;
247
+ [MIDSCENE_INSIGHT_MODEL_NAME]: string;
268
248
  // proxy
269
- [MIDSCENE_VQA_MODEL_SOCKS_PROXY]?: string;
270
- [MIDSCENE_VQA_MODEL_HTTP_PROXY]?: string;
249
+ [MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY]?: string;
250
+ [MIDSCENE_INSIGHT_MODEL_HTTP_PROXY]?: string;
271
251
  // OpenAI
272
- [MIDSCENE_VQA_MODEL_BASE_URL]?: string;
273
- [MIDSCENE_VQA_MODEL_API_KEY]?: string;
274
- [MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON]?: string;
252
+ [MIDSCENE_INSIGHT_MODEL_BASE_URL]?: string;
253
+ [MIDSCENE_INSIGHT_MODEL_API_KEY]?: string;
254
+ [MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON]?: string;
275
255
  // extra
276
- [MIDSCENE_VQA_LOCATOR_MODE]?: TVlModeValues;
256
+ [MIDSCENE_INSIGHT_LOCATOR_MODE]?: TVlModeValues;
277
257
  }
278
258
 
279
259
  /**
@@ -305,20 +285,6 @@ export interface IModelConfigForPlanning {
305
285
  [MIDSCENE_PLANNING_LOCATOR_MODE]?: TVlModeValues;
306
286
  }
307
287
 
308
- export interface IModeConfigForGrounding {
309
- // model name
310
- [MIDSCENE_GROUNDING_MODEL_NAME]: string;
311
- // proxy
312
- [MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY]?: string;
313
- [MIDSCENE_GROUNDING_MODEL_HTTP_PROXY]?: string;
314
- // OpenAI
315
- [MIDSCENE_GROUNDING_MODEL_BASE_URL]?: string;
316
- [MIDSCENE_GROUNDING_MODEL_API_KEY]?: string;
317
- [MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON]?: string;
318
- // extra
319
- [MIDSCENE_GROUNDING_LOCATOR_MODE]?: TVlModeValues;
320
- }
321
-
322
288
  export interface IModelConfigForDefault {
323
289
  // model name
324
290
  [MIDSCENE_MODEL_NAME]: string;
@@ -348,12 +314,11 @@ export interface IModelConfigForDefaultLegacy {
348
314
  }
349
315
 
350
316
  /**
351
- * - VQA: Visual Question Answering
352
- * - grounding:short for Visual Grounding
317
+ * - insight: Visual Question Answering and Visual Grounding (unified)
353
318
  * - planning: planning
354
- * - default: all except VQAgrounding、planning
319
+ * - default: all except insight、planning
355
320
  */
356
- export type TIntent = 'VQA' | 'planning' | 'grounding' | 'default';
321
+ export type TIntent = 'insight' | 'planning' | 'default';
357
322
 
358
323
  /**
359
324
  * Internal type with intent parameter for ModelConfigManager
@@ -361,20 +326,15 @@ export type TIntent = 'VQA' | 'planning' | 'grounding' | 'default';
361
326
  */
362
327
  export type TModelConfigFnInternal = (options: {
363
328
  intent: TIntent;
364
- }) =>
365
- | IModelConfigForVQA
366
- | IModelConfigForPlanning
367
- | IModeConfigForGrounding
368
- | IModelConfigForDefault;
329
+ }) => IModelConfigForInsight | IModelConfigForPlanning | IModelConfigForDefault;
369
330
 
370
331
  /**
371
332
  * User-facing model config function type
372
333
  * Users return config objects without needing to know about intent parameter
373
334
  */
374
335
  export type TModelConfigFn = () =>
375
- | IModelConfigForVQA
336
+ | IModelConfigForInsight
376
337
  | IModelConfigForPlanning
377
- | IModeConfigForGrounding
378
338
  | IModelConfigForDefault;
379
339
 
380
340
  export enum UITarsModelVersion {