@midscene/shared 1.0.1-beta-20251118021215.0 → 1.0.1-beta-20251118060105.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { getDebug } from "../logger.mjs";
4
4
  import { assert } from "../utils.mjs";
5
5
  import { createAssert, maskConfig, parseJson } from "./helper.mjs";
6
6
  import { initDebugConfig } from "./init-debug.mjs";
7
- import { parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue } from "./parse.mjs";
7
+ import { parseModelFamilyFromEnv, parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue } from "./parse.mjs";
8
8
  const KEYS_MAP = {
9
9
  insight: INSIGHT_MODEL_CONFIG_KEYS,
10
10
  planning: PLANNING_MODEL_CONFIG_KEYS,
@@ -41,6 +41,33 @@ const getModelDescription = (vlMode, uiTarsVersion)=>{
41
41
  else return `${vlMode} mode`;
42
42
  return '';
43
43
  };
44
+ const parseVlModeForIntent = (intent, allEnvConfig, result, debugLog)=>{
45
+ if ('planning' === intent) {
46
+ const parseResult = parseModelFamilyFromEnv(allEnvConfig);
47
+ parseResult.warnings.forEach((warning)=>{
48
+ console.warn(`[Midscene] ${warning}`);
49
+ });
50
+ if (parseResult.modelFamily) debugLog(`Using model family: ${parseResult.modelFamily}`);
51
+ return {
52
+ vlMode: parseResult.vlMode,
53
+ uiTarsVersion: parseResult.uiTarsVersion
54
+ };
55
+ }
56
+ if (void 0 !== result.vlModeRaw) {
57
+ const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(result.vlModeRaw);
58
+ return {
59
+ vlMode: parsed.vlMode,
60
+ uiTarsVersion: parsed.uiTarsVersion
61
+ };
62
+ }
63
+ {
64
+ const parsed = parseVlModeAndUiTarsFromGlobalConfig(allEnvConfig);
65
+ return {
66
+ vlMode: parsed.vlMode,
67
+ uiTarsVersion: parsed.uiTarsVersion
68
+ };
69
+ }
70
+ };
44
71
  const decideModelConfigFromIntentConfig = (intent, intentConfig)=>{
45
72
  const debugLog = getDebug('ai:config');
46
73
  debugLog('decideModelConfig base on agent.modelConfig()');
@@ -87,7 +114,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
87
114
  provider: allEnvConfig,
88
115
  valueAssert: createAssert(keysForEnv.modelName, 'process.env', modelName)
89
116
  });
90
- const { vlMode, uiTarsVersion } = parseVlModeAndUiTarsModelVersionFromRawValue(result.vlModeRaw);
117
+ const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
91
118
  const modelDescription = getModelDescription(vlMode, uiTarsVersion);
92
119
  const finalResult = {
93
120
  ...result,
@@ -107,7 +134,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
107
134
  provider: allEnvConfig,
108
135
  valueAssert: createAssert(DEFAULT_MODEL_CONFIG_KEYS_LEGACY.modelName, 'process.env')
109
136
  });
110
- const { vlMode, uiTarsVersion } = parseVlModeAndUiTarsFromGlobalConfig(allEnvConfig);
137
+ const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
111
138
  const modelDescription = getModelDescription(vlMode, uiTarsVersion);
112
139
  const finalResult = {
113
140
  ...result,
@@ -1,4 +1,4 @@
1
- import { MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, UITarsModelVersion, VL_MODE_RAW_VALID_VALUES } from "./types.mjs";
1
+ import { MIDSCENE_MODEL_FAMILY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, UITarsModelVersion, VL_MODE_RAW_VALID_VALUES } from "./types.mjs";
2
2
  const parseVlModeAndUiTarsModelVersionFromRawValue = (vlModeRaw)=>{
3
3
  if (!vlModeRaw) return {
4
4
  vlMode: void 0,
@@ -38,7 +38,7 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
38
38
  uiTarsVersion: void 0
39
39
  };
40
40
  if (isQwen) return {
41
- vlMode: 'qwen-vl',
41
+ vlMode: 'qwen2.5-vl',
42
42
  uiTarsVersion: void 0
43
43
  };
44
44
  if (isDoubao) return {
@@ -66,4 +66,53 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
66
66
  uiTarsVersion: void 0
67
67
  };
68
68
  };
69
- export { parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue };
69
+ const detectLegacyVlModeEnvVars = (provider)=>{
70
+ const legacyVars = [
71
+ MIDSCENE_USE_DOUBAO_VISION,
72
+ MIDSCENE_USE_QWEN_VL,
73
+ MIDSCENE_USE_QWEN3_VL,
74
+ MIDSCENE_USE_VLM_UI_TARS,
75
+ MIDSCENE_USE_GEMINI
76
+ ];
77
+ return legacyVars.filter((varName)=>provider[varName]);
78
+ };
79
+ function isValidModelFamily(value) {
80
+ return MODEL_FAMILY_VALUES.includes(value);
81
+ }
82
+ function mapLegacyToModelFamily(vlMode, uiTarsVersion) {
83
+ if (!vlMode) return;
84
+ if ('vlm-ui-tars' === vlMode) if (uiTarsVersion === UITarsModelVersion.V1_0) return 'vlm-ui-tars';
85
+ else if (uiTarsVersion === UITarsModelVersion.DOUBAO_1_5_20B) return 'vlm-ui-tars-doubao-1.5';
86
+ else return 'vlm-ui-tars-doubao';
87
+ return vlMode;
88
+ }
89
+ const parseModelFamilyFromEnv = (provider)=>{
90
+ const warnings = [];
91
+ const modelFamilyRaw = provider[MIDSCENE_MODEL_FAMILY];
92
+ const legacyVars = detectLegacyVlModeEnvVars(provider);
93
+ if (modelFamilyRaw && legacyVars.length > 0) throw new Error(`Conflicting configuration detected: Both MIDSCENE_MODEL_FAMILY and legacy environment variables (${legacyVars.join(', ')}) are set. Please use only MIDSCENE_MODEL_FAMILY.`);
94
+ if (modelFamilyRaw) {
95
+ if (!isValidModelFamily(modelFamilyRaw)) throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: "${modelFamilyRaw}". Must be one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
96
+ const modelFamily = modelFamilyRaw;
97
+ const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(modelFamily);
98
+ return {
99
+ vlMode: parsed.vlMode,
100
+ uiTarsVersion: parsed.uiTarsVersion,
101
+ modelFamily,
102
+ warnings
103
+ };
104
+ }
105
+ if (legacyVars.length > 0) {
106
+ const legacyResult = parseVlModeAndUiTarsFromGlobalConfig(provider);
107
+ warnings.push(`DEPRECATED: Environment ${legacyVars.length > 1 ? 'variables' : 'variable'} ${legacyVars.join(', ')} ${legacyVars.length > 1 ? 'are' : 'is'} deprecated. Please use MIDSCENE_MODEL_FAMILY instead. See migration guide for details.`);
108
+ const modelFamily = mapLegacyToModelFamily(legacyResult.vlMode, legacyResult.uiTarsVersion);
109
+ return {
110
+ vlMode: legacyResult.vlMode,
111
+ uiTarsVersion: legacyResult.uiTarsVersion,
112
+ modelFamily,
113
+ warnings
114
+ };
115
+ }
116
+ throw new Error(`MIDSCENE_MODEL_FAMILY is required for planning tasks. Please set it to one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
117
+ };
118
+ export { detectLegacyVlModeEnvVars, parseModelFamilyFromEnv, parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue };
@@ -56,6 +56,7 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
56
56
  const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
57
57
  const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
58
58
  const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
59
+ const MIDSCENE_MODEL_FAMILY = 'MIDSCENE_MODEL_FAMILY';
59
60
  const UNUSED_ENV_KEYS = [
60
61
  MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
61
62
  ];
@@ -129,7 +130,8 @@ const MODEL_ENV_KEYS = [
129
130
  MIDSCENE_PLANNING_MODEL_BASE_URL,
130
131
  MIDSCENE_PLANNING_MODEL_API_KEY,
131
132
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
132
- MIDSCENE_PLANNING_LOCATOR_MODE
133
+ MIDSCENE_PLANNING_LOCATOR_MODE,
134
+ MIDSCENE_MODEL_FAMILY
133
135
  ];
134
136
  const ALL_ENV_KEYS = [
135
137
  ...UNUSED_ENV_KEYS,
@@ -137,20 +139,23 @@ const ALL_ENV_KEYS = [
137
139
  ...GLOBAL_ENV_KEYS,
138
140
  ...MODEL_ENV_KEYS
139
141
  ];
140
- var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
141
- UITarsModelVersion["V1_0"] = "1.0";
142
- UITarsModelVersion["V1_5"] = "1.5";
143
- UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
144
- UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
145
- return UITarsModelVersion;
146
- }({});
147
142
  const VL_MODE_RAW_VALID_VALUES = [
148
143
  'doubao-vision',
149
144
  'gemini',
150
- 'qwen-vl',
145
+ 'qwen2.5-vl',
151
146
  'qwen3-vl',
152
147
  'vlm-ui-tars',
153
148
  'vlm-ui-tars-doubao',
154
149
  'vlm-ui-tars-doubao-1.5'
155
150
  ];
156
- export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_INSIGHT_LOCATOR_MODE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LOCATOR_MODE, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_API_KEY, MODEL_BASE_URL, MODEL_ENV_KEYS, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
151
+ const MODEL_FAMILY_VALUES = [
152
+ ...VL_MODE_RAW_VALID_VALUES
153
+ ];
154
+ var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
155
+ UITarsModelVersion["V1_0"] = "1.0";
156
+ UITarsModelVersion["V1_5"] = "1.5";
157
+ UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
158
+ UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
159
+ return UITarsModelVersion;
160
+ }({});
161
+ export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_INSIGHT_LOCATOR_MODE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LOCATOR_MODE, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_API_KEY, MODEL_BASE_URL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
@@ -71,6 +71,33 @@ const getModelDescription = (vlMode, uiTarsVersion)=>{
71
71
  else return `${vlMode} mode`;
72
72
  return '';
73
73
  };
74
+ const parseVlModeForIntent = (intent, allEnvConfig, result, debugLog)=>{
75
+ if ('planning' === intent) {
76
+ const parseResult = (0, external_parse_js_namespaceObject.parseModelFamilyFromEnv)(allEnvConfig);
77
+ parseResult.warnings.forEach((warning)=>{
78
+ console.warn(`[Midscene] ${warning}`);
79
+ });
80
+ if (parseResult.modelFamily) debugLog(`Using model family: ${parseResult.modelFamily}`);
81
+ return {
82
+ vlMode: parseResult.vlMode,
83
+ uiTarsVersion: parseResult.uiTarsVersion
84
+ };
85
+ }
86
+ if (void 0 !== result.vlModeRaw) {
87
+ const parsed = (0, external_parse_js_namespaceObject.parseVlModeAndUiTarsModelVersionFromRawValue)(result.vlModeRaw);
88
+ return {
89
+ vlMode: parsed.vlMode,
90
+ uiTarsVersion: parsed.uiTarsVersion
91
+ };
92
+ }
93
+ {
94
+ const parsed = (0, external_parse_js_namespaceObject.parseVlModeAndUiTarsFromGlobalConfig)(allEnvConfig);
95
+ return {
96
+ vlMode: parsed.vlMode,
97
+ uiTarsVersion: parsed.uiTarsVersion
98
+ };
99
+ }
100
+ };
74
101
  const decideModelConfigFromIntentConfig = (intent, intentConfig)=>{
75
102
  const debugLog = (0, external_logger_js_namespaceObject.getDebug)('ai:config');
76
103
  debugLog('decideModelConfig base on agent.modelConfig()');
@@ -117,7 +144,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
117
144
  provider: allEnvConfig,
118
145
  valueAssert: (0, external_helper_js_namespaceObject.createAssert)(keysForEnv.modelName, 'process.env', modelName)
119
146
  });
120
- const { vlMode, uiTarsVersion } = (0, external_parse_js_namespaceObject.parseVlModeAndUiTarsModelVersionFromRawValue)(result.vlModeRaw);
147
+ const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
121
148
  const modelDescription = getModelDescription(vlMode, uiTarsVersion);
122
149
  const finalResult = {
123
150
  ...result,
@@ -137,7 +164,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
137
164
  provider: allEnvConfig,
138
165
  valueAssert: (0, external_helper_js_namespaceObject.createAssert)(external_constants_js_namespaceObject.DEFAULT_MODEL_CONFIG_KEYS_LEGACY.modelName, 'process.env')
139
166
  });
140
- const { vlMode, uiTarsVersion } = (0, external_parse_js_namespaceObject.parseVlModeAndUiTarsFromGlobalConfig)(allEnvConfig);
167
+ const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
141
168
  const modelDescription = getModelDescription(vlMode, uiTarsVersion);
142
169
  const finalResult = {
143
170
  ...result,
@@ -25,6 +25,8 @@ var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
27
  parseVlModeAndUiTarsFromGlobalConfig: ()=>parseVlModeAndUiTarsFromGlobalConfig,
28
+ detectLegacyVlModeEnvVars: ()=>detectLegacyVlModeEnvVars,
29
+ parseModelFamilyFromEnv: ()=>parseModelFamilyFromEnv,
28
30
  parseVlModeAndUiTarsModelVersionFromRawValue: ()=>parseVlModeAndUiTarsModelVersionFromRawValue
29
31
  });
30
32
  const external_types_js_namespaceObject = require("./types.js");
@@ -67,7 +69,7 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
67
69
  uiTarsVersion: void 0
68
70
  };
69
71
  if (isQwen) return {
70
- vlMode: 'qwen-vl',
72
+ vlMode: 'qwen2.5-vl',
71
73
  uiTarsVersion: void 0
72
74
  };
73
75
  if (isDoubao) return {
@@ -95,9 +97,62 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
95
97
  uiTarsVersion: void 0
96
98
  };
97
99
  };
100
+ const detectLegacyVlModeEnvVars = (provider)=>{
101
+ const legacyVars = [
102
+ external_types_js_namespaceObject.MIDSCENE_USE_DOUBAO_VISION,
103
+ external_types_js_namespaceObject.MIDSCENE_USE_QWEN_VL,
104
+ external_types_js_namespaceObject.MIDSCENE_USE_QWEN3_VL,
105
+ external_types_js_namespaceObject.MIDSCENE_USE_VLM_UI_TARS,
106
+ external_types_js_namespaceObject.MIDSCENE_USE_GEMINI
107
+ ];
108
+ return legacyVars.filter((varName)=>provider[varName]);
109
+ };
110
+ function isValidModelFamily(value) {
111
+ return external_types_js_namespaceObject.MODEL_FAMILY_VALUES.includes(value);
112
+ }
113
+ function mapLegacyToModelFamily(vlMode, uiTarsVersion) {
114
+ if (!vlMode) return;
115
+ if ('vlm-ui-tars' === vlMode) if (uiTarsVersion === external_types_js_namespaceObject.UITarsModelVersion.V1_0) return 'vlm-ui-tars';
116
+ else if (uiTarsVersion === external_types_js_namespaceObject.UITarsModelVersion.DOUBAO_1_5_20B) return 'vlm-ui-tars-doubao-1.5';
117
+ else return 'vlm-ui-tars-doubao';
118
+ return vlMode;
119
+ }
120
+ const parseModelFamilyFromEnv = (provider)=>{
121
+ const warnings = [];
122
+ const modelFamilyRaw = provider[external_types_js_namespaceObject.MIDSCENE_MODEL_FAMILY];
123
+ const legacyVars = detectLegacyVlModeEnvVars(provider);
124
+ if (modelFamilyRaw && legacyVars.length > 0) throw new Error(`Conflicting configuration detected: Both MIDSCENE_MODEL_FAMILY and legacy environment variables (${legacyVars.join(', ')}) are set. Please use only MIDSCENE_MODEL_FAMILY.`);
125
+ if (modelFamilyRaw) {
126
+ if (!isValidModelFamily(modelFamilyRaw)) throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: "${modelFamilyRaw}". Must be one of: ${external_types_js_namespaceObject.MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
127
+ const modelFamily = modelFamilyRaw;
128
+ const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(modelFamily);
129
+ return {
130
+ vlMode: parsed.vlMode,
131
+ uiTarsVersion: parsed.uiTarsVersion,
132
+ modelFamily,
133
+ warnings
134
+ };
135
+ }
136
+ if (legacyVars.length > 0) {
137
+ const legacyResult = parseVlModeAndUiTarsFromGlobalConfig(provider);
138
+ warnings.push(`DEPRECATED: Environment ${legacyVars.length > 1 ? 'variables' : 'variable'} ${legacyVars.join(', ')} ${legacyVars.length > 1 ? 'are' : 'is'} deprecated. Please use MIDSCENE_MODEL_FAMILY instead. See migration guide for details.`);
139
+ const modelFamily = mapLegacyToModelFamily(legacyResult.vlMode, legacyResult.uiTarsVersion);
140
+ return {
141
+ vlMode: legacyResult.vlMode,
142
+ uiTarsVersion: legacyResult.uiTarsVersion,
143
+ modelFamily,
144
+ warnings
145
+ };
146
+ }
147
+ throw new Error(`MIDSCENE_MODEL_FAMILY is required for planning tasks. Please set it to one of: ${external_types_js_namespaceObject.MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
148
+ };
149
+ exports.detectLegacyVlModeEnvVars = __webpack_exports__.detectLegacyVlModeEnvVars;
150
+ exports.parseModelFamilyFromEnv = __webpack_exports__.parseModelFamilyFromEnv;
98
151
  exports.parseVlModeAndUiTarsFromGlobalConfig = __webpack_exports__.parseVlModeAndUiTarsFromGlobalConfig;
99
152
  exports.parseVlModeAndUiTarsModelVersionFromRawValue = __webpack_exports__.parseVlModeAndUiTarsModelVersionFromRawValue;
100
153
  for(var __webpack_i__ in __webpack_exports__)if (-1 === [
154
+ "detectLegacyVlModeEnvVars",
155
+ "parseModelFamilyFromEnv",
101
156
  "parseVlModeAndUiTarsFromGlobalConfig",
102
157
  "parseVlModeAndUiTarsModelVersionFromRawValue"
103
158
  ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
@@ -56,6 +56,7 @@ __webpack_require__.d(__webpack_exports__, {
56
56
  MIDSCENE_MCP_USE_PUPPETEER_MODE: ()=>MIDSCENE_MCP_USE_PUPPETEER_MODE,
57
57
  MIDSCENE_MODEL_API_KEY: ()=>MIDSCENE_MODEL_API_KEY,
58
58
  MIDSCENE_MODEL_BASE_URL: ()=>MIDSCENE_MODEL_BASE_URL,
59
+ MIDSCENE_MODEL_FAMILY: ()=>MIDSCENE_MODEL_FAMILY,
59
60
  MIDSCENE_MODEL_HTTP_PROXY: ()=>MIDSCENE_MODEL_HTTP_PROXY,
60
61
  MIDSCENE_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_MODEL_INIT_CONFIG_JSON,
61
62
  MIDSCENE_MODEL_MAX_TOKENS: ()=>MIDSCENE_MODEL_MAX_TOKENS,
@@ -84,6 +85,7 @@ __webpack_require__.d(__webpack_exports__, {
84
85
  MODEL_API_KEY: ()=>MODEL_API_KEY,
85
86
  MODEL_BASE_URL: ()=>MODEL_BASE_URL,
86
87
  MODEL_ENV_KEYS: ()=>MODEL_ENV_KEYS,
88
+ MODEL_FAMILY_VALUES: ()=>MODEL_FAMILY_VALUES,
87
89
  NUMBER_ENV_KEYS: ()=>NUMBER_ENV_KEYS,
88
90
  OPENAI_API_KEY: ()=>OPENAI_API_KEY,
89
91
  OPENAI_BASE_URL: ()=>OPENAI_BASE_URL,
@@ -151,6 +153,7 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
151
153
  const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
152
154
  const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
153
155
  const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
156
+ const MIDSCENE_MODEL_FAMILY = 'MIDSCENE_MODEL_FAMILY';
154
157
  const UNUSED_ENV_KEYS = [
155
158
  MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
156
159
  ];
@@ -224,7 +227,8 @@ const MODEL_ENV_KEYS = [
224
227
  MIDSCENE_PLANNING_MODEL_BASE_URL,
225
228
  MIDSCENE_PLANNING_MODEL_API_KEY,
226
229
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
227
- MIDSCENE_PLANNING_LOCATOR_MODE
230
+ MIDSCENE_PLANNING_LOCATOR_MODE,
231
+ MIDSCENE_MODEL_FAMILY
228
232
  ];
229
233
  const ALL_ENV_KEYS = [
230
234
  ...UNUSED_ENV_KEYS,
@@ -232,22 +236,25 @@ const ALL_ENV_KEYS = [
232
236
  ...GLOBAL_ENV_KEYS,
233
237
  ...MODEL_ENV_KEYS
234
238
  ];
235
- var UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
236
- UITarsModelVersion["V1_0"] = "1.0";
237
- UITarsModelVersion["V1_5"] = "1.5";
238
- UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
239
- UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
240
- return UITarsModelVersion;
241
- }({});
242
239
  const VL_MODE_RAW_VALID_VALUES = [
243
240
  'doubao-vision',
244
241
  'gemini',
245
- 'qwen-vl',
242
+ 'qwen2.5-vl',
246
243
  'qwen3-vl',
247
244
  'vlm-ui-tars',
248
245
  'vlm-ui-tars-doubao',
249
246
  'vlm-ui-tars-doubao-1.5'
250
247
  ];
248
+ const MODEL_FAMILY_VALUES = [
249
+ ...VL_MODE_RAW_VALID_VALUES
250
+ ];
251
+ var UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
252
+ UITarsModelVersion["V1_0"] = "1.0";
253
+ UITarsModelVersion["V1_5"] = "1.5";
254
+ UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
255
+ UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
256
+ return UITarsModelVersion;
257
+ }({});
251
258
  exports.ALL_ENV_KEYS = __webpack_exports__.ALL_ENV_KEYS;
252
259
  exports.BASIC_ENV_KEYS = __webpack_exports__.BASIC_ENV_KEYS;
253
260
  exports.BOOLEAN_ENV_KEYS = __webpack_exports__.BOOLEAN_ENV_KEYS;
@@ -280,6 +287,7 @@ exports.MIDSCENE_MCP_CHROME_PATH = __webpack_exports__.MIDSCENE_MCP_CHROME_PATH;
280
287
  exports.MIDSCENE_MCP_USE_PUPPETEER_MODE = __webpack_exports__.MIDSCENE_MCP_USE_PUPPETEER_MODE;
281
288
  exports.MIDSCENE_MODEL_API_KEY = __webpack_exports__.MIDSCENE_MODEL_API_KEY;
282
289
  exports.MIDSCENE_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_MODEL_BASE_URL;
290
+ exports.MIDSCENE_MODEL_FAMILY = __webpack_exports__.MIDSCENE_MODEL_FAMILY;
283
291
  exports.MIDSCENE_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_MODEL_HTTP_PROXY;
284
292
  exports.MIDSCENE_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_MODEL_INIT_CONFIG_JSON;
285
293
  exports.MIDSCENE_MODEL_MAX_TOKENS = __webpack_exports__.MIDSCENE_MODEL_MAX_TOKENS;
@@ -308,6 +316,7 @@ exports.MIDSCENE_USE_VL_MODEL = __webpack_exports__.MIDSCENE_USE_VL_MODEL;
308
316
  exports.MODEL_API_KEY = __webpack_exports__.MODEL_API_KEY;
309
317
  exports.MODEL_BASE_URL = __webpack_exports__.MODEL_BASE_URL;
310
318
  exports.MODEL_ENV_KEYS = __webpack_exports__.MODEL_ENV_KEYS;
319
+ exports.MODEL_FAMILY_VALUES = __webpack_exports__.MODEL_FAMILY_VALUES;
311
320
  exports.NUMBER_ENV_KEYS = __webpack_exports__.NUMBER_ENV_KEYS;
312
321
  exports.OPENAI_API_KEY = __webpack_exports__.OPENAI_API_KEY;
313
322
  exports.OPENAI_BASE_URL = __webpack_exports__.OPENAI_BASE_URL;
@@ -349,6 +358,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
349
358
  "MIDSCENE_MCP_USE_PUPPETEER_MODE",
350
359
  "MIDSCENE_MODEL_API_KEY",
351
360
  "MIDSCENE_MODEL_BASE_URL",
361
+ "MIDSCENE_MODEL_FAMILY",
352
362
  "MIDSCENE_MODEL_HTTP_PROXY",
353
363
  "MIDSCENE_MODEL_INIT_CONFIG_JSON",
354
364
  "MIDSCENE_MODEL_MAX_TOKENS",
@@ -377,6 +387,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
377
387
  "MODEL_API_KEY",
378
388
  "MODEL_BASE_URL",
379
389
  "MODEL_ENV_KEYS",
390
+ "MODEL_FAMILY_VALUES",
380
391
  "NUMBER_ENV_KEYS",
381
392
  "OPENAI_API_KEY",
382
393
  "OPENAI_BASE_URL",
@@ -1,4 +1,4 @@
1
- import { type TVlModeTypes, UITarsModelVersion } from './types';
1
+ import { type TModelFamily, type TVlModeTypes, UITarsModelVersion } from './types';
2
2
  export declare const parseVlModeAndUiTarsModelVersionFromRawValue: (vlModeRaw?: string) => {
3
3
  vlMode?: TVlModeTypes;
4
4
  uiTarsVersion?: UITarsModelVersion;
@@ -10,3 +10,22 @@ export declare const parseVlModeAndUiTarsFromGlobalConfig: (provider: Record<str
10
10
  vlMode?: TVlModeTypes;
11
11
  uiTarsVersion?: UITarsModelVersion;
12
12
  };
13
+ /**
14
+ * Check if old MIDSCENE_USE_* environment variables are being used
15
+ * @param provider - Environment variable provider
16
+ * @returns Array of legacy environment variable names that are set
17
+ */
18
+ export declare const detectLegacyVlModeEnvVars: (provider: Record<string, string | undefined>) => string[];
19
+ /**
20
+ * Parse model family from environment variables with validation and warnings
21
+ * Supports both new MIDSCENE_MODEL_FAMILY and legacy MIDSCENE_USE_* variables
22
+ *
23
+ * @param provider - Environment variable provider
24
+ * @returns Object with vlMode, uiTarsVersion, modelFamily, and warnings
25
+ */
26
+ export declare const parseModelFamilyFromEnv: (provider: Record<string, string | undefined>) => {
27
+ vlMode?: TVlModeTypes;
28
+ uiTarsVersion?: UITarsModelVersion;
29
+ warnings: string[];
30
+ modelFamily?: TModelFamily;
31
+ };
@@ -80,6 +80,7 @@ export declare const MIDSCENE_PLANNING_MODEL_BASE_URL = "MIDSCENE_PLANNING_MODEL
80
80
  export declare const MIDSCENE_PLANNING_MODEL_API_KEY = "MIDSCENE_PLANNING_MODEL_API_KEY";
81
81
  export declare const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON";
82
82
  export declare const MIDSCENE_PLANNING_LOCATOR_MODE = "MIDSCENE_PLANNING_LOCATOR_MODE";
83
+ export declare const MIDSCENE_MODEL_FAMILY = "MIDSCENE_MODEL_FAMILY";
83
84
  /**
84
85
  * env keys declared but unused
85
86
  */
@@ -103,12 +104,23 @@ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORC
103
104
  * Can be overridden by both agent.modelConfig and overrideAIConfig
104
105
  * Can only be accessed after agent.constructor
105
106
  */
106
- export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
107
- export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
107
+ export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE", "MIDSCENE_MODEL_FAMILY"];
108
+ export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE", "MIDSCENE_MODEL_FAMILY"];
108
109
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
109
110
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
110
- export type TVlModeValues = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
111
- export type TVlModeTypes = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars';
111
+ export type TVlModeValues = 'qwen2.5-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
112
+ export type TVlModeTypes = 'qwen2.5-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars';
113
+ export declare const VL_MODE_RAW_VALID_VALUES: TVlModeValues[];
114
+ /**
115
+ * Model family values - unified model configuration approach
116
+ * Replaces the old MIDSCENE_USE_* environment variables
117
+ *
118
+ * Note: These values directly correspond to VL_MODE_RAW_VALID_VALUES
119
+ * - 'qwen2.5-vl' is Qwen 2.5
120
+ * - 'qwen3-vl' is Qwen 3
121
+ */
122
+ export type TModelFamily = TVlModeValues;
123
+ export declare const MODEL_FAMILY_VALUES: TVlModeValues[];
112
124
  export interface IModelConfigForInsight {
113
125
  [MIDSCENE_INSIGHT_MODEL_NAME]: string;
114
126
  [MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY]?: string;
@@ -125,7 +137,7 @@ export interface IModelConfigForInsight {
125
137
  * DOM-based planning is not supported.
126
138
  *
127
139
  * Required: MIDSCENE_PLANNING_LOCATOR_MODE must be set to one of:
128
- * - 'qwen-vl'
140
+ * - 'qwen2.5-vl'
129
141
  * - 'qwen3-vl'
130
142
  * - 'gemini'
131
143
  * - 'doubao-vision'
@@ -184,7 +196,6 @@ export declare enum UITarsModelVersion {
184
196
  DOUBAO_1_5_15B = "doubao-1.5-15B",
185
197
  DOUBAO_1_5_20B = "doubao-1.5-20B"
186
198
  }
187
- export declare const VL_MODE_RAW_VALID_VALUES: TVlModeValues[];
188
199
  /**
189
200
  * Callback to create custom OpenAI client instance
190
201
  * @param config - Resolved model configuration including apiKey, baseURL, modelName, intent, etc.
@@ -229,8 +240,8 @@ export interface IModelConfig {
229
240
  openaiApiKey?: string;
230
241
  openaiExtraConfig?: Record<string, unknown>;
231
242
  /**
232
- * - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
233
- * - vlMode: based on the results of the vlModoRaw classification,value can be 'doubao-vision', 'gemini', 'qwen-vl', 'vlm-ui-tars'
243
+ * - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
244
+ * - vlMode: based on the results of the vlModeRaw classification, value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'vlm-ui-tars'
234
245
  */
235
246
  vlModeRaw?: string;
236
247
  vlMode?: TVlModeTypes;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.0.1-beta-20251118021215.0",
3
+ "version": "1.0.1-beta-20251118060105.0",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -31,6 +31,7 @@ import { assert } from '../utils';
31
31
  import { createAssert, maskConfig, parseJson } from './helper';
32
32
  import { initDebugConfig } from './init-debug';
33
33
  import {
34
+ parseModelFamilyFromEnv,
34
35
  parseVlModeAndUiTarsFromGlobalConfig,
35
36
  parseVlModeAndUiTarsModelVersionFromRawValue,
36
37
  } from './parse';
@@ -167,6 +168,56 @@ const getModelDescription = (
167
168
  return '';
168
169
  };
169
170
 
171
+ /**
172
+ * Parse vlMode and uiTarsVersion based on intent and config
173
+ * Consolidates the logic for handling planning intent vs other intents
174
+ */
175
+ const parseVlModeForIntent = (
176
+ intent: TIntent,
177
+ allEnvConfig: Record<string, string | undefined>,
178
+ result: { vlModeRaw?: string },
179
+ debugLog: (...args: any[]) => void,
180
+ ): {
181
+ vlMode?: TVlModeTypes;
182
+ uiTarsVersion?: UITarsModelVersion;
183
+ } => {
184
+ if (intent === 'planning') {
185
+ const parseResult = parseModelFamilyFromEnv(allEnvConfig);
186
+
187
+ // Emit warnings to the console via console.warn
188
+ parseResult.warnings.forEach((warning) => {
189
+ console.warn(`[Midscene] ${warning}`);
190
+ });
191
+
192
+ if (parseResult.modelFamily) {
193
+ debugLog(`Using model family: ${parseResult.modelFamily}`);
194
+ }
195
+
196
+ return {
197
+ vlMode: parseResult.vlMode,
198
+ uiTarsVersion: parseResult.uiTarsVersion,
199
+ };
200
+ } else {
201
+ // For other intents, use parseVlModeAndUiTarsModelVersionFromRawValue if vlModeRaw is available
202
+ // Otherwise, use parseVlModeAndUiTarsFromGlobalConfig
203
+ if (result.vlModeRaw !== undefined) {
204
+ const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(
205
+ result.vlModeRaw,
206
+ );
207
+ return {
208
+ vlMode: parsed.vlMode,
209
+ uiTarsVersion: parsed.uiTarsVersion,
210
+ };
211
+ } else {
212
+ const parsed = parseVlModeAndUiTarsFromGlobalConfig(allEnvConfig);
213
+ return {
214
+ vlMode: parsed.vlMode,
215
+ uiTarsVersion: parsed.uiTarsVersion,
216
+ };
217
+ }
218
+ }
219
+ };
220
+
170
221
  export const decideModelConfigFromIntentConfig = (
171
222
  intent: TIntent,
172
223
  intentConfig: Record<string, string | undefined>,
@@ -254,8 +305,13 @@ export const decideModelConfigFromEnv = (
254
305
  valueAssert: createAssert(keysForEnv.modelName, 'process.env', modelName),
255
306
  });
256
307
 
257
- const { vlMode, uiTarsVersion } =
258
- parseVlModeAndUiTarsModelVersionFromRawValue(result.vlModeRaw);
308
+ const { vlMode, uiTarsVersion } = parseVlModeForIntent(
309
+ intent,
310
+ allEnvConfig,
311
+ result,
312
+ debugLog,
313
+ );
314
+
259
315
  const modelDescription = getModelDescription(vlMode, uiTarsVersion);
260
316
 
261
317
  const finalResult: IModelConfig = {
@@ -287,8 +343,12 @@ export const decideModelConfigFromEnv = (
287
343
  ),
288
344
  });
289
345
 
290
- const { vlMode, uiTarsVersion } =
291
- parseVlModeAndUiTarsFromGlobalConfig(allEnvConfig);
346
+ const { vlMode, uiTarsVersion } = parseVlModeForIntent(
347
+ intent,
348
+ allEnvConfig,
349
+ result,
350
+ debugLog,
351
+ );
292
352
 
293
353
  const modelDescription = getModelDescription(vlMode, uiTarsVersion);
294
354
 
package/src/env/parse.ts CHANGED
@@ -1,9 +1,12 @@
1
1
  import {
2
+ MIDSCENE_MODEL_FAMILY,
2
3
  MIDSCENE_USE_DOUBAO_VISION,
3
4
  MIDSCENE_USE_GEMINI,
4
5
  MIDSCENE_USE_QWEN3_VL,
5
6
  MIDSCENE_USE_QWEN_VL,
6
7
  MIDSCENE_USE_VLM_UI_TARS,
8
+ MODEL_FAMILY_VALUES,
9
+ type TModelFamily,
7
10
  type TVlModeTypes,
8
11
  type TVlModeValues,
9
12
  UITarsModelVersion,
@@ -17,10 +20,7 @@ export const parseVlModeAndUiTarsModelVersionFromRawValue = (
17
20
  uiTarsVersion?: UITarsModelVersion;
18
21
  } => {
19
22
  if (!vlModeRaw) {
20
- return {
21
- vlMode: undefined,
22
- uiTarsVersion: undefined,
23
- };
23
+ return { vlMode: undefined, uiTarsVersion: undefined };
24
24
  }
25
25
 
26
26
  if (!VL_MODE_RAW_VALID_VALUES.includes(vlModeRaw as never)) {
@@ -31,21 +31,17 @@ export const parseVlModeAndUiTarsModelVersionFromRawValue = (
31
31
  const raw = vlModeRaw as TVlModeValues;
32
32
 
33
33
  if (raw === 'vlm-ui-tars') {
34
- return {
35
- vlMode: 'vlm-ui-tars',
36
- uiTarsVersion: UITarsModelVersion.V1_0,
37
- };
38
- } else if (raw === 'vlm-ui-tars-doubao' || raw === 'vlm-ui-tars-doubao-1.5') {
34
+ return { vlMode: 'vlm-ui-tars', uiTarsVersion: UITarsModelVersion.V1_0 };
35
+ }
36
+
37
+ if (raw === 'vlm-ui-tars-doubao' || raw === 'vlm-ui-tars-doubao-1.5') {
39
38
  return {
40
39
  vlMode: 'vlm-ui-tars',
41
40
  uiTarsVersion: UITarsModelVersion.DOUBAO_1_5_20B,
42
41
  };
43
42
  }
44
43
 
45
- return {
46
- vlMode: raw as TVlModeTypes,
47
- uiTarsVersion: undefined,
48
- };
44
+ return { vlMode: raw as TVlModeTypes, uiTarsVersion: undefined };
49
45
  };
50
46
 
51
47
  /**
@@ -77,34 +73,13 @@ export const parseVlModeAndUiTarsFromGlobalConfig = (
77
73
  );
78
74
  }
79
75
 
80
- if (isQwen3) {
81
- return {
82
- vlMode: 'qwen3-vl',
83
- uiTarsVersion: undefined,
84
- };
85
- }
86
-
87
- if (isQwen) {
88
- return {
89
- vlMode: 'qwen-vl',
90
- uiTarsVersion: undefined,
91
- };
92
- }
93
-
94
- if (isDoubao) {
95
- return {
96
- vlMode: 'doubao-vision',
97
- uiTarsVersion: undefined,
98
- };
99
- }
100
-
101
- if (isGemini) {
102
- return {
103
- vlMode: 'gemini',
104
- uiTarsVersion: undefined,
105
- };
106
- }
76
+ // Simple modes without version
77
+ if (isQwen3) return { vlMode: 'qwen3-vl', uiTarsVersion: undefined };
78
+ if (isQwen) return { vlMode: 'qwen2.5-vl', uiTarsVersion: undefined };
79
+ if (isDoubao) return { vlMode: 'doubao-vision', uiTarsVersion: undefined };
80
+ if (isGemini) return { vlMode: 'gemini', uiTarsVersion: undefined };
107
81
 
82
+ // UI-TARS with version detection
108
83
  if (isUiTars) {
109
84
  if (isUiTars === '1') {
110
85
  return {
@@ -124,8 +99,131 @@ export const parseVlModeAndUiTarsFromGlobalConfig = (
124
99
  }
125
100
  }
126
101
 
127
- return {
128
- vlMode: undefined,
129
- uiTarsVersion: undefined,
130
- };
102
+ return { vlMode: undefined, uiTarsVersion: undefined };
103
+ };
104
+
105
+ /**
106
+ * Check if old MIDSCENE_USE_* environment variables are being used
107
+ * @param provider - Environment variable provider
108
+ * @returns Array of legacy environment variable names that are set
109
+ */
110
+ export const detectLegacyVlModeEnvVars = (
111
+ provider: Record<string, string | undefined>,
112
+ ): string[] => {
113
+ const legacyVars = [
114
+ MIDSCENE_USE_DOUBAO_VISION,
115
+ MIDSCENE_USE_QWEN_VL,
116
+ MIDSCENE_USE_QWEN3_VL,
117
+ MIDSCENE_USE_VLM_UI_TARS,
118
+ MIDSCENE_USE_GEMINI,
119
+ ];
120
+
121
+ return legacyVars.filter((varName) => provider[varName]);
122
+ };
123
+
124
+ /**
125
+ * Type guard to check if a string is a valid TModelFamily
126
+ */
127
+ function isValidModelFamily(value: string): value is TModelFamily {
128
+ return (MODEL_FAMILY_VALUES as readonly string[]).includes(value);
129
+ }
130
+
131
+ /**
132
+ * Map legacy vlMode and uiTarsVersion to model family
133
+ * @param vlMode - The VL mode type
134
+ * @param uiTarsVersion - The UI-TARS version (if applicable)
135
+ * @returns The corresponding model family value
136
+ */
137
+ function mapLegacyToModelFamily(
138
+ vlMode?: TVlModeTypes,
139
+ uiTarsVersion?: UITarsModelVersion,
140
+ ): TModelFamily | undefined {
141
+ if (!vlMode) return undefined;
142
+
143
+ if (vlMode === 'vlm-ui-tars') {
144
+ // UI-TARS needs special handling for version
145
+ if (uiTarsVersion === UITarsModelVersion.V1_0) {
146
+ return 'vlm-ui-tars';
147
+ } else if (uiTarsVersion === UITarsModelVersion.DOUBAO_1_5_20B) {
148
+ return 'vlm-ui-tars-doubao-1.5';
149
+ } else {
150
+ // Handle other UI-TARS versions (vlm-ui-tars-doubao)
151
+ return 'vlm-ui-tars-doubao';
152
+ }
153
+ }
154
+
155
+ // For other modes, model family directly matches vlMode
156
+ return vlMode as TModelFamily;
157
+ }
158
+
159
+ /**
160
+ * Parse model family from environment variables with validation and warnings
161
+ * Supports both new MIDSCENE_MODEL_FAMILY and legacy MIDSCENE_USE_* variables
162
+ *
163
+ * @param provider - Environment variable provider
164
+ * @returns Object with vlMode, uiTarsVersion, modelFamily, and warnings
165
+ */
166
+ export const parseModelFamilyFromEnv = (
167
+ provider: Record<string, string | undefined>,
168
+ ): {
169
+ vlMode?: TVlModeTypes;
170
+ uiTarsVersion?: UITarsModelVersion;
171
+ warnings: string[];
172
+ modelFamily?: TModelFamily;
173
+ } => {
174
+ const warnings: string[] = [];
175
+ const modelFamilyRaw = provider[MIDSCENE_MODEL_FAMILY];
176
+ const legacyVars = detectLegacyVlModeEnvVars(provider);
177
+
178
+ // Case 1: Both new and legacy variables are set - ERROR
179
+ if (modelFamilyRaw && legacyVars.length > 0) {
180
+ throw new Error(
181
+ `Conflicting configuration detected: Both MIDSCENE_MODEL_FAMILY and legacy environment variables (${legacyVars.join(', ')}) are set. Please use only MIDSCENE_MODEL_FAMILY.`,
182
+ );
183
+ }
184
+
185
+ // Case 2: Only new MIDSCENE_MODEL_FAMILY is set
186
+ if (modelFamilyRaw) {
187
+ // Validate model family value
188
+ if (!isValidModelFamily(modelFamilyRaw)) {
189
+ throw new Error(
190
+ `Invalid MIDSCENE_MODEL_FAMILY value: "${modelFamilyRaw}". Must be one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`,
191
+ );
192
+ }
193
+
194
+ const modelFamily = modelFamilyRaw;
195
+ const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(modelFamily);
196
+ return {
197
+ vlMode: parsed.vlMode,
198
+ uiTarsVersion: parsed.uiTarsVersion,
199
+ modelFamily,
200
+ warnings,
201
+ };
202
+ }
203
+
204
+ // Case 3: Only legacy variables are set - WARN
205
+ if (legacyVars.length > 0) {
206
+ const legacyResult = parseVlModeAndUiTarsFromGlobalConfig(provider);
207
+
208
+ warnings.push(
209
+ `DEPRECATED: Environment ${legacyVars.length > 1 ? 'variables' : 'variable'} ${legacyVars.join(', ')} ${legacyVars.length > 1 ? 'are' : 'is'} deprecated. Please use MIDSCENE_MODEL_FAMILY instead. See migration guide for details.`,
210
+ );
211
+
212
+ const modelFamily = mapLegacyToModelFamily(
213
+ legacyResult.vlMode,
214
+ legacyResult.uiTarsVersion,
215
+ );
216
+
217
+ return {
218
+ vlMode: legacyResult.vlMode,
219
+ uiTarsVersion: legacyResult.uiTarsVersion,
220
+ modelFamily,
221
+ warnings,
222
+ };
223
+ }
224
+
225
+ // Case 4: No configuration set - ERROR
226
+ throw new Error(
227
+ `MIDSCENE_MODEL_FAMILY is required for planning tasks. Please set it to one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`,
228
+ );
131
229
  };
package/src/env/types.ts CHANGED
@@ -114,6 +114,7 @@ export const MIDSCENE_PLANNING_MODEL_API_KEY =
114
114
  export const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON =
115
115
  'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
116
116
  export const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
117
+ export const MIDSCENE_MODEL_FAMILY = 'MIDSCENE_MODEL_FAMILY';
117
118
 
118
119
  /**
119
120
  * env keys declared but unused
@@ -214,6 +215,7 @@ export const MODEL_ENV_KEYS = [
214
215
  MIDSCENE_PLANNING_MODEL_API_KEY,
215
216
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
216
217
  MIDSCENE_PLANNING_LOCATOR_MODE,
218
+ MIDSCENE_MODEL_FAMILY,
217
219
  ] as const;
218
220
 
219
221
  export const ALL_ENV_KEYS = [
@@ -227,7 +229,7 @@ export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
227
229
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
228
230
 
229
231
  export type TVlModeValues =
230
- | 'qwen-vl'
232
+ | 'qwen2.5-vl'
231
233
  | 'qwen3-vl'
232
234
  | 'doubao-vision'
233
235
  | 'gemini'
@@ -236,12 +238,36 @@ export type TVlModeValues =
236
238
  | 'vlm-ui-tars-doubao-1.5';
237
239
 
238
240
  export type TVlModeTypes =
239
- | 'qwen-vl'
241
+ | 'qwen2.5-vl'
240
242
  | 'qwen3-vl'
241
243
  | 'doubao-vision'
242
244
  | 'gemini'
243
245
  | 'vlm-ui-tars';
244
246
 
247
+ export const VL_MODE_RAW_VALID_VALUES: TVlModeValues[] = [
248
+ 'doubao-vision',
249
+ 'gemini',
250
+ 'qwen2.5-vl',
251
+ 'qwen3-vl',
252
+ 'vlm-ui-tars',
253
+ 'vlm-ui-tars-doubao',
254
+ 'vlm-ui-tars-doubao-1.5',
255
+ ];
256
+
257
+ /**
258
+ * Model family values - unified model configuration approach
259
+ * Replaces the old MIDSCENE_USE_* environment variables
260
+ *
261
+ * Note: These values directly correspond to VL_MODE_RAW_VALID_VALUES
262
+ * - 'qwen2.5-vl' is Qwen 2.5
263
+ * - 'qwen3-vl' is Qwen 3
264
+ */
265
+ export type TModelFamily = TVlModeValues;
266
+
267
+ export const MODEL_FAMILY_VALUES: TVlModeValues[] = [
268
+ ...VL_MODE_RAW_VALID_VALUES,
269
+ ];
270
+
245
271
  export interface IModelConfigForInsight {
246
272
  // model name
247
273
  [MIDSCENE_INSIGHT_MODEL_NAME]: string;
@@ -263,7 +289,7 @@ export interface IModelConfigForInsight {
263
289
  * DOM-based planning is not supported.
264
290
  *
265
291
  * Required: MIDSCENE_PLANNING_LOCATOR_MODE must be set to one of:
266
- * - 'qwen-vl'
292
+ * - 'qwen2.5-vl'
267
293
  * - 'qwen3-vl'
268
294
  * - 'gemini'
269
295
  * - 'doubao-vision'
@@ -344,16 +370,6 @@ export enum UITarsModelVersion {
344
370
  DOUBAO_1_5_20B = 'doubao-1.5-20B',
345
371
  }
346
372
 
347
- export const VL_MODE_RAW_VALID_VALUES: TVlModeValues[] = [
348
- 'doubao-vision',
349
- 'gemini',
350
- 'qwen-vl',
351
- 'qwen3-vl',
352
- 'vlm-ui-tars',
353
- 'vlm-ui-tars-doubao',
354
- 'vlm-ui-tars-doubao-1.5',
355
- ];
356
-
357
373
  /**
358
374
  * Callback to create custom OpenAI client instance
359
375
  * @param config - Resolved model configuration including apiKey, baseURL, modelName, intent, etc.
@@ -402,8 +418,8 @@ export interface IModelConfig {
402
418
  openaiApiKey?: string;
403
419
  openaiExtraConfig?: Record<string, unknown>;
404
420
  /**
405
- * - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
406
- * - vlMode: based on the results of the vlModoRaw classification,value can be 'doubao-vision', 'gemini', 'qwen-vl', 'vlm-ui-tars'
421
+ * - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
422
+ * - vlMode: based on the results of the vlModeRaw classification, value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'vlm-ui-tars'
407
423
  */
408
424
  vlModeRaw?: string;
409
425
  vlMode?: TVlModeTypes;