@midscene/shared 1.0.1-beta-20251118021215.0 → 1.0.1-beta-20251118060105.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/env/decide-model-config.mjs +30 -3
- package/dist/es/env/parse.mjs +52 -3
- package/dist/es/env/types.mjs +15 -10
- package/dist/lib/env/decide-model-config.js +29 -2
- package/dist/lib/env/parse.js +56 -1
- package/dist/lib/env/types.js +20 -9
- package/dist/types/env/parse.d.ts +20 -1
- package/dist/types/env/types.d.ts +19 -8
- package/package.json +1 -1
- package/src/env/decide-model-config.ts +64 -4
- package/src/env/parse.ts +142 -44
- package/src/env/types.ts +31 -15
|
@@ -4,7 +4,7 @@ import { getDebug } from "../logger.mjs";
|
|
|
4
4
|
import { assert } from "../utils.mjs";
|
|
5
5
|
import { createAssert, maskConfig, parseJson } from "./helper.mjs";
|
|
6
6
|
import { initDebugConfig } from "./init-debug.mjs";
|
|
7
|
-
import { parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue } from "./parse.mjs";
|
|
7
|
+
import { parseModelFamilyFromEnv, parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue } from "./parse.mjs";
|
|
8
8
|
const KEYS_MAP = {
|
|
9
9
|
insight: INSIGHT_MODEL_CONFIG_KEYS,
|
|
10
10
|
planning: PLANNING_MODEL_CONFIG_KEYS,
|
|
@@ -41,6 +41,33 @@ const getModelDescription = (vlMode, uiTarsVersion)=>{
|
|
|
41
41
|
else return `${vlMode} mode`;
|
|
42
42
|
return '';
|
|
43
43
|
};
|
|
44
|
+
const parseVlModeForIntent = (intent, allEnvConfig, result, debugLog)=>{
|
|
45
|
+
if ('planning' === intent) {
|
|
46
|
+
const parseResult = parseModelFamilyFromEnv(allEnvConfig);
|
|
47
|
+
parseResult.warnings.forEach((warning)=>{
|
|
48
|
+
console.warn(`[Midscene] ${warning}`);
|
|
49
|
+
});
|
|
50
|
+
if (parseResult.modelFamily) debugLog(`Using model family: ${parseResult.modelFamily}`);
|
|
51
|
+
return {
|
|
52
|
+
vlMode: parseResult.vlMode,
|
|
53
|
+
uiTarsVersion: parseResult.uiTarsVersion
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
if (void 0 !== result.vlModeRaw) {
|
|
57
|
+
const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(result.vlModeRaw);
|
|
58
|
+
return {
|
|
59
|
+
vlMode: parsed.vlMode,
|
|
60
|
+
uiTarsVersion: parsed.uiTarsVersion
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
{
|
|
64
|
+
const parsed = parseVlModeAndUiTarsFromGlobalConfig(allEnvConfig);
|
|
65
|
+
return {
|
|
66
|
+
vlMode: parsed.vlMode,
|
|
67
|
+
uiTarsVersion: parsed.uiTarsVersion
|
|
68
|
+
};
|
|
69
|
+
}
|
|
70
|
+
};
|
|
44
71
|
const decideModelConfigFromIntentConfig = (intent, intentConfig)=>{
|
|
45
72
|
const debugLog = getDebug('ai:config');
|
|
46
73
|
debugLog('decideModelConfig base on agent.modelConfig()');
|
|
@@ -87,7 +114,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
|
|
|
87
114
|
provider: allEnvConfig,
|
|
88
115
|
valueAssert: createAssert(keysForEnv.modelName, 'process.env', modelName)
|
|
89
116
|
});
|
|
90
|
-
const { vlMode, uiTarsVersion } =
|
|
117
|
+
const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
|
|
91
118
|
const modelDescription = getModelDescription(vlMode, uiTarsVersion);
|
|
92
119
|
const finalResult = {
|
|
93
120
|
...result,
|
|
@@ -107,7 +134,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
|
|
|
107
134
|
provider: allEnvConfig,
|
|
108
135
|
valueAssert: createAssert(DEFAULT_MODEL_CONFIG_KEYS_LEGACY.modelName, 'process.env')
|
|
109
136
|
});
|
|
110
|
-
const { vlMode, uiTarsVersion } =
|
|
137
|
+
const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
|
|
111
138
|
const modelDescription = getModelDescription(vlMode, uiTarsVersion);
|
|
112
139
|
const finalResult = {
|
|
113
140
|
...result,
|
package/dist/es/env/parse.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, UITarsModelVersion, VL_MODE_RAW_VALID_VALUES } from "./types.mjs";
|
|
1
|
+
import { MIDSCENE_MODEL_FAMILY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, UITarsModelVersion, VL_MODE_RAW_VALID_VALUES } from "./types.mjs";
|
|
2
2
|
const parseVlModeAndUiTarsModelVersionFromRawValue = (vlModeRaw)=>{
|
|
3
3
|
if (!vlModeRaw) return {
|
|
4
4
|
vlMode: void 0,
|
|
@@ -38,7 +38,7 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
|
|
|
38
38
|
uiTarsVersion: void 0
|
|
39
39
|
};
|
|
40
40
|
if (isQwen) return {
|
|
41
|
-
vlMode: '
|
|
41
|
+
vlMode: 'qwen2.5-vl',
|
|
42
42
|
uiTarsVersion: void 0
|
|
43
43
|
};
|
|
44
44
|
if (isDoubao) return {
|
|
@@ -66,4 +66,53 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
|
|
|
66
66
|
uiTarsVersion: void 0
|
|
67
67
|
};
|
|
68
68
|
};
|
|
69
|
-
|
|
69
|
+
const detectLegacyVlModeEnvVars = (provider)=>{
|
|
70
|
+
const legacyVars = [
|
|
71
|
+
MIDSCENE_USE_DOUBAO_VISION,
|
|
72
|
+
MIDSCENE_USE_QWEN_VL,
|
|
73
|
+
MIDSCENE_USE_QWEN3_VL,
|
|
74
|
+
MIDSCENE_USE_VLM_UI_TARS,
|
|
75
|
+
MIDSCENE_USE_GEMINI
|
|
76
|
+
];
|
|
77
|
+
return legacyVars.filter((varName)=>provider[varName]);
|
|
78
|
+
};
|
|
79
|
+
function isValidModelFamily(value) {
|
|
80
|
+
return MODEL_FAMILY_VALUES.includes(value);
|
|
81
|
+
}
|
|
82
|
+
function mapLegacyToModelFamily(vlMode, uiTarsVersion) {
|
|
83
|
+
if (!vlMode) return;
|
|
84
|
+
if ('vlm-ui-tars' === vlMode) if (uiTarsVersion === UITarsModelVersion.V1_0) return 'vlm-ui-tars';
|
|
85
|
+
else if (uiTarsVersion === UITarsModelVersion.DOUBAO_1_5_20B) return 'vlm-ui-tars-doubao-1.5';
|
|
86
|
+
else return 'vlm-ui-tars-doubao';
|
|
87
|
+
return vlMode;
|
|
88
|
+
}
|
|
89
|
+
const parseModelFamilyFromEnv = (provider)=>{
|
|
90
|
+
const warnings = [];
|
|
91
|
+
const modelFamilyRaw = provider[MIDSCENE_MODEL_FAMILY];
|
|
92
|
+
const legacyVars = detectLegacyVlModeEnvVars(provider);
|
|
93
|
+
if (modelFamilyRaw && legacyVars.length > 0) throw new Error(`Conflicting configuration detected: Both MIDSCENE_MODEL_FAMILY and legacy environment variables (${legacyVars.join(', ')}) are set. Please use only MIDSCENE_MODEL_FAMILY.`);
|
|
94
|
+
if (modelFamilyRaw) {
|
|
95
|
+
if (!isValidModelFamily(modelFamilyRaw)) throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: "${modelFamilyRaw}". Must be one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
|
|
96
|
+
const modelFamily = modelFamilyRaw;
|
|
97
|
+
const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(modelFamily);
|
|
98
|
+
return {
|
|
99
|
+
vlMode: parsed.vlMode,
|
|
100
|
+
uiTarsVersion: parsed.uiTarsVersion,
|
|
101
|
+
modelFamily,
|
|
102
|
+
warnings
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
if (legacyVars.length > 0) {
|
|
106
|
+
const legacyResult = parseVlModeAndUiTarsFromGlobalConfig(provider);
|
|
107
|
+
warnings.push(`DEPRECATED: Environment ${legacyVars.length > 1 ? 'variables' : 'variable'} ${legacyVars.join(', ')} ${legacyVars.length > 1 ? 'are' : 'is'} deprecated. Please use MIDSCENE_MODEL_FAMILY instead. See migration guide for details.`);
|
|
108
|
+
const modelFamily = mapLegacyToModelFamily(legacyResult.vlMode, legacyResult.uiTarsVersion);
|
|
109
|
+
return {
|
|
110
|
+
vlMode: legacyResult.vlMode,
|
|
111
|
+
uiTarsVersion: legacyResult.uiTarsVersion,
|
|
112
|
+
modelFamily,
|
|
113
|
+
warnings
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
throw new Error(`MIDSCENE_MODEL_FAMILY is required for planning tasks. Please set it to one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
|
|
117
|
+
};
|
|
118
|
+
export { detectLegacyVlModeEnvVars, parseModelFamilyFromEnv, parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue };
|
package/dist/es/env/types.mjs
CHANGED
|
@@ -56,6 +56,7 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
|
|
|
56
56
|
const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
|
|
57
57
|
const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
|
|
58
58
|
const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
|
|
59
|
+
const MIDSCENE_MODEL_FAMILY = 'MIDSCENE_MODEL_FAMILY';
|
|
59
60
|
const UNUSED_ENV_KEYS = [
|
|
60
61
|
MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
|
|
61
62
|
];
|
|
@@ -129,7 +130,8 @@ const MODEL_ENV_KEYS = [
|
|
|
129
130
|
MIDSCENE_PLANNING_MODEL_BASE_URL,
|
|
130
131
|
MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
131
132
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
132
|
-
MIDSCENE_PLANNING_LOCATOR_MODE
|
|
133
|
+
MIDSCENE_PLANNING_LOCATOR_MODE,
|
|
134
|
+
MIDSCENE_MODEL_FAMILY
|
|
133
135
|
];
|
|
134
136
|
const ALL_ENV_KEYS = [
|
|
135
137
|
...UNUSED_ENV_KEYS,
|
|
@@ -137,20 +139,23 @@ const ALL_ENV_KEYS = [
|
|
|
137
139
|
...GLOBAL_ENV_KEYS,
|
|
138
140
|
...MODEL_ENV_KEYS
|
|
139
141
|
];
|
|
140
|
-
var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
|
|
141
|
-
UITarsModelVersion["V1_0"] = "1.0";
|
|
142
|
-
UITarsModelVersion["V1_5"] = "1.5";
|
|
143
|
-
UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
|
|
144
|
-
UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
|
|
145
|
-
return UITarsModelVersion;
|
|
146
|
-
}({});
|
|
147
142
|
const VL_MODE_RAW_VALID_VALUES = [
|
|
148
143
|
'doubao-vision',
|
|
149
144
|
'gemini',
|
|
150
|
-
'
|
|
145
|
+
'qwen2.5-vl',
|
|
151
146
|
'qwen3-vl',
|
|
152
147
|
'vlm-ui-tars',
|
|
153
148
|
'vlm-ui-tars-doubao',
|
|
154
149
|
'vlm-ui-tars-doubao-1.5'
|
|
155
150
|
];
|
|
156
|
-
|
|
151
|
+
const MODEL_FAMILY_VALUES = [
|
|
152
|
+
...VL_MODE_RAW_VALID_VALUES
|
|
153
|
+
];
|
|
154
|
+
var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
|
|
155
|
+
UITarsModelVersion["V1_0"] = "1.0";
|
|
156
|
+
UITarsModelVersion["V1_5"] = "1.5";
|
|
157
|
+
UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
|
|
158
|
+
UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
|
|
159
|
+
return UITarsModelVersion;
|
|
160
|
+
}({});
|
|
161
|
+
export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_INSIGHT_LOCATOR_MODE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LOCATOR_MODE, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_API_KEY, MODEL_BASE_URL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
|
|
@@ -71,6 +71,33 @@ const getModelDescription = (vlMode, uiTarsVersion)=>{
|
|
|
71
71
|
else return `${vlMode} mode`;
|
|
72
72
|
return '';
|
|
73
73
|
};
|
|
74
|
+
const parseVlModeForIntent = (intent, allEnvConfig, result, debugLog)=>{
|
|
75
|
+
if ('planning' === intent) {
|
|
76
|
+
const parseResult = (0, external_parse_js_namespaceObject.parseModelFamilyFromEnv)(allEnvConfig);
|
|
77
|
+
parseResult.warnings.forEach((warning)=>{
|
|
78
|
+
console.warn(`[Midscene] ${warning}`);
|
|
79
|
+
});
|
|
80
|
+
if (parseResult.modelFamily) debugLog(`Using model family: ${parseResult.modelFamily}`);
|
|
81
|
+
return {
|
|
82
|
+
vlMode: parseResult.vlMode,
|
|
83
|
+
uiTarsVersion: parseResult.uiTarsVersion
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
if (void 0 !== result.vlModeRaw) {
|
|
87
|
+
const parsed = (0, external_parse_js_namespaceObject.parseVlModeAndUiTarsModelVersionFromRawValue)(result.vlModeRaw);
|
|
88
|
+
return {
|
|
89
|
+
vlMode: parsed.vlMode,
|
|
90
|
+
uiTarsVersion: parsed.uiTarsVersion
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
{
|
|
94
|
+
const parsed = (0, external_parse_js_namespaceObject.parseVlModeAndUiTarsFromGlobalConfig)(allEnvConfig);
|
|
95
|
+
return {
|
|
96
|
+
vlMode: parsed.vlMode,
|
|
97
|
+
uiTarsVersion: parsed.uiTarsVersion
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
};
|
|
74
101
|
const decideModelConfigFromIntentConfig = (intent, intentConfig)=>{
|
|
75
102
|
const debugLog = (0, external_logger_js_namespaceObject.getDebug)('ai:config');
|
|
76
103
|
debugLog('decideModelConfig base on agent.modelConfig()');
|
|
@@ -117,7 +144,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
|
|
|
117
144
|
provider: allEnvConfig,
|
|
118
145
|
valueAssert: (0, external_helper_js_namespaceObject.createAssert)(keysForEnv.modelName, 'process.env', modelName)
|
|
119
146
|
});
|
|
120
|
-
const { vlMode, uiTarsVersion } = (
|
|
147
|
+
const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
|
|
121
148
|
const modelDescription = getModelDescription(vlMode, uiTarsVersion);
|
|
122
149
|
const finalResult = {
|
|
123
150
|
...result,
|
|
@@ -137,7 +164,7 @@ const decideModelConfigFromEnv = (intent, allEnvConfig)=>{
|
|
|
137
164
|
provider: allEnvConfig,
|
|
138
165
|
valueAssert: (0, external_helper_js_namespaceObject.createAssert)(external_constants_js_namespaceObject.DEFAULT_MODEL_CONFIG_KEYS_LEGACY.modelName, 'process.env')
|
|
139
166
|
});
|
|
140
|
-
const { vlMode, uiTarsVersion } = (
|
|
167
|
+
const { vlMode, uiTarsVersion } = parseVlModeForIntent(intent, allEnvConfig, result, debugLog);
|
|
141
168
|
const modelDescription = getModelDescription(vlMode, uiTarsVersion);
|
|
142
169
|
const finalResult = {
|
|
143
170
|
...result,
|
package/dist/lib/env/parse.js
CHANGED
|
@@ -25,6 +25,8 @@ var __webpack_exports__ = {};
|
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
27
|
parseVlModeAndUiTarsFromGlobalConfig: ()=>parseVlModeAndUiTarsFromGlobalConfig,
|
|
28
|
+
detectLegacyVlModeEnvVars: ()=>detectLegacyVlModeEnvVars,
|
|
29
|
+
parseModelFamilyFromEnv: ()=>parseModelFamilyFromEnv,
|
|
28
30
|
parseVlModeAndUiTarsModelVersionFromRawValue: ()=>parseVlModeAndUiTarsModelVersionFromRawValue
|
|
29
31
|
});
|
|
30
32
|
const external_types_js_namespaceObject = require("./types.js");
|
|
@@ -67,7 +69,7 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
|
|
|
67
69
|
uiTarsVersion: void 0
|
|
68
70
|
};
|
|
69
71
|
if (isQwen) return {
|
|
70
|
-
vlMode: '
|
|
72
|
+
vlMode: 'qwen2.5-vl',
|
|
71
73
|
uiTarsVersion: void 0
|
|
72
74
|
};
|
|
73
75
|
if (isDoubao) return {
|
|
@@ -95,9 +97,62 @@ const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
|
|
|
95
97
|
uiTarsVersion: void 0
|
|
96
98
|
};
|
|
97
99
|
};
|
|
100
|
+
const detectLegacyVlModeEnvVars = (provider)=>{
|
|
101
|
+
const legacyVars = [
|
|
102
|
+
external_types_js_namespaceObject.MIDSCENE_USE_DOUBAO_VISION,
|
|
103
|
+
external_types_js_namespaceObject.MIDSCENE_USE_QWEN_VL,
|
|
104
|
+
external_types_js_namespaceObject.MIDSCENE_USE_QWEN3_VL,
|
|
105
|
+
external_types_js_namespaceObject.MIDSCENE_USE_VLM_UI_TARS,
|
|
106
|
+
external_types_js_namespaceObject.MIDSCENE_USE_GEMINI
|
|
107
|
+
];
|
|
108
|
+
return legacyVars.filter((varName)=>provider[varName]);
|
|
109
|
+
};
|
|
110
|
+
function isValidModelFamily(value) {
|
|
111
|
+
return external_types_js_namespaceObject.MODEL_FAMILY_VALUES.includes(value);
|
|
112
|
+
}
|
|
113
|
+
function mapLegacyToModelFamily(vlMode, uiTarsVersion) {
|
|
114
|
+
if (!vlMode) return;
|
|
115
|
+
if ('vlm-ui-tars' === vlMode) if (uiTarsVersion === external_types_js_namespaceObject.UITarsModelVersion.V1_0) return 'vlm-ui-tars';
|
|
116
|
+
else if (uiTarsVersion === external_types_js_namespaceObject.UITarsModelVersion.DOUBAO_1_5_20B) return 'vlm-ui-tars-doubao-1.5';
|
|
117
|
+
else return 'vlm-ui-tars-doubao';
|
|
118
|
+
return vlMode;
|
|
119
|
+
}
|
|
120
|
+
const parseModelFamilyFromEnv = (provider)=>{
|
|
121
|
+
const warnings = [];
|
|
122
|
+
const modelFamilyRaw = provider[external_types_js_namespaceObject.MIDSCENE_MODEL_FAMILY];
|
|
123
|
+
const legacyVars = detectLegacyVlModeEnvVars(provider);
|
|
124
|
+
if (modelFamilyRaw && legacyVars.length > 0) throw new Error(`Conflicting configuration detected: Both MIDSCENE_MODEL_FAMILY and legacy environment variables (${legacyVars.join(', ')}) are set. Please use only MIDSCENE_MODEL_FAMILY.`);
|
|
125
|
+
if (modelFamilyRaw) {
|
|
126
|
+
if (!isValidModelFamily(modelFamilyRaw)) throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: "${modelFamilyRaw}". Must be one of: ${external_types_js_namespaceObject.MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
|
|
127
|
+
const modelFamily = modelFamilyRaw;
|
|
128
|
+
const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(modelFamily);
|
|
129
|
+
return {
|
|
130
|
+
vlMode: parsed.vlMode,
|
|
131
|
+
uiTarsVersion: parsed.uiTarsVersion,
|
|
132
|
+
modelFamily,
|
|
133
|
+
warnings
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
if (legacyVars.length > 0) {
|
|
137
|
+
const legacyResult = parseVlModeAndUiTarsFromGlobalConfig(provider);
|
|
138
|
+
warnings.push(`DEPRECATED: Environment ${legacyVars.length > 1 ? 'variables' : 'variable'} ${legacyVars.join(', ')} ${legacyVars.length > 1 ? 'are' : 'is'} deprecated. Please use MIDSCENE_MODEL_FAMILY instead. See migration guide for details.`);
|
|
139
|
+
const modelFamily = mapLegacyToModelFamily(legacyResult.vlMode, legacyResult.uiTarsVersion);
|
|
140
|
+
return {
|
|
141
|
+
vlMode: legacyResult.vlMode,
|
|
142
|
+
uiTarsVersion: legacyResult.uiTarsVersion,
|
|
143
|
+
modelFamily,
|
|
144
|
+
warnings
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
throw new Error(`MIDSCENE_MODEL_FAMILY is required for planning tasks. Please set it to one of: ${external_types_js_namespaceObject.MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`);
|
|
148
|
+
};
|
|
149
|
+
exports.detectLegacyVlModeEnvVars = __webpack_exports__.detectLegacyVlModeEnvVars;
|
|
150
|
+
exports.parseModelFamilyFromEnv = __webpack_exports__.parseModelFamilyFromEnv;
|
|
98
151
|
exports.parseVlModeAndUiTarsFromGlobalConfig = __webpack_exports__.parseVlModeAndUiTarsFromGlobalConfig;
|
|
99
152
|
exports.parseVlModeAndUiTarsModelVersionFromRawValue = __webpack_exports__.parseVlModeAndUiTarsModelVersionFromRawValue;
|
|
100
153
|
for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
154
|
+
"detectLegacyVlModeEnvVars",
|
|
155
|
+
"parseModelFamilyFromEnv",
|
|
101
156
|
"parseVlModeAndUiTarsFromGlobalConfig",
|
|
102
157
|
"parseVlModeAndUiTarsModelVersionFromRawValue"
|
|
103
158
|
].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
package/dist/lib/env/types.js
CHANGED
|
@@ -56,6 +56,7 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
56
56
|
MIDSCENE_MCP_USE_PUPPETEER_MODE: ()=>MIDSCENE_MCP_USE_PUPPETEER_MODE,
|
|
57
57
|
MIDSCENE_MODEL_API_KEY: ()=>MIDSCENE_MODEL_API_KEY,
|
|
58
58
|
MIDSCENE_MODEL_BASE_URL: ()=>MIDSCENE_MODEL_BASE_URL,
|
|
59
|
+
MIDSCENE_MODEL_FAMILY: ()=>MIDSCENE_MODEL_FAMILY,
|
|
59
60
|
MIDSCENE_MODEL_HTTP_PROXY: ()=>MIDSCENE_MODEL_HTTP_PROXY,
|
|
60
61
|
MIDSCENE_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_MODEL_INIT_CONFIG_JSON,
|
|
61
62
|
MIDSCENE_MODEL_MAX_TOKENS: ()=>MIDSCENE_MODEL_MAX_TOKENS,
|
|
@@ -84,6 +85,7 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
84
85
|
MODEL_API_KEY: ()=>MODEL_API_KEY,
|
|
85
86
|
MODEL_BASE_URL: ()=>MODEL_BASE_URL,
|
|
86
87
|
MODEL_ENV_KEYS: ()=>MODEL_ENV_KEYS,
|
|
88
|
+
MODEL_FAMILY_VALUES: ()=>MODEL_FAMILY_VALUES,
|
|
87
89
|
NUMBER_ENV_KEYS: ()=>NUMBER_ENV_KEYS,
|
|
88
90
|
OPENAI_API_KEY: ()=>OPENAI_API_KEY,
|
|
89
91
|
OPENAI_BASE_URL: ()=>OPENAI_BASE_URL,
|
|
@@ -151,6 +153,7 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
|
|
|
151
153
|
const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
|
|
152
154
|
const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
|
|
153
155
|
const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
|
|
156
|
+
const MIDSCENE_MODEL_FAMILY = 'MIDSCENE_MODEL_FAMILY';
|
|
154
157
|
const UNUSED_ENV_KEYS = [
|
|
155
158
|
MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
|
|
156
159
|
];
|
|
@@ -224,7 +227,8 @@ const MODEL_ENV_KEYS = [
|
|
|
224
227
|
MIDSCENE_PLANNING_MODEL_BASE_URL,
|
|
225
228
|
MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
226
229
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
227
|
-
MIDSCENE_PLANNING_LOCATOR_MODE
|
|
230
|
+
MIDSCENE_PLANNING_LOCATOR_MODE,
|
|
231
|
+
MIDSCENE_MODEL_FAMILY
|
|
228
232
|
];
|
|
229
233
|
const ALL_ENV_KEYS = [
|
|
230
234
|
...UNUSED_ENV_KEYS,
|
|
@@ -232,22 +236,25 @@ const ALL_ENV_KEYS = [
|
|
|
232
236
|
...GLOBAL_ENV_KEYS,
|
|
233
237
|
...MODEL_ENV_KEYS
|
|
234
238
|
];
|
|
235
|
-
var UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
|
|
236
|
-
UITarsModelVersion["V1_0"] = "1.0";
|
|
237
|
-
UITarsModelVersion["V1_5"] = "1.5";
|
|
238
|
-
UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
|
|
239
|
-
UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
|
|
240
|
-
return UITarsModelVersion;
|
|
241
|
-
}({});
|
|
242
239
|
const VL_MODE_RAW_VALID_VALUES = [
|
|
243
240
|
'doubao-vision',
|
|
244
241
|
'gemini',
|
|
245
|
-
'
|
|
242
|
+
'qwen2.5-vl',
|
|
246
243
|
'qwen3-vl',
|
|
247
244
|
'vlm-ui-tars',
|
|
248
245
|
'vlm-ui-tars-doubao',
|
|
249
246
|
'vlm-ui-tars-doubao-1.5'
|
|
250
247
|
];
|
|
248
|
+
const MODEL_FAMILY_VALUES = [
|
|
249
|
+
...VL_MODE_RAW_VALID_VALUES
|
|
250
|
+
];
|
|
251
|
+
var UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
|
|
252
|
+
UITarsModelVersion["V1_0"] = "1.0";
|
|
253
|
+
UITarsModelVersion["V1_5"] = "1.5";
|
|
254
|
+
UITarsModelVersion["DOUBAO_1_5_15B"] = "doubao-1.5-15B";
|
|
255
|
+
UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
|
|
256
|
+
return UITarsModelVersion;
|
|
257
|
+
}({});
|
|
251
258
|
exports.ALL_ENV_KEYS = __webpack_exports__.ALL_ENV_KEYS;
|
|
252
259
|
exports.BASIC_ENV_KEYS = __webpack_exports__.BASIC_ENV_KEYS;
|
|
253
260
|
exports.BOOLEAN_ENV_KEYS = __webpack_exports__.BOOLEAN_ENV_KEYS;
|
|
@@ -280,6 +287,7 @@ exports.MIDSCENE_MCP_CHROME_PATH = __webpack_exports__.MIDSCENE_MCP_CHROME_PATH;
|
|
|
280
287
|
exports.MIDSCENE_MCP_USE_PUPPETEER_MODE = __webpack_exports__.MIDSCENE_MCP_USE_PUPPETEER_MODE;
|
|
281
288
|
exports.MIDSCENE_MODEL_API_KEY = __webpack_exports__.MIDSCENE_MODEL_API_KEY;
|
|
282
289
|
exports.MIDSCENE_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_MODEL_BASE_URL;
|
|
290
|
+
exports.MIDSCENE_MODEL_FAMILY = __webpack_exports__.MIDSCENE_MODEL_FAMILY;
|
|
283
291
|
exports.MIDSCENE_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_MODEL_HTTP_PROXY;
|
|
284
292
|
exports.MIDSCENE_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_MODEL_INIT_CONFIG_JSON;
|
|
285
293
|
exports.MIDSCENE_MODEL_MAX_TOKENS = __webpack_exports__.MIDSCENE_MODEL_MAX_TOKENS;
|
|
@@ -308,6 +316,7 @@ exports.MIDSCENE_USE_VL_MODEL = __webpack_exports__.MIDSCENE_USE_VL_MODEL;
|
|
|
308
316
|
exports.MODEL_API_KEY = __webpack_exports__.MODEL_API_KEY;
|
|
309
317
|
exports.MODEL_BASE_URL = __webpack_exports__.MODEL_BASE_URL;
|
|
310
318
|
exports.MODEL_ENV_KEYS = __webpack_exports__.MODEL_ENV_KEYS;
|
|
319
|
+
exports.MODEL_FAMILY_VALUES = __webpack_exports__.MODEL_FAMILY_VALUES;
|
|
311
320
|
exports.NUMBER_ENV_KEYS = __webpack_exports__.NUMBER_ENV_KEYS;
|
|
312
321
|
exports.OPENAI_API_KEY = __webpack_exports__.OPENAI_API_KEY;
|
|
313
322
|
exports.OPENAI_BASE_URL = __webpack_exports__.OPENAI_BASE_URL;
|
|
@@ -349,6 +358,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
|
349
358
|
"MIDSCENE_MCP_USE_PUPPETEER_MODE",
|
|
350
359
|
"MIDSCENE_MODEL_API_KEY",
|
|
351
360
|
"MIDSCENE_MODEL_BASE_URL",
|
|
361
|
+
"MIDSCENE_MODEL_FAMILY",
|
|
352
362
|
"MIDSCENE_MODEL_HTTP_PROXY",
|
|
353
363
|
"MIDSCENE_MODEL_INIT_CONFIG_JSON",
|
|
354
364
|
"MIDSCENE_MODEL_MAX_TOKENS",
|
|
@@ -377,6 +387,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
|
377
387
|
"MODEL_API_KEY",
|
|
378
388
|
"MODEL_BASE_URL",
|
|
379
389
|
"MODEL_ENV_KEYS",
|
|
390
|
+
"MODEL_FAMILY_VALUES",
|
|
380
391
|
"NUMBER_ENV_KEYS",
|
|
381
392
|
"OPENAI_API_KEY",
|
|
382
393
|
"OPENAI_BASE_URL",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type TVlModeTypes, UITarsModelVersion } from './types';
|
|
1
|
+
import { type TModelFamily, type TVlModeTypes, UITarsModelVersion } from './types';
|
|
2
2
|
export declare const parseVlModeAndUiTarsModelVersionFromRawValue: (vlModeRaw?: string) => {
|
|
3
3
|
vlMode?: TVlModeTypes;
|
|
4
4
|
uiTarsVersion?: UITarsModelVersion;
|
|
@@ -10,3 +10,22 @@ export declare const parseVlModeAndUiTarsFromGlobalConfig: (provider: Record<str
|
|
|
10
10
|
vlMode?: TVlModeTypes;
|
|
11
11
|
uiTarsVersion?: UITarsModelVersion;
|
|
12
12
|
};
|
|
13
|
+
/**
|
|
14
|
+
* Check if old MIDSCENE_USE_* environment variables are being used
|
|
15
|
+
* @param provider - Environment variable provider
|
|
16
|
+
* @returns Array of legacy environment variable names that are set
|
|
17
|
+
*/
|
|
18
|
+
export declare const detectLegacyVlModeEnvVars: (provider: Record<string, string | undefined>) => string[];
|
|
19
|
+
/**
|
|
20
|
+
* Parse model family from environment variables with validation and warnings
|
|
21
|
+
* Supports both new MIDSCENE_MODEL_FAMILY and legacy MIDSCENE_USE_* variables
|
|
22
|
+
*
|
|
23
|
+
* @param provider - Environment variable provider
|
|
24
|
+
* @returns Object with vlMode, uiTarsVersion, and warnings
|
|
25
|
+
*/
|
|
26
|
+
export declare const parseModelFamilyFromEnv: (provider: Record<string, string | undefined>) => {
|
|
27
|
+
vlMode?: TVlModeTypes;
|
|
28
|
+
uiTarsVersion?: UITarsModelVersion;
|
|
29
|
+
warnings: string[];
|
|
30
|
+
modelFamily?: TModelFamily;
|
|
31
|
+
};
|
|
@@ -80,6 +80,7 @@ export declare const MIDSCENE_PLANNING_MODEL_BASE_URL = "MIDSCENE_PLANNING_MODEL
|
|
|
80
80
|
export declare const MIDSCENE_PLANNING_MODEL_API_KEY = "MIDSCENE_PLANNING_MODEL_API_KEY";
|
|
81
81
|
export declare const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON";
|
|
82
82
|
export declare const MIDSCENE_PLANNING_LOCATOR_MODE = "MIDSCENE_PLANNING_LOCATOR_MODE";
|
|
83
|
+
export declare const MIDSCENE_MODEL_FAMILY = "MIDSCENE_MODEL_FAMILY";
|
|
83
84
|
/**
|
|
84
85
|
* env keys declared but unused
|
|
85
86
|
*/
|
|
@@ -103,12 +104,23 @@ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORC
|
|
|
103
104
|
* Can be override by both agent.modelConfig and overrideAIConfig
|
|
104
105
|
* Can only be access after agent.constructor
|
|
105
106
|
*/
|
|
106
|
-
export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
|
|
107
|
-
export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
|
|
107
|
+
export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE", "MIDSCENE_MODEL_FAMILY"];
|
|
108
|
+
export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE", "MIDSCENE_MODEL_FAMILY"];
|
|
108
109
|
export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
|
|
109
110
|
export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
|
|
110
|
-
export type TVlModeValues = '
|
|
111
|
-
export type TVlModeTypes = '
|
|
111
|
+
export type TVlModeValues = 'qwen2.5-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
|
|
112
|
+
export type TVlModeTypes = 'qwen2.5-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars';
|
|
113
|
+
export declare const VL_MODE_RAW_VALID_VALUES: TVlModeValues[];
|
|
114
|
+
/**
|
|
115
|
+
* Model family values - unified model configuration approach
|
|
116
|
+
* Replaces the old MIDSCENE_USE_* environment variables
|
|
117
|
+
*
|
|
118
|
+
* Note: These values directly correspond to VL_MODE_RAW_VALID_VALUES
|
|
119
|
+
* - 'qwen2.5-vl' is Qwen 2.5
|
|
120
|
+
* - 'qwen3-vl' is Qwen 3
|
|
121
|
+
*/
|
|
122
|
+
export type TModelFamily = TVlModeValues;
|
|
123
|
+
export declare const MODEL_FAMILY_VALUES: TVlModeValues[];
|
|
112
124
|
export interface IModelConfigForInsight {
|
|
113
125
|
[MIDSCENE_INSIGHT_MODEL_NAME]: string;
|
|
114
126
|
[MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY]?: string;
|
|
@@ -125,7 +137,7 @@ export interface IModelConfigForInsight {
|
|
|
125
137
|
* DOM-based planning is not supported.
|
|
126
138
|
*
|
|
127
139
|
* Required: MIDSCENE_PLANNING_LOCATOR_MODE must be set to one of:
|
|
128
|
-
* - '
|
|
140
|
+
* - 'qwen2.5-vl'
|
|
129
141
|
* - 'qwen3-vl'
|
|
130
142
|
* - 'gemini'
|
|
131
143
|
* - 'doubao-vision'
|
|
@@ -184,7 +196,6 @@ export declare enum UITarsModelVersion {
|
|
|
184
196
|
DOUBAO_1_5_15B = "doubao-1.5-15B",
|
|
185
197
|
DOUBAO_1_5_20B = "doubao-1.5-20B"
|
|
186
198
|
}
|
|
187
|
-
export declare const VL_MODE_RAW_VALID_VALUES: TVlModeValues[];
|
|
188
199
|
/**
|
|
189
200
|
* Callback to create custom OpenAI client instance
|
|
190
201
|
* @param config - Resolved model configuration including apiKey, baseURL, modelName, intent, etc.
|
|
@@ -229,8 +240,8 @@ export interface IModelConfig {
|
|
|
229
240
|
openaiApiKey?: string;
|
|
230
241
|
openaiExtraConfig?: Record<string, unknown>;
|
|
231
242
|
/**
|
|
232
|
-
* - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', '
|
|
233
|
-
* - vlMode: based on the results of the vlModoRaw classification,value can be 'doubao-vision', 'gemini', '
|
|
243
|
+
* - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'qwen3-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
|
|
244
|
+
* - vlMode: based on the results of the vlModeRaw classification, value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'qwen3-vl', 'vlm-ui-tars'
|
|
234
245
|
*/
|
|
235
246
|
vlModeRaw?: string;
|
|
236
247
|
vlMode?: TVlModeTypes;
|
package/package.json
CHANGED
|
@@ -31,6 +31,7 @@ import { assert } from '../utils';
|
|
|
31
31
|
import { createAssert, maskConfig, parseJson } from './helper';
|
|
32
32
|
import { initDebugConfig } from './init-debug';
|
|
33
33
|
import {
|
|
34
|
+
parseModelFamilyFromEnv,
|
|
34
35
|
parseVlModeAndUiTarsFromGlobalConfig,
|
|
35
36
|
parseVlModeAndUiTarsModelVersionFromRawValue,
|
|
36
37
|
} from './parse';
|
|
@@ -167,6 +168,56 @@ const getModelDescription = (
|
|
|
167
168
|
return '';
|
|
168
169
|
};
|
|
169
170
|
|
|
171
|
+
/**
|
|
172
|
+
* Parse vlMode and uiTarsVersion based on intent and config
|
|
173
|
+
* Consolidates the logic for handling planning intent vs other intents
|
|
174
|
+
*/
|
|
175
|
+
const parseVlModeForIntent = (
|
|
176
|
+
intent: TIntent,
|
|
177
|
+
allEnvConfig: Record<string, string | undefined>,
|
|
178
|
+
result: { vlModeRaw?: string },
|
|
179
|
+
debugLog: (...args: any[]) => void,
|
|
180
|
+
): {
|
|
181
|
+
vlMode?: TVlModeTypes;
|
|
182
|
+
uiTarsVersion?: UITarsModelVersion;
|
|
183
|
+
} => {
|
|
184
|
+
if (intent === 'planning') {
|
|
185
|
+
const parseResult = parseModelFamilyFromEnv(allEnvConfig);
|
|
186
|
+
|
|
187
|
+
// Surface warnings to the user via console.warn (not the debug log)
|
|
188
|
+
parseResult.warnings.forEach((warning) => {
|
|
189
|
+
console.warn(`[Midscene] ${warning}`);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
if (parseResult.modelFamily) {
|
|
193
|
+
debugLog(`Using model family: ${parseResult.modelFamily}`);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
return {
|
|
197
|
+
vlMode: parseResult.vlMode,
|
|
198
|
+
uiTarsVersion: parseResult.uiTarsVersion,
|
|
199
|
+
};
|
|
200
|
+
} else {
|
|
201
|
+
// For other intents, use parseVlModeAndUiTarsModelVersionFromRawValue if vlModeRaw is available
|
|
202
|
+
// Otherwise, use parseVlModeAndUiTarsFromGlobalConfig
|
|
203
|
+
if (result.vlModeRaw !== undefined) {
|
|
204
|
+
const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(
|
|
205
|
+
result.vlModeRaw,
|
|
206
|
+
);
|
|
207
|
+
return {
|
|
208
|
+
vlMode: parsed.vlMode,
|
|
209
|
+
uiTarsVersion: parsed.uiTarsVersion,
|
|
210
|
+
};
|
|
211
|
+
} else {
|
|
212
|
+
const parsed = parseVlModeAndUiTarsFromGlobalConfig(allEnvConfig);
|
|
213
|
+
return {
|
|
214
|
+
vlMode: parsed.vlMode,
|
|
215
|
+
uiTarsVersion: parsed.uiTarsVersion,
|
|
216
|
+
};
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
};
|
|
220
|
+
|
|
170
221
|
export const decideModelConfigFromIntentConfig = (
|
|
171
222
|
intent: TIntent,
|
|
172
223
|
intentConfig: Record<string, string | undefined>,
|
|
@@ -254,8 +305,13 @@ export const decideModelConfigFromEnv = (
|
|
|
254
305
|
valueAssert: createAssert(keysForEnv.modelName, 'process.env', modelName),
|
|
255
306
|
});
|
|
256
307
|
|
|
257
|
-
const { vlMode, uiTarsVersion } =
|
|
258
|
-
|
|
308
|
+
const { vlMode, uiTarsVersion } = parseVlModeForIntent(
|
|
309
|
+
intent,
|
|
310
|
+
allEnvConfig,
|
|
311
|
+
result,
|
|
312
|
+
debugLog,
|
|
313
|
+
);
|
|
314
|
+
|
|
259
315
|
const modelDescription = getModelDescription(vlMode, uiTarsVersion);
|
|
260
316
|
|
|
261
317
|
const finalResult: IModelConfig = {
|
|
@@ -287,8 +343,12 @@ export const decideModelConfigFromEnv = (
|
|
|
287
343
|
),
|
|
288
344
|
});
|
|
289
345
|
|
|
290
|
-
const { vlMode, uiTarsVersion } =
|
|
291
|
-
|
|
346
|
+
const { vlMode, uiTarsVersion } = parseVlModeForIntent(
|
|
347
|
+
intent,
|
|
348
|
+
allEnvConfig,
|
|
349
|
+
result,
|
|
350
|
+
debugLog,
|
|
351
|
+
);
|
|
292
352
|
|
|
293
353
|
const modelDescription = getModelDescription(vlMode, uiTarsVersion);
|
|
294
354
|
|
package/src/env/parse.ts
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
|
+
MIDSCENE_MODEL_FAMILY,
|
|
2
3
|
MIDSCENE_USE_DOUBAO_VISION,
|
|
3
4
|
MIDSCENE_USE_GEMINI,
|
|
4
5
|
MIDSCENE_USE_QWEN3_VL,
|
|
5
6
|
MIDSCENE_USE_QWEN_VL,
|
|
6
7
|
MIDSCENE_USE_VLM_UI_TARS,
|
|
8
|
+
MODEL_FAMILY_VALUES,
|
|
9
|
+
type TModelFamily,
|
|
7
10
|
type TVlModeTypes,
|
|
8
11
|
type TVlModeValues,
|
|
9
12
|
UITarsModelVersion,
|
|
@@ -17,10 +20,7 @@ export const parseVlModeAndUiTarsModelVersionFromRawValue = (
|
|
|
17
20
|
uiTarsVersion?: UITarsModelVersion;
|
|
18
21
|
} => {
|
|
19
22
|
if (!vlModeRaw) {
|
|
20
|
-
return {
|
|
21
|
-
vlMode: undefined,
|
|
22
|
-
uiTarsVersion: undefined,
|
|
23
|
-
};
|
|
23
|
+
return { vlMode: undefined, uiTarsVersion: undefined };
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
if (!VL_MODE_RAW_VALID_VALUES.includes(vlModeRaw as never)) {
|
|
@@ -31,21 +31,17 @@ export const parseVlModeAndUiTarsModelVersionFromRawValue = (
|
|
|
31
31
|
const raw = vlModeRaw as TVlModeValues;
|
|
32
32
|
|
|
33
33
|
if (raw === 'vlm-ui-tars') {
|
|
34
|
-
return {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
} else if (raw === 'vlm-ui-tars-doubao' || raw === 'vlm-ui-tars-doubao-1.5') {
|
|
34
|
+
return { vlMode: 'vlm-ui-tars', uiTarsVersion: UITarsModelVersion.V1_0 };
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
if (raw === 'vlm-ui-tars-doubao' || raw === 'vlm-ui-tars-doubao-1.5') {
|
|
39
38
|
return {
|
|
40
39
|
vlMode: 'vlm-ui-tars',
|
|
41
40
|
uiTarsVersion: UITarsModelVersion.DOUBAO_1_5_20B,
|
|
42
41
|
};
|
|
43
42
|
}
|
|
44
43
|
|
|
45
|
-
return {
|
|
46
|
-
vlMode: raw as TVlModeTypes,
|
|
47
|
-
uiTarsVersion: undefined,
|
|
48
|
-
};
|
|
44
|
+
return { vlMode: raw as TVlModeTypes, uiTarsVersion: undefined };
|
|
49
45
|
};
|
|
50
46
|
|
|
51
47
|
/**
|
|
@@ -77,34 +73,13 @@ export const parseVlModeAndUiTarsFromGlobalConfig = (
|
|
|
77
73
|
);
|
|
78
74
|
}
|
|
79
75
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
if (isQwen) {
|
|
88
|
-
return {
|
|
89
|
-
vlMode: 'qwen-vl',
|
|
90
|
-
uiTarsVersion: undefined,
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
if (isDoubao) {
|
|
95
|
-
return {
|
|
96
|
-
vlMode: 'doubao-vision',
|
|
97
|
-
uiTarsVersion: undefined,
|
|
98
|
-
};
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
if (isGemini) {
|
|
102
|
-
return {
|
|
103
|
-
vlMode: 'gemini',
|
|
104
|
-
uiTarsVersion: undefined,
|
|
105
|
-
};
|
|
106
|
-
}
|
|
76
|
+
// Simple modes without version
|
|
77
|
+
if (isQwen3) return { vlMode: 'qwen3-vl', uiTarsVersion: undefined };
|
|
78
|
+
if (isQwen) return { vlMode: 'qwen2.5-vl', uiTarsVersion: undefined };
|
|
79
|
+
if (isDoubao) return { vlMode: 'doubao-vision', uiTarsVersion: undefined };
|
|
80
|
+
if (isGemini) return { vlMode: 'gemini', uiTarsVersion: undefined };
|
|
107
81
|
|
|
82
|
+
// UI-TARS with version detection
|
|
108
83
|
if (isUiTars) {
|
|
109
84
|
if (isUiTars === '1') {
|
|
110
85
|
return {
|
|
@@ -124,8 +99,131 @@ export const parseVlModeAndUiTarsFromGlobalConfig = (
|
|
|
124
99
|
}
|
|
125
100
|
}
|
|
126
101
|
|
|
127
|
-
return {
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
102
|
+
return { vlMode: undefined, uiTarsVersion: undefined };
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Check if old MIDSCENE_USE_* environment variables are being used
|
|
107
|
+
* @param provider - Environment variable provider
|
|
108
|
+
* @returns Array of legacy environment variable names that are set
|
|
109
|
+
*/
|
|
110
|
+
export const detectLegacyVlModeEnvVars = (
|
|
111
|
+
provider: Record<string, string | undefined>,
|
|
112
|
+
): string[] => {
|
|
113
|
+
const legacyVars = [
|
|
114
|
+
MIDSCENE_USE_DOUBAO_VISION,
|
|
115
|
+
MIDSCENE_USE_QWEN_VL,
|
|
116
|
+
MIDSCENE_USE_QWEN3_VL,
|
|
117
|
+
MIDSCENE_USE_VLM_UI_TARS,
|
|
118
|
+
MIDSCENE_USE_GEMINI,
|
|
119
|
+
];
|
|
120
|
+
|
|
121
|
+
return legacyVars.filter((varName) => provider[varName]);
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Type guard to check if a string is a valid TModelFamily
|
|
126
|
+
*/
|
|
127
|
+
function isValidModelFamily(value: string): value is TModelFamily {
|
|
128
|
+
return (MODEL_FAMILY_VALUES as readonly string[]).includes(value);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Map legacy vlMode and uiTarsVersion to model family
|
|
133
|
+
* @param vlMode - The VL mode type
|
|
134
|
+
* @param uiTarsVersion - The UI-TARS version (if applicable)
|
|
135
|
+
* @returns The corresponding model family value
|
|
136
|
+
*/
|
|
137
|
+
function mapLegacyToModelFamily(
|
|
138
|
+
vlMode?: TVlModeTypes,
|
|
139
|
+
uiTarsVersion?: UITarsModelVersion,
|
|
140
|
+
): TModelFamily | undefined {
|
|
141
|
+
if (!vlMode) return undefined;
|
|
142
|
+
|
|
143
|
+
if (vlMode === 'vlm-ui-tars') {
|
|
144
|
+
// UI-TARS needs special handling for version
|
|
145
|
+
if (uiTarsVersion === UITarsModelVersion.V1_0) {
|
|
146
|
+
return 'vlm-ui-tars';
|
|
147
|
+
} else if (uiTarsVersion === UITarsModelVersion.DOUBAO_1_5_20B) {
|
|
148
|
+
return 'vlm-ui-tars-doubao-1.5';
|
|
149
|
+
} else {
|
|
150
|
+
// Handle other UI-TARS versions (vlm-ui-tars-doubao)
|
|
151
|
+
return 'vlm-ui-tars-doubao';
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// For other modes, model family directly matches vlMode
|
|
156
|
+
return vlMode as TModelFamily;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Parse model family from environment variables with validation and warnings
|
|
161
|
+
* Supports both new MIDSCENE_MODEL_FAMILY and legacy MIDSCENE_USE_* variables
|
|
162
|
+
*
|
|
163
|
+
* @param provider - Environment variable provider
|
|
164
|
+
* @returns Object with vlMode, uiTarsVersion, modelFamily, and warnings
|
|
165
|
+
*/
|
|
166
|
+
export const parseModelFamilyFromEnv = (
|
|
167
|
+
provider: Record<string, string | undefined>,
|
|
168
|
+
): {
|
|
169
|
+
vlMode?: TVlModeTypes;
|
|
170
|
+
uiTarsVersion?: UITarsModelVersion;
|
|
171
|
+
warnings: string[];
|
|
172
|
+
modelFamily?: TModelFamily;
|
|
173
|
+
} => {
|
|
174
|
+
const warnings: string[] = [];
|
|
175
|
+
const modelFamilyRaw = provider[MIDSCENE_MODEL_FAMILY];
|
|
176
|
+
const legacyVars = detectLegacyVlModeEnvVars(provider);
|
|
177
|
+
|
|
178
|
+
// Case 1: Both new and legacy variables are set - ERROR
|
|
179
|
+
if (modelFamilyRaw && legacyVars.length > 0) {
|
|
180
|
+
throw new Error(
|
|
181
|
+
`Conflicting configuration detected: Both MIDSCENE_MODEL_FAMILY and legacy environment variables (${legacyVars.join(', ')}) are set. Please use only MIDSCENE_MODEL_FAMILY.`,
|
|
182
|
+
);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// Case 2: Only new MIDSCENE_MODEL_FAMILY is set
|
|
186
|
+
if (modelFamilyRaw) {
|
|
187
|
+
// Validate model family value
|
|
188
|
+
if (!isValidModelFamily(modelFamilyRaw)) {
|
|
189
|
+
throw new Error(
|
|
190
|
+
`Invalid MIDSCENE_MODEL_FAMILY value: "${modelFamilyRaw}". Must be one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`,
|
|
191
|
+
);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
const modelFamily = modelFamilyRaw;
|
|
195
|
+
const parsed = parseVlModeAndUiTarsModelVersionFromRawValue(modelFamily);
|
|
196
|
+
return {
|
|
197
|
+
vlMode: parsed.vlMode,
|
|
198
|
+
uiTarsVersion: parsed.uiTarsVersion,
|
|
199
|
+
modelFamily,
|
|
200
|
+
warnings,
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// Case 3: Only legacy variables are set - WARN
|
|
205
|
+
if (legacyVars.length > 0) {
|
|
206
|
+
const legacyResult = parseVlModeAndUiTarsFromGlobalConfig(provider);
|
|
207
|
+
|
|
208
|
+
warnings.push(
|
|
209
|
+
`DEPRECATED: Environment ${legacyVars.length > 1 ? 'variables' : 'variable'} ${legacyVars.join(', ')} ${legacyVars.length > 1 ? 'are' : 'is'} deprecated. Please use MIDSCENE_MODEL_FAMILY instead. See migration guide for details.`,
|
|
210
|
+
);
|
|
211
|
+
|
|
212
|
+
const modelFamily = mapLegacyToModelFamily(
|
|
213
|
+
legacyResult.vlMode,
|
|
214
|
+
legacyResult.uiTarsVersion,
|
|
215
|
+
);
|
|
216
|
+
|
|
217
|
+
return {
|
|
218
|
+
vlMode: legacyResult.vlMode,
|
|
219
|
+
uiTarsVersion: legacyResult.uiTarsVersion,
|
|
220
|
+
modelFamily,
|
|
221
|
+
warnings,
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
// Case 4: No configuration set - ERROR
|
|
226
|
+
throw new Error(
|
|
227
|
+
`MIDSCENE_MODEL_FAMILY is required for planning tasks. Please set it to one of: ${MODEL_FAMILY_VALUES.join(', ')}. See documentation: https://midscenejs.com/model-provider.html`,
|
|
228
|
+
);
|
|
131
229
|
};
|
package/src/env/types.ts
CHANGED
|
@@ -114,6 +114,7 @@ export const MIDSCENE_PLANNING_MODEL_API_KEY =
|
|
|
114
114
|
export const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON =
|
|
115
115
|
'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
|
|
116
116
|
export const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
|
|
117
|
+
export const MIDSCENE_MODEL_FAMILY = 'MIDSCENE_MODEL_FAMILY';
|
|
117
118
|
|
|
118
119
|
/**
|
|
119
120
|
* env keys declared but unused
|
|
@@ -214,6 +215,7 @@ export const MODEL_ENV_KEYS = [
|
|
|
214
215
|
MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
215
216
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
216
217
|
MIDSCENE_PLANNING_LOCATOR_MODE,
|
|
218
|
+
MIDSCENE_MODEL_FAMILY,
|
|
217
219
|
] as const;
|
|
218
220
|
|
|
219
221
|
export const ALL_ENV_KEYS = [
|
|
@@ -227,7 +229,7 @@ export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
|
|
|
227
229
|
export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
|
|
228
230
|
|
|
229
231
|
export type TVlModeValues =
|
|
230
|
-
| '
|
|
232
|
+
| 'qwen2.5-vl'
|
|
231
233
|
| 'qwen3-vl'
|
|
232
234
|
| 'doubao-vision'
|
|
233
235
|
| 'gemini'
|
|
@@ -236,12 +238,36 @@ export type TVlModeValues =
|
|
|
236
238
|
| 'vlm-ui-tars-doubao-1.5';
|
|
237
239
|
|
|
238
240
|
export type TVlModeTypes =
|
|
239
|
-
| '
|
|
241
|
+
| 'qwen2.5-vl'
|
|
240
242
|
| 'qwen3-vl'
|
|
241
243
|
| 'doubao-vision'
|
|
242
244
|
| 'gemini'
|
|
243
245
|
| 'vlm-ui-tars';
|
|
244
246
|
|
|
247
|
+
export const VL_MODE_RAW_VALID_VALUES: TVlModeValues[] = [
|
|
248
|
+
'doubao-vision',
|
|
249
|
+
'gemini',
|
|
250
|
+
'qwen2.5-vl',
|
|
251
|
+
'qwen3-vl',
|
|
252
|
+
'vlm-ui-tars',
|
|
253
|
+
'vlm-ui-tars-doubao',
|
|
254
|
+
'vlm-ui-tars-doubao-1.5',
|
|
255
|
+
];
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Model family values - unified model configuration approach
|
|
259
|
+
* Replaces the old MIDSCENE_USE_* environment variables
|
|
260
|
+
*
|
|
261
|
+
* Note: These values directly correspond to VL_MODE_RAW_VALID_VALUES
|
|
262
|
+
* - 'qwen2.5-vl' is Qwen 2.5
|
|
263
|
+
* - 'qwen3-vl' is Qwen 3
|
|
264
|
+
*/
|
|
265
|
+
export type TModelFamily = TVlModeValues;
|
|
266
|
+
|
|
267
|
+
export const MODEL_FAMILY_VALUES: TVlModeValues[] = [
|
|
268
|
+
...VL_MODE_RAW_VALID_VALUES,
|
|
269
|
+
];
|
|
270
|
+
|
|
245
271
|
export interface IModelConfigForInsight {
|
|
246
272
|
// model name
|
|
247
273
|
[MIDSCENE_INSIGHT_MODEL_NAME]: string;
|
|
@@ -263,7 +289,7 @@ export interface IModelConfigForInsight {
|
|
|
263
289
|
* DOM-based planning is not supported.
|
|
264
290
|
*
|
|
265
291
|
* Required: MIDSCENE_PLANNING_LOCATOR_MODE must be set to one of:
|
|
266
|
-
* - '
|
|
292
|
+
* - 'qwen2.5-vl'
|
|
267
293
|
* - 'qwen3-vl'
|
|
268
294
|
* - 'gemini'
|
|
269
295
|
* - 'doubao-vision'
|
|
@@ -344,16 +370,6 @@ export enum UITarsModelVersion {
|
|
|
344
370
|
DOUBAO_1_5_20B = 'doubao-1.5-20B',
|
|
345
371
|
}
|
|
346
372
|
|
|
347
|
-
export const VL_MODE_RAW_VALID_VALUES: TVlModeValues[] = [
|
|
348
|
-
'doubao-vision',
|
|
349
|
-
'gemini',
|
|
350
|
-
'qwen-vl',
|
|
351
|
-
'qwen3-vl',
|
|
352
|
-
'vlm-ui-tars',
|
|
353
|
-
'vlm-ui-tars-doubao',
|
|
354
|
-
'vlm-ui-tars-doubao-1.5',
|
|
355
|
-
];
|
|
356
|
-
|
|
357
373
|
/**
|
|
358
374
|
* Callback to create custom OpenAI client instance
|
|
359
375
|
* @param config - Resolved model configuration including apiKey, baseURL, modelName, intent, etc.
|
|
@@ -402,8 +418,8 @@ export interface IModelConfig {
|
|
|
402
418
|
openaiApiKey?: string;
|
|
403
419
|
openaiExtraConfig?: Record<string, unknown>;
|
|
404
420
|
/**
|
|
405
|
-
* - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', '
|
|
406
|
-
* - vlMode: based on the results of the vlModoRaw classification,value can be 'doubao-vision', 'gemini', '
|
|
421
|
+
* - vlModeRaw: exists only in non-legacy logic. value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'qwen3-vl', 'vlm-ui-tars', 'vlm-ui-tars-doubao', 'vlm-ui-tars-doubao-1.5'
|
|
422
|
+
* - vlMode: based on the results of the vlModeRaw classification, value can be 'doubao-vision', 'gemini', 'qwen2.5-vl', 'qwen3-vl', 'vlm-ui-tars'
|
|
407
423
|
*/
|
|
408
424
|
vlModeRaw?: string;
|
|
409
425
|
vlMode?: TVlModeTypes;
|