@midscene/shared 1.0.1-beta-20251110055007.0 → 1.0.1-beta-20251110115555.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/env/constants.mjs +10 -19
- package/dist/es/env/decide-model-config.mjs +2 -3
- package/dist/es/env/model-config-manager.mjs +5 -9
- package/dist/es/env/types.mjs +16 -30
- package/dist/lib/env/constants.js +13 -25
- package/dist/lib/env/decide-model-config.js +1 -2
- package/dist/lib/env/model-config-manager.js +5 -9
- package/dist/lib/env/types.js +36 -71
- package/dist/types/env/constants.d.ts +1 -2
- package/dist/types/env/decide-model-config.d.ts +2 -2
- package/dist/types/env/types.d.ts +22 -39
- package/package.json +1 -1
- package/src/env/constants.ts +15 -42
- package/src/env/decide-model-config.ts +3 -6
- package/src/env/model-config-manager.ts +5 -8
- package/src/env/types.ts +33 -73
|
@@ -1,21 +1,12 @@
|
|
|
1
|
-
import {
|
|
2
|
-
const
|
|
3
|
-
modelName:
|
|
4
|
-
socksProxy:
|
|
5
|
-
httpProxy:
|
|
6
|
-
openaiBaseURL:
|
|
7
|
-
openaiApiKey:
|
|
8
|
-
openaiExtraConfig:
|
|
9
|
-
vlMode:
|
|
10
|
-
};
|
|
11
|
-
const GROUNDING_MODEL_CONFIG_KEYS = {
|
|
12
|
-
modelName: MIDSCENE_GROUNDING_MODEL_NAME,
|
|
13
|
-
socksProxy: MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
|
|
14
|
-
httpProxy: MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
|
|
15
|
-
openaiBaseURL: MIDSCENE_GROUNDING_MODEL_BASE_URL,
|
|
16
|
-
openaiApiKey: MIDSCENE_GROUNDING_MODEL_API_KEY,
|
|
17
|
-
openaiExtraConfig: MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
|
|
18
|
-
vlMode: MIDSCENE_GROUNDING_LOCATOR_MODE
|
|
1
|
+
import { MIDSCENE_INSIGHT_LOCATOR_MODE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_LOCATOR_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
|
|
2
|
+
const INSIGHT_MODEL_CONFIG_KEYS = {
|
|
3
|
+
modelName: MIDSCENE_INSIGHT_MODEL_NAME,
|
|
4
|
+
socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
5
|
+
httpProxy: MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
6
|
+
openaiBaseURL: MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
7
|
+
openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
8
|
+
openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
9
|
+
vlMode: MIDSCENE_INSIGHT_LOCATOR_MODE
|
|
19
10
|
};
|
|
20
11
|
const PLANNING_MODEL_CONFIG_KEYS = {
|
|
21
12
|
modelName: MIDSCENE_PLANNING_MODEL_NAME,
|
|
@@ -44,4 +35,4 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
|
|
|
44
35
|
openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
45
36
|
vlMode: 'DEFAULT_MODEL_CONFIG_KEYS has no vlMode key'
|
|
46
37
|
};
|
|
47
|
-
export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
|
|
38
|
+
export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
|
|
1
|
+
import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from "./constants.mjs";
|
|
2
2
|
import { MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MODEL_API_KEY, MODEL_BASE_URL, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
|
|
3
3
|
import { getDebug } from "../logger.mjs";
|
|
4
4
|
import { assert } from "../utils.mjs";
|
|
@@ -6,8 +6,7 @@ import { createAssert, maskConfig, parseJson } from "./helper.mjs";
|
|
|
6
6
|
import { initDebugConfig } from "./init-debug.mjs";
|
|
7
7
|
import { parseVlModeAndUiTarsFromGlobalConfig, parseVlModeAndUiTarsModelVersionFromRawValue } from "./parse.mjs";
|
|
8
8
|
const KEYS_MAP = {
|
|
9
|
-
|
|
10
|
-
grounding: GROUNDING_MODEL_CONFIG_KEYS,
|
|
9
|
+
insight: INSIGHT_MODEL_CONFIG_KEYS,
|
|
11
10
|
planning: PLANNING_MODEL_CONFIG_KEYS,
|
|
12
11
|
default: DEFAULT_MODEL_CONFIG_KEYS
|
|
13
12
|
};
|
|
@@ -11,17 +11,15 @@ function _define_property(obj, key, value) {
|
|
|
11
11
|
return obj;
|
|
12
12
|
}
|
|
13
13
|
const ALL_INTENTS = [
|
|
14
|
-
'
|
|
14
|
+
'insight',
|
|
15
15
|
'default',
|
|
16
|
-
'grounding',
|
|
17
16
|
'planning'
|
|
18
17
|
];
|
|
19
18
|
class ModelConfigManager {
|
|
20
19
|
calcIntentConfigMap(modelConfigFn) {
|
|
21
20
|
const intentConfigMap = {
|
|
22
|
-
|
|
21
|
+
insight: void 0,
|
|
23
22
|
default: void 0,
|
|
24
|
-
grounding: void 0,
|
|
25
23
|
planning: void 0
|
|
26
24
|
};
|
|
27
25
|
for (const i of ALL_INTENTS){
|
|
@@ -35,9 +33,8 @@ class ModelConfigManager {
|
|
|
35
33
|
}
|
|
36
34
|
calcModelConfigMapBaseOnIntent(intentConfigMap) {
|
|
37
35
|
const modelConfigMap = {
|
|
38
|
-
|
|
36
|
+
insight: void 0,
|
|
39
37
|
default: void 0,
|
|
40
|
-
grounding: void 0,
|
|
41
38
|
planning: void 0
|
|
42
39
|
};
|
|
43
40
|
for (const i of ALL_INTENTS){
|
|
@@ -51,9 +48,8 @@ class ModelConfigManager {
|
|
|
51
48
|
}
|
|
52
49
|
calcModelConfigMapBaseOnEnv(allEnvConfig) {
|
|
53
50
|
const modelConfigMap = {
|
|
54
|
-
|
|
51
|
+
insight: void 0,
|
|
55
52
|
default: void 0,
|
|
56
|
-
grounding: void 0,
|
|
57
53
|
planning: void 0
|
|
58
54
|
};
|
|
59
55
|
for (const i of ALL_INTENTS){
|
|
@@ -101,7 +97,7 @@ Learn more: https://midscenejs.com/choose-a-model`);
|
|
|
101
97
|
registerGlobalConfigManager(globalConfigManager) {
|
|
102
98
|
this.globalConfigManager = globalConfigManager;
|
|
103
99
|
}
|
|
104
|
-
throwErrorIfNonVLModel(intent = '
|
|
100
|
+
throwErrorIfNonVLModel(intent = 'insight') {
|
|
105
101
|
const modelConfig = this.getModelConfig(intent);
|
|
106
102
|
if (!modelConfig.vlMode) throw new Error('No visual language model (VL model) detected for the current scenario. Element localization may be inaccurate. Please verify your model configuration. Learn more: https://midscenejs.com/choose-a-model');
|
|
107
103
|
}
|
package/dist/es/env/types.mjs
CHANGED
|
@@ -42,13 +42,13 @@ const MIDSCENE_PREFERRED_LANGUAGE = 'MIDSCENE_PREFERRED_LANGUAGE';
|
|
|
42
42
|
const MIDSCENE_CACHE_MAX_FILENAME_LENGTH = 'MIDSCENE_CACHE_MAX_FILENAME_LENGTH';
|
|
43
43
|
const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
|
|
44
44
|
const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
|
|
45
|
-
const
|
|
46
|
-
const
|
|
47
|
-
const
|
|
48
|
-
const
|
|
49
|
-
const
|
|
50
|
-
const
|
|
51
|
-
const
|
|
45
|
+
const MIDSCENE_INSIGHT_MODEL_NAME = 'MIDSCENE_INSIGHT_MODEL_NAME';
|
|
46
|
+
const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = 'MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY';
|
|
47
|
+
const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = 'MIDSCENE_INSIGHT_MODEL_HTTP_PROXY';
|
|
48
|
+
const MIDSCENE_INSIGHT_MODEL_BASE_URL = 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
|
|
49
|
+
const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
|
|
50
|
+
const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
|
|
51
|
+
const MIDSCENE_INSIGHT_LOCATOR_MODE = 'MIDSCENE_INSIGHT_LOCATOR_MODE';
|
|
52
52
|
const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
|
|
53
53
|
const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY = 'MIDSCENE_PLANNING_MODEL_SOCKS_PROXY';
|
|
54
54
|
const MIDSCENE_PLANNING_MODEL_HTTP_PROXY = 'MIDSCENE_PLANNING_MODEL_HTTP_PROXY';
|
|
@@ -56,13 +56,6 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
|
|
|
56
56
|
const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
|
|
57
57
|
const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
|
|
58
58
|
const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
|
|
59
|
-
const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
|
|
60
|
-
const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY = 'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
|
|
61
|
-
const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY = 'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
|
|
62
|
-
const MIDSCENE_GROUNDING_MODEL_BASE_URL = 'MIDSCENE_GROUNDING_MODEL_BASE_URL';
|
|
63
|
-
const MIDSCENE_GROUNDING_MODEL_API_KEY = 'MIDSCENE_GROUNDING_MODEL_API_KEY';
|
|
64
|
-
const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
|
|
65
|
-
const MIDSCENE_GROUNDING_LOCATOR_MODE = 'MIDSCENE_GROUNDING_LOCATOR_MODE';
|
|
66
59
|
const UNUSED_ENV_KEYS = [
|
|
67
60
|
MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
|
|
68
61
|
];
|
|
@@ -123,27 +116,20 @@ const MODEL_ENV_KEYS = [
|
|
|
123
116
|
MIDSCENE_OPENAI_SOCKS_PROXY,
|
|
124
117
|
MODEL_API_KEY,
|
|
125
118
|
MODEL_BASE_URL,
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
119
|
+
MIDSCENE_INSIGHT_MODEL_NAME,
|
|
120
|
+
MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
121
|
+
MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
122
|
+
MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
123
|
+
MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
124
|
+
MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
125
|
+
MIDSCENE_INSIGHT_LOCATOR_MODE,
|
|
133
126
|
MIDSCENE_PLANNING_MODEL_NAME,
|
|
134
127
|
MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
|
|
135
128
|
MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
|
|
136
129
|
MIDSCENE_PLANNING_MODEL_BASE_URL,
|
|
137
130
|
MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
138
131
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
139
|
-
MIDSCENE_PLANNING_LOCATOR_MODE
|
|
140
|
-
MIDSCENE_GROUNDING_MODEL_NAME,
|
|
141
|
-
MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
|
|
142
|
-
MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
|
|
143
|
-
MIDSCENE_GROUNDING_MODEL_BASE_URL,
|
|
144
|
-
MIDSCENE_GROUNDING_MODEL_API_KEY,
|
|
145
|
-
MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
|
|
146
|
-
MIDSCENE_GROUNDING_LOCATOR_MODE
|
|
132
|
+
MIDSCENE_PLANNING_LOCATOR_MODE
|
|
147
133
|
];
|
|
148
134
|
const ALL_ENV_KEYS = [
|
|
149
135
|
...UNUSED_ENV_KEYS,
|
|
@@ -167,4 +153,4 @@ const VL_MODE_RAW_VALID_VALUES = [
|
|
|
167
153
|
'vlm-ui-tars-doubao',
|
|
168
154
|
'vlm-ui-tars-doubao-1.5'
|
|
169
155
|
];
|
|
170
|
-
export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK,
|
|
156
|
+
export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_INSIGHT_LOCATOR_MODE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LOCATOR_MODE, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_LOCATOR_MODE, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_API_KEY, MODEL_BASE_URL, MODEL_ENV_KEYS, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
|
|
@@ -25,29 +25,19 @@ var __webpack_exports__ = {};
|
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
27
|
DEFAULT_MODEL_CONFIG_KEYS_LEGACY: ()=>DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
|
|
28
|
-
|
|
28
|
+
INSIGHT_MODEL_CONFIG_KEYS: ()=>INSIGHT_MODEL_CONFIG_KEYS,
|
|
29
29
|
DEFAULT_MODEL_CONFIG_KEYS: ()=>DEFAULT_MODEL_CONFIG_KEYS,
|
|
30
|
-
PLANNING_MODEL_CONFIG_KEYS: ()=>PLANNING_MODEL_CONFIG_KEYS
|
|
31
|
-
GROUNDING_MODEL_CONFIG_KEYS: ()=>GROUNDING_MODEL_CONFIG_KEYS
|
|
30
|
+
PLANNING_MODEL_CONFIG_KEYS: ()=>PLANNING_MODEL_CONFIG_KEYS
|
|
32
31
|
});
|
|
33
32
|
const external_types_js_namespaceObject = require("./types.js");
|
|
34
|
-
const
|
|
35
|
-
modelName: external_types_js_namespaceObject.
|
|
36
|
-
socksProxy: external_types_js_namespaceObject.
|
|
37
|
-
httpProxy: external_types_js_namespaceObject.
|
|
38
|
-
openaiBaseURL: external_types_js_namespaceObject.
|
|
39
|
-
openaiApiKey: external_types_js_namespaceObject.
|
|
40
|
-
openaiExtraConfig: external_types_js_namespaceObject.
|
|
41
|
-
vlMode: external_types_js_namespaceObject.
|
|
42
|
-
};
|
|
43
|
-
const GROUNDING_MODEL_CONFIG_KEYS = {
|
|
44
|
-
modelName: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_NAME,
|
|
45
|
-
socksProxy: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
|
|
46
|
-
httpProxy: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
|
|
47
|
-
openaiBaseURL: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_BASE_URL,
|
|
48
|
-
openaiApiKey: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_API_KEY,
|
|
49
|
-
openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
|
|
50
|
-
vlMode: external_types_js_namespaceObject.MIDSCENE_GROUNDING_LOCATOR_MODE
|
|
33
|
+
const INSIGHT_MODEL_CONFIG_KEYS = {
|
|
34
|
+
modelName: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_NAME,
|
|
35
|
+
socksProxy: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
36
|
+
httpProxy: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
37
|
+
openaiBaseURL: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
38
|
+
openaiApiKey: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
39
|
+
openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
40
|
+
vlMode: external_types_js_namespaceObject.MIDSCENE_INSIGHT_LOCATOR_MODE
|
|
51
41
|
};
|
|
52
42
|
const PLANNING_MODEL_CONFIG_KEYS = {
|
|
53
43
|
modelName: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_NAME,
|
|
@@ -78,15 +68,13 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
|
|
|
78
68
|
};
|
|
79
69
|
exports.DEFAULT_MODEL_CONFIG_KEYS = __webpack_exports__.DEFAULT_MODEL_CONFIG_KEYS;
|
|
80
70
|
exports.DEFAULT_MODEL_CONFIG_KEYS_LEGACY = __webpack_exports__.DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
|
|
81
|
-
exports.
|
|
71
|
+
exports.INSIGHT_MODEL_CONFIG_KEYS = __webpack_exports__.INSIGHT_MODEL_CONFIG_KEYS;
|
|
82
72
|
exports.PLANNING_MODEL_CONFIG_KEYS = __webpack_exports__.PLANNING_MODEL_CONFIG_KEYS;
|
|
83
|
-
exports.VQA_MODEL_CONFIG_KEYS = __webpack_exports__.VQA_MODEL_CONFIG_KEYS;
|
|
84
73
|
for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
85
74
|
"DEFAULT_MODEL_CONFIG_KEYS",
|
|
86
75
|
"DEFAULT_MODEL_CONFIG_KEYS_LEGACY",
|
|
87
|
-
"
|
|
88
|
-
"PLANNING_MODEL_CONFIG_KEYS"
|
|
89
|
-
"VQA_MODEL_CONFIG_KEYS"
|
|
76
|
+
"INSIGHT_MODEL_CONFIG_KEYS",
|
|
77
|
+
"PLANNING_MODEL_CONFIG_KEYS"
|
|
90
78
|
].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
91
79
|
Object.defineProperty(exports, '__esModule', {
|
|
92
80
|
value: true
|
|
@@ -36,8 +36,7 @@ const external_helper_js_namespaceObject = require("./helper.js");
|
|
|
36
36
|
const external_init_debug_js_namespaceObject = require("./init-debug.js");
|
|
37
37
|
const external_parse_js_namespaceObject = require("./parse.js");
|
|
38
38
|
const KEYS_MAP = {
|
|
39
|
-
|
|
40
|
-
grounding: external_constants_js_namespaceObject.GROUNDING_MODEL_CONFIG_KEYS,
|
|
39
|
+
insight: external_constants_js_namespaceObject.INSIGHT_MODEL_CONFIG_KEYS,
|
|
41
40
|
planning: external_constants_js_namespaceObject.PLANNING_MODEL_CONFIG_KEYS,
|
|
42
41
|
default: external_constants_js_namespaceObject.DEFAULT_MODEL_CONFIG_KEYS
|
|
43
42
|
};
|
|
@@ -39,17 +39,15 @@ function _define_property(obj, key, value) {
|
|
|
39
39
|
return obj;
|
|
40
40
|
}
|
|
41
41
|
const ALL_INTENTS = [
|
|
42
|
-
'
|
|
42
|
+
'insight',
|
|
43
43
|
'default',
|
|
44
|
-
'grounding',
|
|
45
44
|
'planning'
|
|
46
45
|
];
|
|
47
46
|
class ModelConfigManager {
|
|
48
47
|
calcIntentConfigMap(modelConfigFn) {
|
|
49
48
|
const intentConfigMap = {
|
|
50
|
-
|
|
49
|
+
insight: void 0,
|
|
51
50
|
default: void 0,
|
|
52
|
-
grounding: void 0,
|
|
53
51
|
planning: void 0
|
|
54
52
|
};
|
|
55
53
|
for (const i of ALL_INTENTS){
|
|
@@ -63,9 +61,8 @@ class ModelConfigManager {
|
|
|
63
61
|
}
|
|
64
62
|
calcModelConfigMapBaseOnIntent(intentConfigMap) {
|
|
65
63
|
const modelConfigMap = {
|
|
66
|
-
|
|
64
|
+
insight: void 0,
|
|
67
65
|
default: void 0,
|
|
68
|
-
grounding: void 0,
|
|
69
66
|
planning: void 0
|
|
70
67
|
};
|
|
71
68
|
for (const i of ALL_INTENTS){
|
|
@@ -79,9 +76,8 @@ class ModelConfigManager {
|
|
|
79
76
|
}
|
|
80
77
|
calcModelConfigMapBaseOnEnv(allEnvConfig) {
|
|
81
78
|
const modelConfigMap = {
|
|
82
|
-
|
|
79
|
+
insight: void 0,
|
|
83
80
|
default: void 0,
|
|
84
|
-
grounding: void 0,
|
|
85
81
|
planning: void 0
|
|
86
82
|
};
|
|
87
83
|
for (const i of ALL_INTENTS){
|
|
@@ -129,7 +125,7 @@ Learn more: https://midscenejs.com/choose-a-model`);
|
|
|
129
125
|
registerGlobalConfigManager(globalConfigManager) {
|
|
130
126
|
this.globalConfigManager = globalConfigManager;
|
|
131
127
|
}
|
|
132
|
-
throwErrorIfNonVLModel(intent = '
|
|
128
|
+
throwErrorIfNonVLModel(intent = 'insight') {
|
|
133
129
|
const modelConfig = this.getModelConfig(intent);
|
|
134
130
|
if (!modelConfig.vlMode) throw new Error('No visual language model (VL model) detected for the current scenario. Element localization may be inaccurate. Please verify your model configuration. Learn more: https://midscenejs.com/choose-a-model');
|
|
135
131
|
}
|
package/dist/lib/env/types.js
CHANGED
|
@@ -41,13 +41,13 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
41
41
|
MIDSCENE_DEBUG_MODEL_PROFILE: ()=>MIDSCENE_DEBUG_MODEL_PROFILE,
|
|
42
42
|
MIDSCENE_DEBUG_MODEL_RESPONSE: ()=>MIDSCENE_DEBUG_MODEL_RESPONSE,
|
|
43
43
|
MIDSCENE_FORCE_DEEP_THINK: ()=>MIDSCENE_FORCE_DEEP_THINK,
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
44
|
+
MIDSCENE_INSIGHT_LOCATOR_MODE: ()=>MIDSCENE_INSIGHT_LOCATOR_MODE,
|
|
45
|
+
MIDSCENE_INSIGHT_MODEL_API_KEY: ()=>MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
46
|
+
MIDSCENE_INSIGHT_MODEL_BASE_URL: ()=>MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
47
|
+
MIDSCENE_INSIGHT_MODEL_HTTP_PROXY: ()=>MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
48
|
+
MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
49
|
+
MIDSCENE_INSIGHT_MODEL_NAME: ()=>MIDSCENE_INSIGHT_MODEL_NAME,
|
|
50
|
+
MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY: ()=>MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
51
51
|
MIDSCENE_IOS_DEVICE_UDID: ()=>MIDSCENE_IOS_DEVICE_UDID,
|
|
52
52
|
MIDSCENE_IOS_SIMULATOR_UDID: ()=>MIDSCENE_IOS_SIMULATOR_UDID,
|
|
53
53
|
MIDSCENE_LOCATOR_MODE: ()=>MIDSCENE_LOCATOR_MODE,
|
|
@@ -81,13 +81,6 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
81
81
|
MIDSCENE_USE_QWEN_VL: ()=>MIDSCENE_USE_QWEN_VL,
|
|
82
82
|
MIDSCENE_USE_VLM_UI_TARS: ()=>MIDSCENE_USE_VLM_UI_TARS,
|
|
83
83
|
MIDSCENE_USE_VL_MODEL: ()=>MIDSCENE_USE_VL_MODEL,
|
|
84
|
-
MIDSCENE_VQA_LOCATOR_MODE: ()=>MIDSCENE_VQA_LOCATOR_MODE,
|
|
85
|
-
MIDSCENE_VQA_MODEL_API_KEY: ()=>MIDSCENE_VQA_MODEL_API_KEY,
|
|
86
|
-
MIDSCENE_VQA_MODEL_BASE_URL: ()=>MIDSCENE_VQA_MODEL_BASE_URL,
|
|
87
|
-
MIDSCENE_VQA_MODEL_HTTP_PROXY: ()=>MIDSCENE_VQA_MODEL_HTTP_PROXY,
|
|
88
|
-
MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
|
|
89
|
-
MIDSCENE_VQA_MODEL_NAME: ()=>MIDSCENE_VQA_MODEL_NAME,
|
|
90
|
-
MIDSCENE_VQA_MODEL_SOCKS_PROXY: ()=>MIDSCENE_VQA_MODEL_SOCKS_PROXY,
|
|
91
84
|
MODEL_API_KEY: ()=>MODEL_API_KEY,
|
|
92
85
|
MODEL_BASE_URL: ()=>MODEL_BASE_URL,
|
|
93
86
|
MODEL_ENV_KEYS: ()=>MODEL_ENV_KEYS,
|
|
@@ -144,13 +137,13 @@ const MIDSCENE_PREFERRED_LANGUAGE = 'MIDSCENE_PREFERRED_LANGUAGE';
|
|
|
144
137
|
const MIDSCENE_CACHE_MAX_FILENAME_LENGTH = 'MIDSCENE_CACHE_MAX_FILENAME_LENGTH';
|
|
145
138
|
const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
|
|
146
139
|
const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
|
|
147
|
-
const
|
|
148
|
-
const
|
|
149
|
-
const
|
|
150
|
-
const
|
|
151
|
-
const
|
|
152
|
-
const
|
|
153
|
-
const
|
|
140
|
+
const MIDSCENE_INSIGHT_MODEL_NAME = 'MIDSCENE_INSIGHT_MODEL_NAME';
|
|
141
|
+
const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = 'MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY';
|
|
142
|
+
const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = 'MIDSCENE_INSIGHT_MODEL_HTTP_PROXY';
|
|
143
|
+
const MIDSCENE_INSIGHT_MODEL_BASE_URL = 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
|
|
144
|
+
const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
|
|
145
|
+
const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
|
|
146
|
+
const MIDSCENE_INSIGHT_LOCATOR_MODE = 'MIDSCENE_INSIGHT_LOCATOR_MODE';
|
|
154
147
|
const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
|
|
155
148
|
const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY = 'MIDSCENE_PLANNING_MODEL_SOCKS_PROXY';
|
|
156
149
|
const MIDSCENE_PLANNING_MODEL_HTTP_PROXY = 'MIDSCENE_PLANNING_MODEL_HTTP_PROXY';
|
|
@@ -158,13 +151,6 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
|
|
|
158
151
|
const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
|
|
159
152
|
const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
|
|
160
153
|
const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
|
|
161
|
-
const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
|
|
162
|
-
const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY = 'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
|
|
163
|
-
const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY = 'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
|
|
164
|
-
const MIDSCENE_GROUNDING_MODEL_BASE_URL = 'MIDSCENE_GROUNDING_MODEL_BASE_URL';
|
|
165
|
-
const MIDSCENE_GROUNDING_MODEL_API_KEY = 'MIDSCENE_GROUNDING_MODEL_API_KEY';
|
|
166
|
-
const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
|
|
167
|
-
const MIDSCENE_GROUNDING_LOCATOR_MODE = 'MIDSCENE_GROUNDING_LOCATOR_MODE';
|
|
168
154
|
const UNUSED_ENV_KEYS = [
|
|
169
155
|
MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG
|
|
170
156
|
];
|
|
@@ -225,27 +211,20 @@ const MODEL_ENV_KEYS = [
|
|
|
225
211
|
MIDSCENE_OPENAI_SOCKS_PROXY,
|
|
226
212
|
MODEL_API_KEY,
|
|
227
213
|
MODEL_BASE_URL,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
214
|
+
MIDSCENE_INSIGHT_MODEL_NAME,
|
|
215
|
+
MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
216
|
+
MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
217
|
+
MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
218
|
+
MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
219
|
+
MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
220
|
+
MIDSCENE_INSIGHT_LOCATOR_MODE,
|
|
235
221
|
MIDSCENE_PLANNING_MODEL_NAME,
|
|
236
222
|
MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
|
|
237
223
|
MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
|
|
238
224
|
MIDSCENE_PLANNING_MODEL_BASE_URL,
|
|
239
225
|
MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
240
226
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
241
|
-
MIDSCENE_PLANNING_LOCATOR_MODE
|
|
242
|
-
MIDSCENE_GROUNDING_MODEL_NAME,
|
|
243
|
-
MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
|
|
244
|
-
MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
|
|
245
|
-
MIDSCENE_GROUNDING_MODEL_BASE_URL,
|
|
246
|
-
MIDSCENE_GROUNDING_MODEL_API_KEY,
|
|
247
|
-
MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
|
|
248
|
-
MIDSCENE_GROUNDING_LOCATOR_MODE
|
|
227
|
+
MIDSCENE_PLANNING_LOCATOR_MODE
|
|
249
228
|
];
|
|
250
229
|
const ALL_ENV_KEYS = [
|
|
251
230
|
...UNUSED_ENV_KEYS,
|
|
@@ -286,13 +265,13 @@ exports.MIDSCENE_DEBUG_MODE = __webpack_exports__.MIDSCENE_DEBUG_MODE;
|
|
|
286
265
|
exports.MIDSCENE_DEBUG_MODEL_PROFILE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_PROFILE;
|
|
287
266
|
exports.MIDSCENE_DEBUG_MODEL_RESPONSE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_RESPONSE;
|
|
288
267
|
exports.MIDSCENE_FORCE_DEEP_THINK = __webpack_exports__.MIDSCENE_FORCE_DEEP_THINK;
|
|
289
|
-
exports.
|
|
290
|
-
exports.
|
|
291
|
-
exports.
|
|
292
|
-
exports.
|
|
293
|
-
exports.
|
|
294
|
-
exports.
|
|
295
|
-
exports.
|
|
268
|
+
exports.MIDSCENE_INSIGHT_LOCATOR_MODE = __webpack_exports__.MIDSCENE_INSIGHT_LOCATOR_MODE;
|
|
269
|
+
exports.MIDSCENE_INSIGHT_MODEL_API_KEY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_API_KEY;
|
|
270
|
+
exports.MIDSCENE_INSIGHT_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_BASE_URL;
|
|
271
|
+
exports.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY;
|
|
272
|
+
exports.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON;
|
|
273
|
+
exports.MIDSCENE_INSIGHT_MODEL_NAME = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_NAME;
|
|
274
|
+
exports.MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY;
|
|
296
275
|
exports.MIDSCENE_IOS_DEVICE_UDID = __webpack_exports__.MIDSCENE_IOS_DEVICE_UDID;
|
|
297
276
|
exports.MIDSCENE_IOS_SIMULATOR_UDID = __webpack_exports__.MIDSCENE_IOS_SIMULATOR_UDID;
|
|
298
277
|
exports.MIDSCENE_LOCATOR_MODE = __webpack_exports__.MIDSCENE_LOCATOR_MODE;
|
|
@@ -326,13 +305,6 @@ exports.MIDSCENE_USE_QWEN3_VL = __webpack_exports__.MIDSCENE_USE_QWEN3_VL;
|
|
|
326
305
|
exports.MIDSCENE_USE_QWEN_VL = __webpack_exports__.MIDSCENE_USE_QWEN_VL;
|
|
327
306
|
exports.MIDSCENE_USE_VLM_UI_TARS = __webpack_exports__.MIDSCENE_USE_VLM_UI_TARS;
|
|
328
307
|
exports.MIDSCENE_USE_VL_MODEL = __webpack_exports__.MIDSCENE_USE_VL_MODEL;
|
|
329
|
-
exports.MIDSCENE_VQA_LOCATOR_MODE = __webpack_exports__.MIDSCENE_VQA_LOCATOR_MODE;
|
|
330
|
-
exports.MIDSCENE_VQA_MODEL_API_KEY = __webpack_exports__.MIDSCENE_VQA_MODEL_API_KEY;
|
|
331
|
-
exports.MIDSCENE_VQA_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_VQA_MODEL_BASE_URL;
|
|
332
|
-
exports.MIDSCENE_VQA_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_VQA_MODEL_HTTP_PROXY;
|
|
333
|
-
exports.MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON;
|
|
334
|
-
exports.MIDSCENE_VQA_MODEL_NAME = __webpack_exports__.MIDSCENE_VQA_MODEL_NAME;
|
|
335
|
-
exports.MIDSCENE_VQA_MODEL_SOCKS_PROXY = __webpack_exports__.MIDSCENE_VQA_MODEL_SOCKS_PROXY;
|
|
336
308
|
exports.MODEL_API_KEY = __webpack_exports__.MODEL_API_KEY;
|
|
337
309
|
exports.MODEL_BASE_URL = __webpack_exports__.MODEL_BASE_URL;
|
|
338
310
|
exports.MODEL_ENV_KEYS = __webpack_exports__.MODEL_ENV_KEYS;
|
|
@@ -362,13 +334,13 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
|
362
334
|
"MIDSCENE_DEBUG_MODEL_PROFILE",
|
|
363
335
|
"MIDSCENE_DEBUG_MODEL_RESPONSE",
|
|
364
336
|
"MIDSCENE_FORCE_DEEP_THINK",
|
|
365
|
-
"
|
|
366
|
-
"
|
|
367
|
-
"
|
|
368
|
-
"
|
|
369
|
-
"
|
|
370
|
-
"
|
|
371
|
-
"
|
|
337
|
+
"MIDSCENE_INSIGHT_LOCATOR_MODE",
|
|
338
|
+
"MIDSCENE_INSIGHT_MODEL_API_KEY",
|
|
339
|
+
"MIDSCENE_INSIGHT_MODEL_BASE_URL",
|
|
340
|
+
"MIDSCENE_INSIGHT_MODEL_HTTP_PROXY",
|
|
341
|
+
"MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON",
|
|
342
|
+
"MIDSCENE_INSIGHT_MODEL_NAME",
|
|
343
|
+
"MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY",
|
|
372
344
|
"MIDSCENE_IOS_DEVICE_UDID",
|
|
373
345
|
"MIDSCENE_IOS_SIMULATOR_UDID",
|
|
374
346
|
"MIDSCENE_LOCATOR_MODE",
|
|
@@ -402,13 +374,6 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
|
402
374
|
"MIDSCENE_USE_QWEN_VL",
|
|
403
375
|
"MIDSCENE_USE_VLM_UI_TARS",
|
|
404
376
|
"MIDSCENE_USE_VL_MODEL",
|
|
405
|
-
"MIDSCENE_VQA_LOCATOR_MODE",
|
|
406
|
-
"MIDSCENE_VQA_MODEL_API_KEY",
|
|
407
|
-
"MIDSCENE_VQA_MODEL_BASE_URL",
|
|
408
|
-
"MIDSCENE_VQA_MODEL_HTTP_PROXY",
|
|
409
|
-
"MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON",
|
|
410
|
-
"MIDSCENE_VQA_MODEL_NAME",
|
|
411
|
-
"MIDSCENE_VQA_MODEL_SOCKS_PROXY",
|
|
412
377
|
"MODEL_API_KEY",
|
|
413
378
|
"MODEL_BASE_URL",
|
|
414
379
|
"MODEL_ENV_KEYS",
|
|
@@ -16,8 +16,7 @@ interface IModelConfigKeys {
|
|
|
16
16
|
*/
|
|
17
17
|
vlMode: string;
|
|
18
18
|
}
|
|
19
|
-
export declare const
|
|
20
|
-
export declare const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys;
|
|
19
|
+
export declare const INSIGHT_MODEL_CONFIG_KEYS: IModelConfigKeys;
|
|
21
20
|
export declare const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys;
|
|
22
21
|
export declare const DEFAULT_MODEL_CONFIG_KEYS: IModelConfigKeys;
|
|
23
22
|
export declare const DEFAULT_MODEL_CONFIG_KEYS_LEGACY: IModelConfigKeys;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { IModelConfig, TIntent } from './types';
|
|
2
|
-
import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
|
|
3
|
-
type TModelConfigKeys = typeof
|
|
2
|
+
import { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from './constants';
|
|
3
|
+
type TModelConfigKeys = typeof INSIGHT_MODEL_CONFIG_KEYS | typeof PLANNING_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
|
|
4
4
|
/**
|
|
5
5
|
* Choose OpenAI SDK config
|
|
6
6
|
*/
|
|
@@ -66,13 +66,13 @@ export declare const MIDSCENE_PREFERRED_LANGUAGE = "MIDSCENE_PREFERRED_LANGUAGE"
|
|
|
66
66
|
export declare const MIDSCENE_CACHE_MAX_FILENAME_LENGTH = "MIDSCENE_CACHE_MAX_FILENAME_LENGTH";
|
|
67
67
|
export declare const MIDSCENE_RUN_DIR = "MIDSCENE_RUN_DIR";
|
|
68
68
|
export declare const MIDSCENE_LOCATOR_MODE = "MIDSCENE_LOCATOR_MODE";
|
|
69
|
-
export declare const
|
|
70
|
-
export declare const
|
|
71
|
-
export declare const
|
|
72
|
-
export declare const
|
|
73
|
-
export declare const
|
|
74
|
-
export declare const
|
|
75
|
-
export declare const
|
|
69
|
+
export declare const MIDSCENE_INSIGHT_MODEL_NAME = "MIDSCENE_INSIGHT_MODEL_NAME";
|
|
70
|
+
export declare const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY = "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY";
|
|
71
|
+
export declare const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY";
|
|
72
|
+
export declare const MIDSCENE_INSIGHT_MODEL_BASE_URL = "MIDSCENE_INSIGHT_MODEL_BASE_URL";
|
|
73
|
+
export declare const MIDSCENE_INSIGHT_MODEL_API_KEY = "MIDSCENE_INSIGHT_MODEL_API_KEY";
|
|
74
|
+
export declare const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON";
|
|
75
|
+
export declare const MIDSCENE_INSIGHT_LOCATOR_MODE = "MIDSCENE_INSIGHT_LOCATOR_MODE";
|
|
76
76
|
export declare const MIDSCENE_PLANNING_MODEL_NAME = "MIDSCENE_PLANNING_MODEL_NAME";
|
|
77
77
|
export declare const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY = "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY";
|
|
78
78
|
export declare const MIDSCENE_PLANNING_MODEL_HTTP_PROXY = "MIDSCENE_PLANNING_MODEL_HTTP_PROXY";
|
|
@@ -80,13 +80,6 @@ export declare const MIDSCENE_PLANNING_MODEL_BASE_URL = "MIDSCENE_PLANNING_MODEL
|
|
|
80
80
|
export declare const MIDSCENE_PLANNING_MODEL_API_KEY = "MIDSCENE_PLANNING_MODEL_API_KEY";
|
|
81
81
|
export declare const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON";
|
|
82
82
|
export declare const MIDSCENE_PLANNING_LOCATOR_MODE = "MIDSCENE_PLANNING_LOCATOR_MODE";
|
|
83
|
-
export declare const MIDSCENE_GROUNDING_MODEL_NAME = "MIDSCENE_GROUNDING_MODEL_NAME";
|
|
84
|
-
export declare const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY = "MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY";
|
|
85
|
-
export declare const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY = "MIDSCENE_GROUNDING_MODEL_HTTP_PROXY";
|
|
86
|
-
export declare const MIDSCENE_GROUNDING_MODEL_BASE_URL = "MIDSCENE_GROUNDING_MODEL_BASE_URL";
|
|
87
|
-
export declare const MIDSCENE_GROUNDING_MODEL_API_KEY = "MIDSCENE_GROUNDING_MODEL_API_KEY";
|
|
88
|
-
export declare const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON = "MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON";
|
|
89
|
-
export declare const MIDSCENE_GROUNDING_LOCATOR_MODE = "MIDSCENE_GROUNDING_LOCATOR_MODE";
|
|
90
83
|
/**
|
|
91
84
|
* env keys declared but unused
|
|
92
85
|
*/
|
|
@@ -110,20 +103,20 @@ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORC
|
|
|
110
103
|
* Can be override by both agent.modelConfig and overrideAIConfig
|
|
111
104
|
* Can only be access after agent.constructor
|
|
112
105
|
*/
|
|
113
|
-
export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "
|
|
114
|
-
export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "
|
|
106
|
+
export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
|
|
107
|
+
export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "MIDSCENE_LOCATOR_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MODEL_API_KEY", "MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_LOCATOR_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_LOCATOR_MODE"];
|
|
115
108
|
export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
|
|
116
109
|
export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
|
|
117
110
|
export type TVlModeValues = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
|
|
118
111
|
export type TVlModeTypes = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars';
|
|
119
|
-
export interface
|
|
120
|
-
[
|
|
121
|
-
[
|
|
122
|
-
[
|
|
123
|
-
[
|
|
124
|
-
[
|
|
125
|
-
[
|
|
126
|
-
[
|
|
112
|
+
export interface IModelConfigForInsight {
|
|
113
|
+
[MIDSCENE_INSIGHT_MODEL_NAME]: string;
|
|
114
|
+
[MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY]?: string;
|
|
115
|
+
[MIDSCENE_INSIGHT_MODEL_HTTP_PROXY]?: string;
|
|
116
|
+
[MIDSCENE_INSIGHT_MODEL_BASE_URL]?: string;
|
|
117
|
+
[MIDSCENE_INSIGHT_MODEL_API_KEY]?: string;
|
|
118
|
+
[MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON]?: string;
|
|
119
|
+
[MIDSCENE_INSIGHT_LOCATOR_MODE]?: TVlModeValues;
|
|
127
120
|
}
|
|
128
121
|
/**
|
|
129
122
|
* Model configuration for Planning intent.
|
|
@@ -149,15 +142,6 @@ export interface IModelConfigForPlanning {
|
|
|
149
142
|
[MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON]?: string;
|
|
150
143
|
[MIDSCENE_PLANNING_LOCATOR_MODE]?: TVlModeValues;
|
|
151
144
|
}
|
|
152
|
-
export interface IModeConfigForGrounding {
|
|
153
|
-
[MIDSCENE_GROUNDING_MODEL_NAME]: string;
|
|
154
|
-
[MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY]?: string;
|
|
155
|
-
[MIDSCENE_GROUNDING_MODEL_HTTP_PROXY]?: string;
|
|
156
|
-
[MIDSCENE_GROUNDING_MODEL_BASE_URL]?: string;
|
|
157
|
-
[MIDSCENE_GROUNDING_MODEL_API_KEY]?: string;
|
|
158
|
-
[MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON]?: string;
|
|
159
|
-
[MIDSCENE_GROUNDING_LOCATOR_MODE]?: TVlModeValues;
|
|
160
|
-
}
|
|
161
145
|
export interface IModelConfigForDefault {
|
|
162
146
|
[MIDSCENE_MODEL_NAME]: string;
|
|
163
147
|
[MIDSCENE_MODEL_SOCKS_PROXY]?: string;
|
|
@@ -177,24 +161,23 @@ export interface IModelConfigForDefaultLegacy {
|
|
|
177
161
|
[MIDSCENE_LOCATOR_MODE]?: TVlModeValues;
|
|
178
162
|
}
|
|
179
163
|
/**
|
|
180
|
-
* -
|
|
181
|
-
* - grounding:short for Visual Grounding
|
|
164
|
+
* - insight: Visual Question Answering and Visual Grounding (unified)
|
|
182
165
|
* - planning: planning
|
|
183
|
-
* - default: all except
|
|
166
|
+
* - default: all except insight、planning
|
|
184
167
|
*/
|
|
185
|
-
export type TIntent = '
|
|
168
|
+
export type TIntent = 'insight' | 'planning' | 'default';
|
|
186
169
|
/**
|
|
187
170
|
* Internal type with intent parameter for ModelConfigManager
|
|
188
171
|
* @internal
|
|
189
172
|
*/
|
|
190
173
|
export type TModelConfigFnInternal = (options: {
|
|
191
174
|
intent: TIntent;
|
|
192
|
-
}) =>
|
|
175
|
+
}) => IModelConfigForInsight | IModelConfigForPlanning | IModelConfigForDefault;
|
|
193
176
|
/**
|
|
194
177
|
* User-facing model config function type
|
|
195
178
|
* Users return config objects without needing to know about intent parameter
|
|
196
179
|
*/
|
|
197
|
-
export type TModelConfigFn = () =>
|
|
180
|
+
export type TModelConfigFn = () => IModelConfigForInsight | IModelConfigForPlanning | IModelConfigForDefault;
|
|
198
181
|
export declare enum UITarsModelVersion {
|
|
199
182
|
V1_0 = "1.0",
|
|
200
183
|
V1_5 = "1.5",
|
package/package.json
CHANGED
package/src/env/constants.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
2
|
+
MIDSCENE_INSIGHT_LOCATOR_MODE,
|
|
3
|
+
MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
4
|
+
MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
5
|
+
MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
6
|
+
MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
7
|
+
MIDSCENE_INSIGHT_MODEL_NAME,
|
|
8
|
+
MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
9
9
|
MIDSCENE_LOCATOR_MODE,
|
|
10
10
|
MIDSCENE_MODEL_API_KEY,
|
|
11
11
|
MIDSCENE_MODEL_BASE_URL,
|
|
@@ -23,14 +23,6 @@ import {
|
|
|
23
23
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
24
24
|
MIDSCENE_PLANNING_MODEL_NAME,
|
|
25
25
|
MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
|
|
26
|
-
MIDSCENE_VQA_LOCATOR_MODE,
|
|
27
|
-
MIDSCENE_VQA_MODEL_API_KEY,
|
|
28
|
-
MIDSCENE_VQA_MODEL_BASE_URL,
|
|
29
|
-
MIDSCENE_VQA_MODEL_HTTP_PROXY,
|
|
30
|
-
MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
|
|
31
|
-
// VQA
|
|
32
|
-
MIDSCENE_VQA_MODEL_NAME,
|
|
33
|
-
MIDSCENE_VQA_MODEL_SOCKS_PROXY,
|
|
34
26
|
OPENAI_API_KEY,
|
|
35
27
|
OPENAI_BASE_URL,
|
|
36
28
|
} from './types';
|
|
@@ -54,42 +46,23 @@ interface IModelConfigKeys {
|
|
|
54
46
|
vlMode: string;
|
|
55
47
|
}
|
|
56
48
|
|
|
57
|
-
export const
|
|
58
|
-
modelName:
|
|
49
|
+
export const INSIGHT_MODEL_CONFIG_KEYS: IModelConfigKeys = {
|
|
50
|
+
modelName: MIDSCENE_INSIGHT_MODEL_NAME,
|
|
59
51
|
/**
|
|
60
52
|
* proxy
|
|
61
53
|
*/
|
|
62
|
-
socksProxy:
|
|
63
|
-
httpProxy:
|
|
54
|
+
socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
55
|
+
httpProxy: MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
64
56
|
/**
|
|
65
57
|
* OpenAI
|
|
66
58
|
*/
|
|
67
|
-
openaiBaseURL:
|
|
68
|
-
openaiApiKey:
|
|
69
|
-
openaiExtraConfig:
|
|
59
|
+
openaiBaseURL: MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
60
|
+
openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
61
|
+
openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
70
62
|
/**
|
|
71
63
|
* Extra
|
|
72
64
|
*/
|
|
73
|
-
vlMode:
|
|
74
|
-
} as const;
|
|
75
|
-
|
|
76
|
-
export const GROUNDING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
|
|
77
|
-
modelName: MIDSCENE_GROUNDING_MODEL_NAME,
|
|
78
|
-
/**
|
|
79
|
-
* proxy
|
|
80
|
-
*/
|
|
81
|
-
socksProxy: MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
|
|
82
|
-
httpProxy: MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
|
|
83
|
-
/**
|
|
84
|
-
* OpenAI
|
|
85
|
-
*/
|
|
86
|
-
openaiBaseURL: MIDSCENE_GROUNDING_MODEL_BASE_URL,
|
|
87
|
-
openaiApiKey: MIDSCENE_GROUNDING_MODEL_API_KEY,
|
|
88
|
-
openaiExtraConfig: MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
|
|
89
|
-
/**
|
|
90
|
-
* Extra
|
|
91
|
-
*/
|
|
92
|
-
vlMode: MIDSCENE_GROUNDING_LOCATOR_MODE,
|
|
65
|
+
vlMode: MIDSCENE_INSIGHT_LOCATOR_MODE,
|
|
93
66
|
} as const;
|
|
94
67
|
|
|
95
68
|
export const PLANNING_MODEL_CONFIG_KEYS: IModelConfigKeys = {
|
|
@@ -8,9 +8,8 @@ import type {
|
|
|
8
8
|
import {
|
|
9
9
|
DEFAULT_MODEL_CONFIG_KEYS,
|
|
10
10
|
DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
|
|
11
|
-
|
|
11
|
+
INSIGHT_MODEL_CONFIG_KEYS,
|
|
12
12
|
PLANNING_MODEL_CONFIG_KEYS,
|
|
13
|
-
VQA_MODEL_CONFIG_KEYS,
|
|
14
13
|
} from './constants';
|
|
15
14
|
import {
|
|
16
15
|
MIDSCENE_MODEL_API_KEY,
|
|
@@ -37,15 +36,13 @@ import {
|
|
|
37
36
|
} from './parse';
|
|
38
37
|
|
|
39
38
|
type TModelConfigKeys =
|
|
40
|
-
| typeof
|
|
41
|
-
| typeof GROUNDING_MODEL_CONFIG_KEYS
|
|
39
|
+
| typeof INSIGHT_MODEL_CONFIG_KEYS
|
|
42
40
|
| typeof PLANNING_MODEL_CONFIG_KEYS
|
|
43
41
|
| typeof DEFAULT_MODEL_CONFIG_KEYS
|
|
44
42
|
| typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
|
|
45
43
|
|
|
46
44
|
const KEYS_MAP: Record<TIntent, TModelConfigKeys> = {
|
|
47
|
-
|
|
48
|
-
grounding: GROUNDING_MODEL_CONFIG_KEYS,
|
|
45
|
+
insight: INSIGHT_MODEL_CONFIG_KEYS,
|
|
49
46
|
planning: PLANNING_MODEL_CONFIG_KEYS,
|
|
50
47
|
default: DEFAULT_MODEL_CONFIG_KEYS,
|
|
51
48
|
} as const;
|
|
@@ -13,7 +13,7 @@ import type {
|
|
|
13
13
|
} from './types';
|
|
14
14
|
import { VL_MODE_RAW_VALID_VALUES as VL_MODES } from './types';
|
|
15
15
|
|
|
16
|
-
const ALL_INTENTS: TIntent[] = ['
|
|
16
|
+
const ALL_INTENTS: TIntent[] = ['insight', 'default', 'planning'];
|
|
17
17
|
|
|
18
18
|
export type TIntentConfigMap = Record<
|
|
19
19
|
TIntent,
|
|
@@ -51,9 +51,8 @@ export class ModelConfigManager {
|
|
|
51
51
|
modelConfigFn: TModelConfigFnInternal,
|
|
52
52
|
): TIntentConfigMap {
|
|
53
53
|
const intentConfigMap: TIntentConfigMap = {
|
|
54
|
-
|
|
54
|
+
insight: undefined,
|
|
55
55
|
default: undefined,
|
|
56
|
-
grounding: undefined,
|
|
57
56
|
planning: undefined,
|
|
58
57
|
};
|
|
59
58
|
|
|
@@ -71,9 +70,8 @@ export class ModelConfigManager {
|
|
|
71
70
|
|
|
72
71
|
private calcModelConfigMapBaseOnIntent(intentConfigMap: TIntentConfigMap) {
|
|
73
72
|
const modelConfigMap: Record<TIntent, IModelConfig | undefined> = {
|
|
74
|
-
|
|
73
|
+
insight: undefined,
|
|
75
74
|
default: undefined,
|
|
76
|
-
grounding: undefined,
|
|
77
75
|
planning: undefined,
|
|
78
76
|
};
|
|
79
77
|
for (const i of ALL_INTENTS) {
|
|
@@ -93,9 +91,8 @@ export class ModelConfigManager {
|
|
|
93
91
|
allEnvConfig: Record<string, string | undefined>,
|
|
94
92
|
) {
|
|
95
93
|
const modelConfigMap: Record<TIntent, IModelConfig | undefined> = {
|
|
96
|
-
|
|
94
|
+
insight: undefined,
|
|
97
95
|
default: undefined,
|
|
98
|
-
grounding: undefined,
|
|
99
96
|
planning: undefined,
|
|
100
97
|
};
|
|
101
98
|
for (const i of ALL_INTENTS) {
|
|
@@ -177,7 +174,7 @@ Learn more: https://midscenejs.com/choose-a-model`,
|
|
|
177
174
|
this.globalConfigManager = globalConfigManager;
|
|
178
175
|
}
|
|
179
176
|
|
|
180
|
-
throwErrorIfNonVLModel(intent: TIntent = '
|
|
177
|
+
throwErrorIfNonVLModel(intent: TIntent = 'insight') {
|
|
181
178
|
const modelConfig = this.getModelConfig(intent);
|
|
182
179
|
|
|
183
180
|
if (!modelConfig.vlMode) {
|
package/src/env/types.ts
CHANGED
|
@@ -88,15 +88,18 @@ export const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
|
|
|
88
88
|
// default new
|
|
89
89
|
export const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
|
|
90
90
|
|
|
91
|
-
// VQA
|
|
92
|
-
export const
|
|
93
|
-
export const
|
|
94
|
-
|
|
95
|
-
export const
|
|
96
|
-
|
|
97
|
-
export const
|
|
98
|
-
'
|
|
99
|
-
export const
|
|
91
|
+
// INSIGHT (unified VQA and Grounding)
|
|
92
|
+
export const MIDSCENE_INSIGHT_MODEL_NAME = 'MIDSCENE_INSIGHT_MODEL_NAME';
|
|
93
|
+
export const MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY =
|
|
94
|
+
'MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY';
|
|
95
|
+
export const MIDSCENE_INSIGHT_MODEL_HTTP_PROXY =
|
|
96
|
+
'MIDSCENE_INSIGHT_MODEL_HTTP_PROXY';
|
|
97
|
+
export const MIDSCENE_INSIGHT_MODEL_BASE_URL =
|
|
98
|
+
'MIDSCENE_INSIGHT_MODEL_BASE_URL';
|
|
99
|
+
export const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
|
|
100
|
+
export const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON =
|
|
101
|
+
'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
|
|
102
|
+
export const MIDSCENE_INSIGHT_LOCATOR_MODE = 'MIDSCENE_INSIGHT_LOCATOR_MODE';
|
|
100
103
|
|
|
101
104
|
// PLANNING
|
|
102
105
|
export const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
|
|
@@ -112,21 +115,6 @@ export const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON =
|
|
|
112
115
|
'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
|
|
113
116
|
export const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
|
|
114
117
|
|
|
115
|
-
// GROUNDING
|
|
116
|
-
export const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
|
|
117
|
-
export const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY =
|
|
118
|
-
'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
|
|
119
|
-
export const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY =
|
|
120
|
-
'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
|
|
121
|
-
export const MIDSCENE_GROUNDING_MODEL_BASE_URL =
|
|
122
|
-
'MIDSCENE_GROUNDING_MODEL_BASE_URL';
|
|
123
|
-
export const MIDSCENE_GROUNDING_MODEL_API_KEY =
|
|
124
|
-
'MIDSCENE_GROUNDING_MODEL_API_KEY';
|
|
125
|
-
export const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON =
|
|
126
|
-
'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
|
|
127
|
-
export const MIDSCENE_GROUNDING_LOCATOR_MODE =
|
|
128
|
-
'MIDSCENE_GROUNDING_LOCATOR_MODE';
|
|
129
|
-
|
|
130
118
|
/**
|
|
131
119
|
* env keys declared but unused
|
|
132
120
|
*/
|
|
@@ -210,14 +198,14 @@ export const MODEL_ENV_KEYS = [
|
|
|
210
198
|
MIDSCENE_OPENAI_SOCKS_PROXY,
|
|
211
199
|
MODEL_API_KEY,
|
|
212
200
|
MODEL_BASE_URL,
|
|
213
|
-
// VQA
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
201
|
+
// INSIGHT (unified VQA and Grounding)
|
|
202
|
+
MIDSCENE_INSIGHT_MODEL_NAME,
|
|
203
|
+
MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
204
|
+
MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
205
|
+
MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
206
|
+
MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
207
|
+
MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
208
|
+
MIDSCENE_INSIGHT_LOCATOR_MODE,
|
|
221
209
|
// PLANNING
|
|
222
210
|
MIDSCENE_PLANNING_MODEL_NAME,
|
|
223
211
|
MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
|
|
@@ -226,14 +214,6 @@ export const MODEL_ENV_KEYS = [
|
|
|
226
214
|
MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
227
215
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
228
216
|
MIDSCENE_PLANNING_LOCATOR_MODE,
|
|
229
|
-
// GROUNDING
|
|
230
|
-
MIDSCENE_GROUNDING_MODEL_NAME,
|
|
231
|
-
MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
|
|
232
|
-
MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
|
|
233
|
-
MIDSCENE_GROUNDING_MODEL_BASE_URL,
|
|
234
|
-
MIDSCENE_GROUNDING_MODEL_API_KEY,
|
|
235
|
-
MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
|
|
236
|
-
MIDSCENE_GROUNDING_LOCATOR_MODE,
|
|
237
217
|
] as const;
|
|
238
218
|
|
|
239
219
|
export const ALL_ENV_KEYS = [
|
|
@@ -262,18 +242,18 @@ export type TVlModeTypes =
|
|
|
262
242
|
| 'gemini'
|
|
263
243
|
| 'vlm-ui-tars';
|
|
264
244
|
|
|
265
|
-
export interface
|
|
245
|
+
export interface IModelConfigForInsight {
|
|
266
246
|
// model name
|
|
267
|
-
[
|
|
247
|
+
[MIDSCENE_INSIGHT_MODEL_NAME]: string;
|
|
268
248
|
// proxy
|
|
269
|
-
[
|
|
270
|
-
[
|
|
249
|
+
[MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY]?: string;
|
|
250
|
+
[MIDSCENE_INSIGHT_MODEL_HTTP_PROXY]?: string;
|
|
271
251
|
// OpenAI
|
|
272
|
-
[
|
|
273
|
-
[
|
|
274
|
-
[
|
|
252
|
+
[MIDSCENE_INSIGHT_MODEL_BASE_URL]?: string;
|
|
253
|
+
[MIDSCENE_INSIGHT_MODEL_API_KEY]?: string;
|
|
254
|
+
[MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON]?: string;
|
|
275
255
|
// extra
|
|
276
|
-
[
|
|
256
|
+
[MIDSCENE_INSIGHT_LOCATOR_MODE]?: TVlModeValues;
|
|
277
257
|
}
|
|
278
258
|
|
|
279
259
|
/**
|
|
@@ -305,20 +285,6 @@ export interface IModelConfigForPlanning {
|
|
|
305
285
|
[MIDSCENE_PLANNING_LOCATOR_MODE]?: TVlModeValues;
|
|
306
286
|
}
|
|
307
287
|
|
|
308
|
-
export interface IModeConfigForGrounding {
|
|
309
|
-
// model name
|
|
310
|
-
[MIDSCENE_GROUNDING_MODEL_NAME]: string;
|
|
311
|
-
// proxy
|
|
312
|
-
[MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY]?: string;
|
|
313
|
-
[MIDSCENE_GROUNDING_MODEL_HTTP_PROXY]?: string;
|
|
314
|
-
// OpenAI
|
|
315
|
-
[MIDSCENE_GROUNDING_MODEL_BASE_URL]?: string;
|
|
316
|
-
[MIDSCENE_GROUNDING_MODEL_API_KEY]?: string;
|
|
317
|
-
[MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON]?: string;
|
|
318
|
-
// extra
|
|
319
|
-
[MIDSCENE_GROUNDING_LOCATOR_MODE]?: TVlModeValues;
|
|
320
|
-
}
|
|
321
|
-
|
|
322
288
|
export interface IModelConfigForDefault {
|
|
323
289
|
// model name
|
|
324
290
|
[MIDSCENE_MODEL_NAME]: string;
|
|
@@ -348,12 +314,11 @@ export interface IModelConfigForDefaultLegacy {
|
|
|
348
314
|
}
|
|
349
315
|
|
|
350
316
|
/**
|
|
351
|
-
* -
|
|
352
|
-
* - grounding:short for Visual Grounding
|
|
317
|
+
* - insight: Visual Question Answering and Visual Grounding (unified)
|
|
353
318
|
* - planning: planning
|
|
354
|
-
* - default: all except
|
|
319
|
+
* - default: all except insight、planning
|
|
355
320
|
*/
|
|
356
|
-
export type TIntent = '
|
|
321
|
+
export type TIntent = 'insight' | 'planning' | 'default';
|
|
357
322
|
|
|
358
323
|
/**
|
|
359
324
|
* Internal type with intent parameter for ModelConfigManager
|
|
@@ -361,20 +326,15 @@ export type TIntent = 'VQA' | 'planning' | 'grounding' | 'default';
|
|
|
361
326
|
*/
|
|
362
327
|
export type TModelConfigFnInternal = (options: {
|
|
363
328
|
intent: TIntent;
|
|
364
|
-
}) =>
|
|
365
|
-
| IModelConfigForVQA
|
|
366
|
-
| IModelConfigForPlanning
|
|
367
|
-
| IModeConfigForGrounding
|
|
368
|
-
| IModelConfigForDefault;
|
|
329
|
+
}) => IModelConfigForInsight | IModelConfigForPlanning | IModelConfigForDefault;
|
|
369
330
|
|
|
370
331
|
/**
|
|
371
332
|
* User-facing model config function type
|
|
372
333
|
* Users return config objects without needing to know about intent parameter
|
|
373
334
|
*/
|
|
374
335
|
export type TModelConfigFn = () =>
|
|
375
|
-
|
|
|
336
|
+
| IModelConfigForInsight
|
|
376
337
|
| IModelConfigForPlanning
|
|
377
|
-
| IModeConfigForGrounding
|
|
378
338
|
| IModelConfigForDefault;
|
|
379
339
|
|
|
380
340
|
export enum UITarsModelVersion {
|