@midscene/shared 0.30.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/build/rspack-config.mjs +4 -0
- package/dist/es/constants/example-code.mjs +4 -4
- package/dist/es/env/constants.mjs +27 -82
- package/dist/es/env/global-config-manager.mjs +2 -3
- package/dist/es/env/helper.mjs +12 -17
- package/dist/es/env/init-debug.mjs +6 -6
- package/dist/es/env/model-config-manager.mjs +45 -65
- package/dist/es/env/parse-model-config.mjs +112 -0
- package/dist/es/env/types.mjs +70 -162
- package/dist/es/extractor/dom-util.mjs +10 -18
- package/dist/es/extractor/index.mjs +2 -3
- package/dist/es/extractor/locator.mjs +8 -15
- package/dist/es/extractor/tree.mjs +2 -5
- package/dist/es/extractor/util.mjs +4 -28
- package/dist/es/extractor/web-extractor.mjs +7 -14
- package/dist/es/index.mjs +2 -1
- package/dist/es/mcp/base-server.mjs +250 -0
- package/dist/es/mcp/base-tools.mjs +84 -0
- package/dist/es/mcp/index.mjs +5 -0
- package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
- package/dist/es/mcp/tool-generator.mjs +207 -0
- package/dist/es/mcp/types.mjs +3 -0
- package/dist/es/node/fs.mjs +2 -2
- package/dist/es/utils.mjs +2 -3
- package/dist/es/zod-schema-utils.mjs +54 -0
- package/dist/lib/baseDB.js +2 -2
- package/dist/lib/build/copy-static.js +4 -4
- package/dist/lib/build/rspack-config.js +38 -0
- package/dist/lib/common.js +4 -4
- package/dist/lib/constants/example-code.js +6 -6
- package/dist/lib/constants/index.js +13 -13
- package/dist/lib/env/basic.js +2 -2
- package/dist/lib/env/constants.js +32 -90
- package/dist/lib/env/global-config-manager.js +4 -5
- package/dist/lib/env/helper.js +13 -22
- package/dist/lib/env/index.js +24 -28
- package/dist/lib/env/init-debug.js +7 -7
- package/dist/lib/env/model-config-manager.js +47 -67
- package/dist/lib/env/parse-model-config.js +155 -0
- package/dist/lib/env/types.js +146 -379
- package/dist/lib/env/utils.js +4 -4
- package/dist/lib/extractor/constants.js +4 -4
- package/dist/lib/extractor/debug.js +1 -1
- package/dist/lib/extractor/dom-util.js +18 -26
- package/dist/lib/extractor/index.js +11 -21
- package/dist/lib/extractor/locator.js +10 -20
- package/dist/lib/extractor/tree.js +4 -7
- package/dist/lib/extractor/util.js +17 -50
- package/dist/lib/extractor/web-extractor.js +12 -19
- package/dist/lib/img/box-select.js +4 -4
- package/dist/lib/img/draw-box.js +2 -2
- package/dist/lib/img/get-jimp.js +16 -34
- package/dist/lib/img/get-photon.js +24 -47
- package/dist/lib/img/get-sharp.js +16 -34
- package/dist/lib/img/index.js +18 -18
- package/dist/lib/img/info.js +4 -4
- package/dist/lib/img/transform.js +10 -10
- package/dist/lib/index.js +8 -4
- package/dist/lib/logger.js +4 -4
- package/dist/lib/mcp/base-server.js +300 -0
- package/dist/lib/mcp/base-tools.js +118 -0
- package/dist/lib/mcp/index.js +86 -0
- package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
- package/dist/lib/mcp/tool-generator.js +244 -0
- package/dist/lib/mcp/types.js +40 -0
- package/dist/lib/node/fs.js +6 -6
- package/dist/lib/node/index.js +6 -8
- package/dist/lib/polyfills/async-hooks.js +2 -2
- package/dist/lib/polyfills/index.js +6 -8
- package/dist/lib/types/index.js +2 -2
- package/dist/lib/us-keyboard-layout.js +2 -2
- package/dist/lib/utils.js +13 -14
- package/dist/lib/zod-schema-utils.js +97 -0
- package/dist/types/build/rspack-config.d.ts +8 -0
- package/dist/types/constants/example-code.d.ts +1 -1
- package/dist/types/env/constants.d.ts +5 -18
- package/dist/types/env/global-config-manager.d.ts +1 -2
- package/dist/types/env/helper.d.ts +2 -4
- package/dist/types/env/model-config-manager.d.ts +8 -7
- package/dist/types/env/parse-model-config.d.ts +28 -0
- package/dist/types/env/types.d.ts +152 -191
- package/dist/types/extractor/dom-util.d.ts +2 -15
- package/dist/types/extractor/index.d.ts +1 -2
- package/dist/types/extractor/locator.d.ts +0 -1
- package/dist/types/extractor/tree.d.ts +1 -4
- package/dist/types/extractor/util.d.ts +0 -3
- package/dist/types/index.d.ts +1 -0
- package/dist/types/mcp/base-server.d.ts +77 -0
- package/dist/types/mcp/base-tools.d.ts +55 -0
- package/dist/types/mcp/index.d.ts +5 -0
- package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
- package/dist/types/mcp/tool-generator.d.ts +11 -0
- package/dist/types/mcp/types.d.ts +100 -0
- package/dist/types/types/index.d.ts +5 -2
- package/dist/types/zod-schema-utils.d.ts +23 -0
- package/package.json +19 -4
- package/src/build/rspack-config.ts +12 -0
- package/src/constants/example-code.ts +4 -4
- package/src/env/constants.ts +58 -203
- package/src/env/global-config-manager.ts +7 -7
- package/src/env/helper.ts +10 -31
- package/src/env/init-debug.ts +11 -6
- package/src/env/model-config-manager.ts +91 -87
- package/src/env/parse-model-config.ts +265 -0
- package/src/env/types.ts +212 -344
- package/src/extractor/dom-util.ts +15 -12
- package/src/extractor/index.ts +0 -3
- package/src/extractor/locator.ts +3 -12
- package/src/extractor/tree.ts +4 -4
- package/src/extractor/util.ts +0 -32
- package/src/index.ts +2 -0
- package/src/mcp/base-server.ts +435 -0
- package/src/mcp/base-tools.ts +196 -0
- package/src/mcp/index.ts +5 -0
- package/src/mcp/inject-report-html-plugin.ts +119 -0
- package/src/mcp/tool-generator.ts +330 -0
- package/src/mcp/types.ts +108 -0
- package/src/node/fs.ts +1 -1
- package/src/types/index.ts +8 -2
- package/src/utils.ts +1 -1
- package/src/zod-schema-utils.ts +133 -0
- package/dist/es/env/decide-model-config.mjs +0 -172
- package/dist/es/env/parse.mjs +0 -69
- package/dist/lib/env/decide-model-config.js +0 -212
- package/dist/lib/env/parse.js +0 -106
- package/dist/types/env/decide-model-config.d.ts +0 -14
- package/dist/types/env/parse.d.ts +0 -12
- package/src/env/decide-model-config.ts +0 -319
- package/src/env/parse.ts +0 -131
|
@@ -113,12 +113,12 @@ tasks:
|
|
|
113
113
|
# Auto Planning (.ai)
|
|
114
114
|
# ----------------
|
|
115
115
|
|
|
116
|
-
# Perform an interaction. \`ai\` is a shorthand for \`
|
|
116
|
+
# Perform an interaction. \`ai\` is a shorthand for \`aiAct\`.
|
|
117
117
|
- ai: <prompt>
|
|
118
118
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
119
119
|
|
|
120
120
|
# This usage is the same as \`ai\`.
|
|
121
|
-
-
|
|
121
|
+
- aiAct: <prompt>
|
|
122
122
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
123
123
|
|
|
124
124
|
# Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)
|
|
@@ -166,8 +166,8 @@ tasks:
|
|
|
166
166
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
167
167
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
168
168
|
|
|
169
|
-
#
|
|
170
|
-
-
|
|
169
|
+
# Record the current screenshot with a description in the report file.
|
|
170
|
+
- recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.
|
|
171
171
|
content: <content> # Optional, the description of the screenshot.
|
|
172
172
|
|
|
173
173
|
# Data Extraction
|
|
@@ -1,79 +1,33 @@
|
|
|
1
|
-
import {
|
|
2
|
-
const
|
|
3
|
-
modelName:
|
|
4
|
-
socksProxy:
|
|
5
|
-
httpProxy:
|
|
6
|
-
openaiBaseURL:
|
|
7
|
-
openaiApiKey:
|
|
8
|
-
openaiExtraConfig:
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
azureOpenaiScope: MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
|
|
12
|
-
azureOpenaiKey: MIDSCENE_VQA_AZURE_OPENAI_KEY,
|
|
13
|
-
azureOpenaiEndpoint: MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
|
|
14
|
-
azureOpenaiApiVersion: MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
|
|
15
|
-
azureOpenaiDeployment: MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
|
|
16
|
-
azureExtraConfig: MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
|
|
17
|
-
useAnthropicSdk: MIDSCENE_VQA_USE_ANTHROPIC_SDK,
|
|
18
|
-
anthropicApiKey: MIDSCENE_VQA_ANTHROPIC_API_KEY,
|
|
19
|
-
vlMode: MIDSCENE_VQA_VL_MODE
|
|
20
|
-
};
|
|
21
|
-
const GROUNDING_MODEL_CONFIG_KEYS = {
|
|
22
|
-
modelName: MIDSCENE_GROUNDING_MODEL_NAME,
|
|
23
|
-
socksProxy: MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
|
|
24
|
-
httpProxy: MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
|
|
25
|
-
openaiBaseURL: MIDSCENE_GROUNDING_OPENAI_BASE_URL,
|
|
26
|
-
openaiApiKey: MIDSCENE_GROUNDING_OPENAI_API_KEY,
|
|
27
|
-
openaiExtraConfig: MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
|
|
28
|
-
openaiUseAzureDeprecated: MIDSCENE_GROUNDING_OPENAI_USE_AZURE,
|
|
29
|
-
useAzureOpenai: MIDSCENE_GROUNDING_USE_AZURE_OPENAI,
|
|
30
|
-
azureOpenaiScope: MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE,
|
|
31
|
-
azureOpenaiKey: MIDSCENE_GROUNDING_AZURE_OPENAI_KEY,
|
|
32
|
-
azureOpenaiEndpoint: MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT,
|
|
33
|
-
azureOpenaiApiVersion: MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION,
|
|
34
|
-
azureOpenaiDeployment: MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT,
|
|
35
|
-
azureExtraConfig: MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON,
|
|
36
|
-
useAnthropicSdk: MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK,
|
|
37
|
-
anthropicApiKey: MIDSCENE_GROUNDING_ANTHROPIC_API_KEY,
|
|
38
|
-
vlMode: MIDSCENE_GROUNDING_VL_MODE
|
|
1
|
+
import { MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TIMEOUT, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
|
|
2
|
+
const INSIGHT_MODEL_CONFIG_KEYS = {
|
|
3
|
+
modelName: MIDSCENE_INSIGHT_MODEL_NAME,
|
|
4
|
+
socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
5
|
+
httpProxy: MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
|
|
6
|
+
openaiBaseURL: MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
7
|
+
openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
8
|
+
openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
9
|
+
modelFamily: 'THERE_IS_NO_MODEL_FAMILY_FOR_INSIGHT',
|
|
10
|
+
timeout: MIDSCENE_INSIGHT_MODEL_TIMEOUT
|
|
39
11
|
};
|
|
40
12
|
const PLANNING_MODEL_CONFIG_KEYS = {
|
|
41
13
|
modelName: MIDSCENE_PLANNING_MODEL_NAME,
|
|
42
|
-
socksProxy:
|
|
43
|
-
httpProxy:
|
|
44
|
-
openaiBaseURL:
|
|
45
|
-
openaiApiKey:
|
|
46
|
-
openaiExtraConfig:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
azureOpenaiScope: MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE,
|
|
50
|
-
azureOpenaiKey: MIDSCENE_PLANNING_AZURE_OPENAI_KEY,
|
|
51
|
-
azureOpenaiEndpoint: MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT,
|
|
52
|
-
azureOpenaiApiVersion: MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION,
|
|
53
|
-
azureOpenaiDeployment: MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT,
|
|
54
|
-
azureExtraConfig: MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON,
|
|
55
|
-
useAnthropicSdk: MIDSCENE_PLANNING_USE_ANTHROPIC_SDK,
|
|
56
|
-
anthropicApiKey: MIDSCENE_PLANNING_ANTHROPIC_API_KEY,
|
|
57
|
-
vlMode: MIDSCENE_PLANNING_VL_MODE
|
|
14
|
+
socksProxy: MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
|
|
15
|
+
httpProxy: MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
|
|
16
|
+
openaiBaseURL: MIDSCENE_PLANNING_MODEL_BASE_URL,
|
|
17
|
+
openaiApiKey: MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
18
|
+
openaiExtraConfig: MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
19
|
+
modelFamily: 'THERE_IS_NO_MODEL_FAMILY_FOR_PLANNING',
|
|
20
|
+
timeout: MIDSCENE_PLANNING_MODEL_TIMEOUT
|
|
58
21
|
};
|
|
59
22
|
const DEFAULT_MODEL_CONFIG_KEYS = {
|
|
60
23
|
modelName: MIDSCENE_MODEL_NAME,
|
|
61
|
-
socksProxy:
|
|
62
|
-
httpProxy:
|
|
63
|
-
openaiBaseURL:
|
|
64
|
-
openaiApiKey:
|
|
65
|
-
openaiExtraConfig:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
|
|
69
|
-
azureOpenaiKey: MIDSCENE_AZURE_OPENAI_KEY,
|
|
70
|
-
azureOpenaiEndpoint: MIDSCENE_AZURE_OPENAI_ENDPOINT,
|
|
71
|
-
azureOpenaiApiVersion: MIDSCENE_AZURE_OPENAI_API_VERSION,
|
|
72
|
-
azureOpenaiDeployment: MIDSCENE_AZURE_OPENAI_DEPLOYMENT,
|
|
73
|
-
azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
|
|
74
|
-
useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
|
|
75
|
-
anthropicApiKey: MIDSCENE_ANTHROPIC_API_KEY,
|
|
76
|
-
vlMode: MIDSCENE_VL_MODE
|
|
24
|
+
socksProxy: MIDSCENE_MODEL_SOCKS_PROXY,
|
|
25
|
+
httpProxy: MIDSCENE_MODEL_HTTP_PROXY,
|
|
26
|
+
openaiBaseURL: MIDSCENE_MODEL_BASE_URL,
|
|
27
|
+
openaiApiKey: MIDSCENE_MODEL_API_KEY,
|
|
28
|
+
openaiExtraConfig: MIDSCENE_MODEL_INIT_CONFIG_JSON,
|
|
29
|
+
modelFamily: MIDSCENE_MODEL_FAMILY,
|
|
30
|
+
timeout: MIDSCENE_MODEL_TIMEOUT
|
|
77
31
|
};
|
|
78
32
|
const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
|
|
79
33
|
modelName: MIDSCENE_MODEL_NAME,
|
|
@@ -82,16 +36,7 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
|
|
|
82
36
|
openaiBaseURL: OPENAI_BASE_URL,
|
|
83
37
|
openaiApiKey: OPENAI_API_KEY,
|
|
84
38
|
openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
|
|
88
|
-
azureOpenaiKey: AZURE_OPENAI_KEY,
|
|
89
|
-
azureOpenaiEndpoint: AZURE_OPENAI_ENDPOINT,
|
|
90
|
-
azureOpenaiApiVersion: AZURE_OPENAI_API_VERSION,
|
|
91
|
-
azureOpenaiDeployment: AZURE_OPENAI_DEPLOYMENT,
|
|
92
|
-
azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
|
|
93
|
-
useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
|
|
94
|
-
anthropicApiKey: ANTHROPIC_API_KEY,
|
|
95
|
-
vlMode: 'DEFAULT_MODEL_CONFIG_KEYS has no vlMode key'
|
|
39
|
+
modelFamily: 'DEFAULT_MODEL_CONFIG_KEYS has no modelFamily key',
|
|
40
|
+
timeout: MIDSCENE_MODEL_TIMEOUT
|
|
96
41
|
};
|
|
97
|
-
export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
|
|
42
|
+
export { DEFAULT_MODEL_CONFIG_KEYS, DEFAULT_MODEL_CONFIG_KEYS_LEGACY, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS };
|
|
@@ -30,8 +30,8 @@ class GlobalConfigManager {
|
|
|
30
30
|
}
|
|
31
31
|
getEnvConfigValue(key) {
|
|
32
32
|
const allConfig = this.getAllEnvConfig();
|
|
33
|
+
if (key === MATCH_BY_POSITION) throw new Error('MATCH_BY_POSITION is discarded, use MIDSCENE_MODEL_FAMILY instead');
|
|
33
34
|
if (!STRING_ENV_KEYS.includes(key)) throw new Error(`getEnvConfigValue with key ${key} is not supported.`);
|
|
34
|
-
if (key === MATCH_BY_POSITION) throw new Error('MATCH_BY_POSITION is deprecated, use MIDSCENE_USE_VL_MODEL instead');
|
|
35
35
|
const value = allConfig[key];
|
|
36
36
|
this.keysHaveBeenRead[key] = true;
|
|
37
37
|
if ('string' == typeof value) return value.trim();
|
|
@@ -58,7 +58,6 @@ class GlobalConfigManager {
|
|
|
58
58
|
this.globalModelConfigManager = globalModelConfigManager;
|
|
59
59
|
}
|
|
60
60
|
overrideAIConfig(newConfig, extendMode = false) {
|
|
61
|
-
var _this_override;
|
|
62
61
|
for(const key in newConfig){
|
|
63
62
|
if (![
|
|
64
63
|
...GLOBAL_ENV_KEYS,
|
|
@@ -69,7 +68,7 @@ class GlobalConfigManager {
|
|
|
69
68
|
if (this.keysHaveBeenRead[key]) console.warn(`Warning: try to override AI config with key ${key} ,but it has been read.`);
|
|
70
69
|
}
|
|
71
70
|
const savedNewConfig = extendMode ? {
|
|
72
|
-
...
|
|
71
|
+
...this.override?.newConfig,
|
|
73
72
|
...newConfig
|
|
74
73
|
} : newConfig;
|
|
75
74
|
this.override = {
|
package/dist/es/env/helper.mjs
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { assert } from "../utils.mjs";
|
|
2
1
|
const maskKey = (key, maskChar = '*')=>{
|
|
3
2
|
if ('string' != typeof key || 0 === key.length) return key;
|
|
4
3
|
const prefixLen = 3;
|
|
@@ -12,21 +11,21 @@ const maskKey = (key, maskChar = '*')=>{
|
|
|
12
11
|
return `${prefix}${mask}${suffix}`;
|
|
13
12
|
};
|
|
14
13
|
const maskConfig = (config)=>Object.fromEntries(Object.entries(config).map(([key, value])=>{
|
|
15
|
-
if ([
|
|
16
|
-
'openaiApiKey',
|
|
17
|
-
'azureOpenaiKey',
|
|
18
|
-
'anthropicApiKey'
|
|
19
|
-
].includes(key)) return [
|
|
14
|
+
if (!value) return [
|
|
20
15
|
key,
|
|
21
|
-
|
|
16
|
+
value
|
|
22
17
|
];
|
|
23
|
-
if ([
|
|
24
|
-
'openaiExtraConfig',
|
|
25
|
-
'azureExtraConfig'
|
|
26
|
-
].includes(key)) return [
|
|
18
|
+
if ('string' == typeof value && /key/i.test(key)) return [
|
|
27
19
|
key,
|
|
28
|
-
maskKey(
|
|
20
|
+
maskKey(value)
|
|
29
21
|
];
|
|
22
|
+
if ('object' == typeof value) {
|
|
23
|
+
const valueStr = JSON.stringify(value);
|
|
24
|
+
if (/key/i.test(valueStr)) return [
|
|
25
|
+
key,
|
|
26
|
+
maskKey(valueStr)
|
|
27
|
+
];
|
|
28
|
+
}
|
|
30
29
|
return [
|
|
31
30
|
key,
|
|
32
31
|
value
|
|
@@ -41,8 +40,4 @@ const parseJson = (key, value)=>{
|
|
|
41
40
|
});
|
|
42
41
|
}
|
|
43
42
|
};
|
|
44
|
-
|
|
45
|
-
if (modelName) modelVendorFlag ? assert(value, `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} and ${modelVendorFlag} has also been specified in ${provider}, but got: ${value}. Please check your config.`) : assert(value, `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} in ${provider}, but got: ${value}. Please check your config.`);
|
|
46
|
-
else assert(value, `The ${key} must be a non-empty string, but got: ${value}. Please check your config.`);
|
|
47
|
-
};
|
|
48
|
-
export { createAssert, maskConfig, parseJson };
|
|
43
|
+
export { maskConfig, parseJson };
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import { enableDebug } from "../logger.mjs";
|
|
2
2
|
import { getBasicEnvValue } from "./basic.mjs";
|
|
3
|
-
import {
|
|
3
|
+
import { MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE } from "./types.mjs";
|
|
4
4
|
const initDebugConfig = ()=>{
|
|
5
|
-
const shouldPrintTiming = getBasicEnvValue(
|
|
5
|
+
const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_MODEL_PROFILE);
|
|
6
6
|
let debugConfig = '';
|
|
7
7
|
if (shouldPrintTiming) {
|
|
8
|
-
console.warn('
|
|
8
|
+
console.warn('MIDSCENE_DEBUG_MODEL_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead');
|
|
9
9
|
debugConfig = 'ai:profile';
|
|
10
10
|
}
|
|
11
|
-
const
|
|
12
|
-
if (
|
|
13
|
-
console.warn('
|
|
11
|
+
const shouldPrintModelResponse = getBasicEnvValue(MIDSCENE_DEBUG_MODEL_RESPONSE);
|
|
12
|
+
if (shouldPrintModelResponse) {
|
|
13
|
+
console.warn('MIDSCENE_DEBUG_MODEL_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead');
|
|
14
14
|
debugConfig = debugConfig ? 'ai:*' : 'ai:call';
|
|
15
15
|
}
|
|
16
16
|
if (debugConfig) enableDebug(debugConfig);
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { decideModelConfigFromIntentConfig } from "./parse-model-config.mjs";
|
|
2
2
|
function _define_property(obj, key, value) {
|
|
3
3
|
if (key in obj) Object.defineProperty(obj, key, {
|
|
4
4
|
value: value,
|
|
@@ -9,91 +9,71 @@ function _define_property(obj, key, value) {
|
|
|
9
9
|
else obj[key] = value;
|
|
10
10
|
return obj;
|
|
11
11
|
}
|
|
12
|
-
const ALL_INTENTS = [
|
|
13
|
-
'VQA',
|
|
14
|
-
'default',
|
|
15
|
-
'grounding',
|
|
16
|
-
'planning'
|
|
17
|
-
];
|
|
18
12
|
class ModelConfigManager {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
13
|
+
initialize() {
|
|
14
|
+
if (this.isInitialized) return;
|
|
15
|
+
let configMap;
|
|
16
|
+
if (this.modelConfig) {
|
|
17
|
+
this.isolatedMode = true;
|
|
18
|
+
configMap = this.normalizeModelConfig(this.modelConfig);
|
|
19
|
+
} else configMap = this.globalConfigManager?.getAllEnvConfig() || {};
|
|
20
|
+
const defaultConfig = decideModelConfigFromIntentConfig('default', configMap);
|
|
21
|
+
if (!defaultConfig) throw new Error('default model config is not found, which should not happen');
|
|
22
|
+
const insightConfig = decideModelConfigFromIntentConfig('insight', configMap);
|
|
23
|
+
const planningConfig = decideModelConfigFromIntentConfig('planning', configMap);
|
|
24
|
+
this.modelConfigMap = {
|
|
25
|
+
default: {
|
|
26
|
+
...defaultConfig,
|
|
27
|
+
createOpenAIClient: this.createOpenAIClientFn
|
|
28
|
+
},
|
|
29
|
+
insight: {
|
|
30
|
+
...insightConfig || defaultConfig,
|
|
31
|
+
createOpenAIClient: this.createOpenAIClientFn
|
|
32
|
+
},
|
|
33
|
+
planning: {
|
|
34
|
+
...planningConfig || defaultConfig,
|
|
35
|
+
createOpenAIClient: this.createOpenAIClientFn
|
|
36
|
+
}
|
|
25
37
|
};
|
|
26
|
-
|
|
27
|
-
const result = modelConfigFn({
|
|
28
|
-
intent: i
|
|
29
|
-
});
|
|
30
|
-
if (!result) throw new Error(`The agent has an option named modelConfig is a function, but it return ${result} when call with intent ${i}, which should be a object.`);
|
|
31
|
-
intentConfigMap[i] = result;
|
|
32
|
-
}
|
|
33
|
-
return intentConfigMap;
|
|
38
|
+
this.isInitialized = true;
|
|
34
39
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
};
|
|
42
|
-
for (const i of ALL_INTENTS){
|
|
43
|
-
const result = decideModelConfigFromIntentConfig(i, intentConfigMap[i]);
|
|
44
|
-
modelConfigMap[i] = result;
|
|
45
|
-
}
|
|
46
|
-
return modelConfigMap;
|
|
47
|
-
}
|
|
48
|
-
calcModelConfigMapBaseOnEnv(allEnvConfig) {
|
|
49
|
-
const modelConfigMap = {
|
|
50
|
-
VQA: void 0,
|
|
51
|
-
default: void 0,
|
|
52
|
-
grounding: void 0,
|
|
53
|
-
planning: void 0
|
|
54
|
-
};
|
|
55
|
-
for (const i of ALL_INTENTS){
|
|
56
|
-
const result = decideModelConfigFromEnv(i, allEnvConfig);
|
|
57
|
-
modelConfigMap[i] = result;
|
|
58
|
-
}
|
|
59
|
-
return modelConfigMap;
|
|
40
|
+
normalizeModelConfig(config) {
|
|
41
|
+
return Object.entries(config).reduce((acc, [key, value])=>{
|
|
42
|
+
if (null == value) return acc;
|
|
43
|
+
acc[key] = String(value);
|
|
44
|
+
return acc;
|
|
45
|
+
}, Object.create(null));
|
|
60
46
|
}
|
|
61
47
|
clearModelConfigMap() {
|
|
62
48
|
if (this.isolatedMode) throw new Error('ModelConfigManager work in isolated mode, so clearModelConfigMap should not be called');
|
|
63
|
-
this.
|
|
49
|
+
this.isInitialized = false;
|
|
64
50
|
}
|
|
65
51
|
getModelConfig(intent) {
|
|
66
|
-
if (this.
|
|
67
|
-
|
|
68
|
-
return this.modelConfigMap[intent];
|
|
69
|
-
}
|
|
70
|
-
if (!this.modelConfigMap) {
|
|
71
|
-
if (!this.globalConfigManager) throw new Error('globalConfigManager is not registered, which should not happen');
|
|
72
|
-
this.modelConfigMap = this.calcModelConfigMapBaseOnEnv(this.globalConfigManager.getAllEnvConfig());
|
|
73
|
-
}
|
|
52
|
+
if (!this.isInitialized) this.initialize();
|
|
53
|
+
if (!this.modelConfigMap) throw new Error('modelConfigMap is not initialized, which should not happen');
|
|
74
54
|
return this.modelConfigMap[intent];
|
|
75
55
|
}
|
|
76
56
|
getUploadTestServerUrl() {
|
|
77
57
|
const { openaiExtraConfig } = this.getModelConfig('default');
|
|
78
|
-
const serverUrl =
|
|
58
|
+
const serverUrl = openaiExtraConfig?.REPORT_SERVER_URL;
|
|
79
59
|
return serverUrl;
|
|
80
60
|
}
|
|
81
61
|
registerGlobalConfigManager(globalConfigManager) {
|
|
82
62
|
this.globalConfigManager = globalConfigManager;
|
|
83
63
|
}
|
|
84
|
-
throwErrorIfNonVLModel(
|
|
85
|
-
const modelConfig = this.getModelConfig(
|
|
86
|
-
if (!modelConfig.vlMode) throw new Error('
|
|
64
|
+
throwErrorIfNonVLModel() {
|
|
65
|
+
const modelConfig = this.getModelConfig('default');
|
|
66
|
+
if (!modelConfig.vlMode) throw new Error('MIDSCENE_MODEL_FAMILY is not set to a visual language model (VL model), the element localization can not be achieved. Check your model configuration. See https://midscenejs.com/model-strategy.html');
|
|
87
67
|
}
|
|
88
|
-
constructor(
|
|
68
|
+
constructor(modelConfig, createOpenAIClientFn){
|
|
89
69
|
_define_property(this, "modelConfigMap", void 0);
|
|
70
|
+
_define_property(this, "isInitialized", false);
|
|
90
71
|
_define_property(this, "isolatedMode", false);
|
|
91
72
|
_define_property(this, "globalConfigManager", void 0);
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
}
|
|
73
|
+
_define_property(this, "modelConfig", void 0);
|
|
74
|
+
_define_property(this, "createOpenAIClientFn", void 0);
|
|
75
|
+
this.modelConfig = modelConfig;
|
|
76
|
+
this.createOpenAIClientFn = createOpenAIClientFn;
|
|
97
77
|
}
|
|
98
78
|
}
|
|
99
79
|
export { ModelConfigManager };
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { DEFAULT_MODEL_CONFIG_KEYS, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from "./constants.mjs";
|
|
2
|
+
import { MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, OPENAI_API_KEY, OPENAI_BASE_URL, UITarsModelVersion } from "./types.mjs";
|
|
3
|
+
import { getDebug } from "../logger.mjs";
|
|
4
|
+
import { assert } from "../utils.mjs";
|
|
5
|
+
import { maskConfig, parseJson } from "./helper.mjs";
|
|
6
|
+
import { initDebugConfig } from "./init-debug.mjs";
|
|
7
|
+
const KEYS_MAP = {
|
|
8
|
+
insight: INSIGHT_MODEL_CONFIG_KEYS,
|
|
9
|
+
planning: PLANNING_MODEL_CONFIG_KEYS,
|
|
10
|
+
default: DEFAULT_MODEL_CONFIG_KEYS
|
|
11
|
+
};
|
|
12
|
+
const modelFamilyToVLConfig = (modelFamily)=>{
|
|
13
|
+
if (!modelFamily) return {
|
|
14
|
+
vlMode: void 0,
|
|
15
|
+
uiTarsVersion: void 0
|
|
16
|
+
};
|
|
17
|
+
if ('vlm-ui-tars' === modelFamily) return {
|
|
18
|
+
vlMode: 'vlm-ui-tars',
|
|
19
|
+
uiTarsVersion: UITarsModelVersion.V1_0
|
|
20
|
+
};
|
|
21
|
+
if ('vlm-ui-tars-doubao' === modelFamily || 'vlm-ui-tars-doubao-1.5' === modelFamily) return {
|
|
22
|
+
vlMode: 'vlm-ui-tars',
|
|
23
|
+
uiTarsVersion: UITarsModelVersion.DOUBAO_1_5_20B
|
|
24
|
+
};
|
|
25
|
+
if (!MODEL_FAMILY_VALUES.includes(modelFamily)) throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}`);
|
|
26
|
+
return {
|
|
27
|
+
vlMode: modelFamily,
|
|
28
|
+
uiTarsVersion: void 0
|
|
29
|
+
};
|
|
30
|
+
};
|
|
31
|
+
const legacyConfigToModelFamily = (provider)=>{
|
|
32
|
+
const isDoubao = provider[MIDSCENE_USE_DOUBAO_VISION];
|
|
33
|
+
const isQwen = provider[MIDSCENE_USE_QWEN_VL];
|
|
34
|
+
const isQwen3 = provider[MIDSCENE_USE_QWEN3_VL];
|
|
35
|
+
const isUiTars = provider[MIDSCENE_USE_VLM_UI_TARS];
|
|
36
|
+
const isGemini = provider[MIDSCENE_USE_GEMINI];
|
|
37
|
+
const enabledModes = [
|
|
38
|
+
isDoubao && MIDSCENE_USE_DOUBAO_VISION,
|
|
39
|
+
isQwen && MIDSCENE_USE_QWEN_VL,
|
|
40
|
+
isQwen3 && MIDSCENE_USE_QWEN3_VL,
|
|
41
|
+
isUiTars && MIDSCENE_USE_VLM_UI_TARS,
|
|
42
|
+
isGemini && MIDSCENE_USE_GEMINI
|
|
43
|
+
].filter(Boolean);
|
|
44
|
+
if (enabledModes.length > 1) throw new Error(`Only one vision mode can be enabled at a time. Currently enabled modes: ${enabledModes.join(', ')}. Please disable all but one mode.`);
|
|
45
|
+
if (isQwen3) return 'qwen3-vl';
|
|
46
|
+
if (isQwen) return 'qwen2.5-vl';
|
|
47
|
+
if (isDoubao) return 'doubao-vision';
|
|
48
|
+
if (isGemini) return 'gemini';
|
|
49
|
+
if (isUiTars) if ('1' === isUiTars) return 'vlm-ui-tars';
|
|
50
|
+
else if ('DOUBAO' === isUiTars || 'DOUBAO-1.5' === isUiTars) return 'vlm-ui-tars-doubao-1.5';
|
|
51
|
+
else return 'vlm-ui-tars-doubao';
|
|
52
|
+
};
|
|
53
|
+
const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
|
|
54
|
+
initDebugConfig();
|
|
55
|
+
const debugLog = getDebug('ai:config');
|
|
56
|
+
debugLog('enter parseOpenaiSdkConfig with keys:', keys);
|
|
57
|
+
const legacyAPIKey = useLegacyLogic ? provider[OPENAI_API_KEY] : void 0;
|
|
58
|
+
const legacyBaseURL = useLegacyLogic ? provider[OPENAI_BASE_URL] : void 0;
|
|
59
|
+
const legacySocksProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_SOCKS_PROXY] : void 0;
|
|
60
|
+
const legacyHttpProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_HTTP_PROXY] : void 0;
|
|
61
|
+
const legacyOpenaiExtraConfig = useLegacyLogic ? provider[MIDSCENE_OPENAI_INIT_CONFIG_JSON] : void 0;
|
|
62
|
+
const legacyModelFamily = useLegacyLogic ? legacyConfigToModelFamily(provider) : void 0;
|
|
63
|
+
const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
|
|
64
|
+
const openaiApiKey = provider[keys.openaiApiKey] || legacyAPIKey;
|
|
65
|
+
const openaiBaseURL = provider[keys.openaiBaseURL] || legacyBaseURL;
|
|
66
|
+
const socksProxy = provider[keys.socksProxy] || legacySocksProxy;
|
|
67
|
+
const httpProxy = provider[keys.httpProxy] || legacyHttpProxy;
|
|
68
|
+
const modelName = provider[keys.modelName];
|
|
69
|
+
const openaiExtraConfigStr = provider[keys.openaiExtraConfig];
|
|
70
|
+
const openaiExtraConfig = parseJson(keys.openaiExtraConfig, openaiExtraConfigStr || legacyOpenaiExtraConfig);
|
|
71
|
+
const { vlMode, uiTarsVersion } = modelFamilyToVLConfig(modelFamilyRaw);
|
|
72
|
+
const getModelDescription = (vlMode, uiTarsVersion)=>{
|
|
73
|
+
if (vlMode) if (uiTarsVersion) return `UI-TARS=${uiTarsVersion}`;
|
|
74
|
+
else return `${vlMode} mode`;
|
|
75
|
+
return '';
|
|
76
|
+
};
|
|
77
|
+
const modelDescription = getModelDescription(vlMode, uiTarsVersion);
|
|
78
|
+
return {
|
|
79
|
+
socksProxy,
|
|
80
|
+
httpProxy,
|
|
81
|
+
vlModeRaw: vlMode,
|
|
82
|
+
openaiBaseURL,
|
|
83
|
+
openaiApiKey,
|
|
84
|
+
openaiExtraConfig,
|
|
85
|
+
vlMode,
|
|
86
|
+
uiTarsModelVersion: uiTarsVersion,
|
|
87
|
+
modelName: modelName,
|
|
88
|
+
modelDescription,
|
|
89
|
+
intent: '-',
|
|
90
|
+
timeout: provider[keys.timeout] ? Number(provider[keys.timeout]) : void 0
|
|
91
|
+
};
|
|
92
|
+
};
|
|
93
|
+
const decideModelConfigFromIntentConfig = (intent, configMap)=>{
|
|
94
|
+
const debugLog = getDebug('ai:config');
|
|
95
|
+
debugLog('will decideModelConfig base on agent.modelConfig()', intent, maskConfig(configMap));
|
|
96
|
+
const keysForFn = KEYS_MAP[intent];
|
|
97
|
+
const modelName = configMap[keysForFn.modelName];
|
|
98
|
+
if (!modelName) return void debugLog('no modelName found for intent', intent);
|
|
99
|
+
const finalResult = parseOpenaiSdkConfig({
|
|
100
|
+
keys: keysForFn,
|
|
101
|
+
provider: configMap,
|
|
102
|
+
useLegacyLogic: 'default' === intent
|
|
103
|
+
});
|
|
104
|
+
finalResult.intent = intent;
|
|
105
|
+
debugLog('decideModelConfig result by agent.modelConfig() with intent', intent, maskConfig({
|
|
106
|
+
...finalResult
|
|
107
|
+
}));
|
|
108
|
+
assert(finalResult.openaiBaseURL, `failed to get base URL of model (intent=${intent}). See https://midscenejs.com/model-strategy`);
|
|
109
|
+
if (!finalResult.modelName) console.warn(`modelName is not set for intent ${intent}, this may cause unexpected behavior. See https://midscenejs.com/model-strategy`);
|
|
110
|
+
return finalResult;
|
|
111
|
+
};
|
|
112
|
+
export { decideModelConfigFromIntentConfig, legacyConfigToModelFamily, modelFamilyToVLConfig, parseOpenaiSdkConfig };
|