@midscene/shared 0.30.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/build/rspack-config.mjs +4 -0
- package/dist/es/constants/example-code.mjs +4 -4
- package/dist/es/env/constants.mjs +27 -82
- package/dist/es/env/global-config-manager.mjs +2 -3
- package/dist/es/env/helper.mjs +12 -17
- package/dist/es/env/init-debug.mjs +6 -6
- package/dist/es/env/model-config-manager.mjs +45 -65
- package/dist/es/env/parse-model-config.mjs +112 -0
- package/dist/es/env/types.mjs +70 -162
- package/dist/es/extractor/dom-util.mjs +10 -18
- package/dist/es/extractor/index.mjs +2 -3
- package/dist/es/extractor/locator.mjs +8 -15
- package/dist/es/extractor/tree.mjs +2 -5
- package/dist/es/extractor/util.mjs +4 -28
- package/dist/es/extractor/web-extractor.mjs +7 -14
- package/dist/es/index.mjs +2 -1
- package/dist/es/mcp/base-server.mjs +250 -0
- package/dist/es/mcp/base-tools.mjs +84 -0
- package/dist/es/mcp/index.mjs +5 -0
- package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
- package/dist/es/mcp/tool-generator.mjs +207 -0
- package/dist/es/mcp/types.mjs +3 -0
- package/dist/es/node/fs.mjs +2 -2
- package/dist/es/utils.mjs +2 -3
- package/dist/es/zod-schema-utils.mjs +54 -0
- package/dist/lib/baseDB.js +2 -2
- package/dist/lib/build/copy-static.js +4 -4
- package/dist/lib/build/rspack-config.js +38 -0
- package/dist/lib/common.js +4 -4
- package/dist/lib/constants/example-code.js +6 -6
- package/dist/lib/constants/index.js +13 -13
- package/dist/lib/env/basic.js +2 -2
- package/dist/lib/env/constants.js +32 -90
- package/dist/lib/env/global-config-manager.js +4 -5
- package/dist/lib/env/helper.js +13 -22
- package/dist/lib/env/index.js +24 -28
- package/dist/lib/env/init-debug.js +7 -7
- package/dist/lib/env/model-config-manager.js +47 -67
- package/dist/lib/env/parse-model-config.js +155 -0
- package/dist/lib/env/types.js +146 -379
- package/dist/lib/env/utils.js +4 -4
- package/dist/lib/extractor/constants.js +4 -4
- package/dist/lib/extractor/debug.js +1 -1
- package/dist/lib/extractor/dom-util.js +18 -26
- package/dist/lib/extractor/index.js +11 -21
- package/dist/lib/extractor/locator.js +10 -20
- package/dist/lib/extractor/tree.js +4 -7
- package/dist/lib/extractor/util.js +17 -50
- package/dist/lib/extractor/web-extractor.js +12 -19
- package/dist/lib/img/box-select.js +4 -4
- package/dist/lib/img/draw-box.js +2 -2
- package/dist/lib/img/get-jimp.js +16 -34
- package/dist/lib/img/get-photon.js +24 -47
- package/dist/lib/img/get-sharp.js +16 -34
- package/dist/lib/img/index.js +18 -18
- package/dist/lib/img/info.js +4 -4
- package/dist/lib/img/transform.js +10 -10
- package/dist/lib/index.js +8 -4
- package/dist/lib/logger.js +4 -4
- package/dist/lib/mcp/base-server.js +300 -0
- package/dist/lib/mcp/base-tools.js +118 -0
- package/dist/lib/mcp/index.js +86 -0
- package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
- package/dist/lib/mcp/tool-generator.js +244 -0
- package/dist/lib/mcp/types.js +40 -0
- package/dist/lib/node/fs.js +6 -6
- package/dist/lib/node/index.js +6 -8
- package/dist/lib/polyfills/async-hooks.js +2 -2
- package/dist/lib/polyfills/index.js +6 -8
- package/dist/lib/types/index.js +2 -2
- package/dist/lib/us-keyboard-layout.js +2 -2
- package/dist/lib/utils.js +13 -14
- package/dist/lib/zod-schema-utils.js +97 -0
- package/dist/types/build/rspack-config.d.ts +8 -0
- package/dist/types/constants/example-code.d.ts +1 -1
- package/dist/types/env/constants.d.ts +5 -18
- package/dist/types/env/global-config-manager.d.ts +1 -2
- package/dist/types/env/helper.d.ts +2 -4
- package/dist/types/env/model-config-manager.d.ts +8 -7
- package/dist/types/env/parse-model-config.d.ts +28 -0
- package/dist/types/env/types.d.ts +152 -191
- package/dist/types/extractor/dom-util.d.ts +2 -15
- package/dist/types/extractor/index.d.ts +1 -2
- package/dist/types/extractor/locator.d.ts +0 -1
- package/dist/types/extractor/tree.d.ts +1 -4
- package/dist/types/extractor/util.d.ts +0 -3
- package/dist/types/index.d.ts +1 -0
- package/dist/types/mcp/base-server.d.ts +77 -0
- package/dist/types/mcp/base-tools.d.ts +55 -0
- package/dist/types/mcp/index.d.ts +5 -0
- package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
- package/dist/types/mcp/tool-generator.d.ts +11 -0
- package/dist/types/mcp/types.d.ts +100 -0
- package/dist/types/types/index.d.ts +5 -2
- package/dist/types/zod-schema-utils.d.ts +23 -0
- package/package.json +19 -4
- package/src/build/rspack-config.ts +12 -0
- package/src/constants/example-code.ts +4 -4
- package/src/env/constants.ts +58 -203
- package/src/env/global-config-manager.ts +7 -7
- package/src/env/helper.ts +10 -31
- package/src/env/init-debug.ts +11 -6
- package/src/env/model-config-manager.ts +91 -87
- package/src/env/parse-model-config.ts +265 -0
- package/src/env/types.ts +212 -344
- package/src/extractor/dom-util.ts +15 -12
- package/src/extractor/index.ts +0 -3
- package/src/extractor/locator.ts +3 -12
- package/src/extractor/tree.ts +4 -4
- package/src/extractor/util.ts +0 -32
- package/src/index.ts +2 -0
- package/src/mcp/base-server.ts +435 -0
- package/src/mcp/base-tools.ts +196 -0
- package/src/mcp/index.ts +5 -0
- package/src/mcp/inject-report-html-plugin.ts +119 -0
- package/src/mcp/tool-generator.ts +330 -0
- package/src/mcp/types.ts +108 -0
- package/src/node/fs.ts +1 -1
- package/src/types/index.ts +8 -2
- package/src/utils.ts +1 -1
- package/src/zod-schema-utils.ts +133 -0
- package/dist/es/env/decide-model-config.mjs +0 -172
- package/dist/es/env/parse.mjs +0 -69
- package/dist/lib/env/decide-model-config.js +0 -212
- package/dist/lib/env/parse.js +0 -106
- package/dist/types/env/decide-model-config.d.ts +0 -14
- package/dist/types/env/parse.d.ts +0 -12
- package/src/env/decide-model-config.ts +0 -319
- package/src/env/parse.ts +0 -131
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { getDebug } from '../logger';
|
|
2
1
|
import { initDebugConfig } from './init-debug';
|
|
3
2
|
import type { ModelConfigManager } from './model-config-manager';
|
|
4
3
|
import {
|
|
@@ -62,14 +61,16 @@ export class GlobalConfigManager {
|
|
|
62
61
|
getEnvConfigValue(key: (typeof STRING_ENV_KEYS)[number]) {
|
|
63
62
|
const allConfig = this.getAllEnvConfig();
|
|
64
63
|
|
|
65
|
-
if (!STRING_ENV_KEYS.includes(key)) {
|
|
66
|
-
throw new Error(`getEnvConfigValue with key ${key} is not supported.`);
|
|
67
|
-
}
|
|
68
64
|
if (key === MATCH_BY_POSITION) {
|
|
69
65
|
throw new Error(
|
|
70
|
-
'MATCH_BY_POSITION is
|
|
66
|
+
'MATCH_BY_POSITION is discarded, use MIDSCENE_MODEL_FAMILY instead',
|
|
71
67
|
);
|
|
72
68
|
}
|
|
69
|
+
|
|
70
|
+
if (!STRING_ENV_KEYS.includes(key)) {
|
|
71
|
+
throw new Error(`getEnvConfigValue with key ${key} is not supported.`);
|
|
72
|
+
}
|
|
73
|
+
|
|
73
74
|
const value = allConfig[key];
|
|
74
75
|
this.keysHaveBeenRead[key] = true;
|
|
75
76
|
if (typeof value === 'string') {
|
|
@@ -122,8 +123,7 @@ export class GlobalConfigManager {
|
|
|
122
123
|
}
|
|
123
124
|
|
|
124
125
|
/**
|
|
125
|
-
*
|
|
126
|
-
* can only override keys in MODEL_ENV_KEYS
|
|
126
|
+
* @deprecated use the modelConfig param in Agent constructor instead
|
|
127
127
|
*/
|
|
128
128
|
overrideAIConfig(
|
|
129
129
|
newConfig: Partial<
|
package/src/env/helper.ts
CHANGED
|
@@ -22,13 +22,19 @@ const maskKey = (key: string, maskChar = '*') => {
|
|
|
22
22
|
return `${prefix}${mask}${suffix}`;
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
-
export const maskConfig = (config:
|
|
25
|
+
export const maskConfig = (config: Record<string, unknown>) => {
|
|
26
26
|
return Object.fromEntries(
|
|
27
27
|
Object.entries(config).map(([key, value]) => {
|
|
28
|
-
if ([
|
|
28
|
+
if (!value) return [key, value];
|
|
29
|
+
|
|
30
|
+
if (typeof value === 'string' && /key/i.test(key)) {
|
|
29
31
|
return [key, maskKey(value)];
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
+
}
|
|
33
|
+
if (typeof value === 'object') {
|
|
34
|
+
const valueStr = JSON.stringify(value);
|
|
35
|
+
if (/key/i.test(valueStr)) {
|
|
36
|
+
return [key, maskKey(valueStr)];
|
|
37
|
+
}
|
|
32
38
|
}
|
|
33
39
|
return [key, value];
|
|
34
40
|
}),
|
|
@@ -50,30 +56,3 @@ export const parseJson = (key: string, value: string | undefined) => {
|
|
|
50
56
|
}
|
|
51
57
|
return undefined;
|
|
52
58
|
};
|
|
53
|
-
|
|
54
|
-
export const createAssert =
|
|
55
|
-
(
|
|
56
|
-
modelNameKey: string,
|
|
57
|
-
provider: 'process.env' | 'modelConfig',
|
|
58
|
-
modelName?: string,
|
|
59
|
-
) =>
|
|
60
|
-
(value: string | undefined, key: string, modelVendorFlag?: string) => {
|
|
61
|
-
if (modelName) {
|
|
62
|
-
if (modelVendorFlag) {
|
|
63
|
-
assert(
|
|
64
|
-
value,
|
|
65
|
-
`The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} and ${modelVendorFlag} has also been specified in ${provider}, but got: ${value}. Please check your config.`,
|
|
66
|
-
);
|
|
67
|
-
} else {
|
|
68
|
-
assert(
|
|
69
|
-
value,
|
|
70
|
-
`The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} in ${provider}, but got: ${value}. Please check your config.`,
|
|
71
|
-
);
|
|
72
|
-
}
|
|
73
|
-
} else {
|
|
74
|
-
assert(
|
|
75
|
-
value,
|
|
76
|
-
`The ${key} must be a non-empty string, but got: ${value}. Please check your config.`,
|
|
77
|
-
);
|
|
78
|
-
}
|
|
79
|
-
};
|
package/src/env/init-debug.ts
CHANGED
|
@@ -1,21 +1,26 @@
|
|
|
1
1
|
import { enableDebug } from '../logger';
|
|
2
2
|
import { getBasicEnvValue } from './basic';
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
MIDSCENE_DEBUG_MODEL_PROFILE,
|
|
5
|
+
MIDSCENE_DEBUG_MODEL_RESPONSE,
|
|
6
|
+
} from './types';
|
|
4
7
|
|
|
5
8
|
export const initDebugConfig = () => {
|
|
6
|
-
const shouldPrintTiming = getBasicEnvValue(
|
|
9
|
+
const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_MODEL_PROFILE);
|
|
7
10
|
let debugConfig = '';
|
|
8
11
|
if (shouldPrintTiming) {
|
|
9
12
|
console.warn(
|
|
10
|
-
'
|
|
13
|
+
'MIDSCENE_DEBUG_MODEL_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',
|
|
11
14
|
);
|
|
12
15
|
debugConfig = 'ai:profile';
|
|
13
16
|
}
|
|
14
|
-
const
|
|
17
|
+
const shouldPrintModelResponse = getBasicEnvValue(
|
|
18
|
+
MIDSCENE_DEBUG_MODEL_RESPONSE,
|
|
19
|
+
);
|
|
15
20
|
|
|
16
|
-
if (
|
|
21
|
+
if (shouldPrintModelResponse) {
|
|
17
22
|
console.warn(
|
|
18
|
-
'
|
|
23
|
+
'MIDSCENE_DEBUG_MODEL_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',
|
|
19
24
|
);
|
|
20
25
|
if (debugConfig) {
|
|
21
26
|
debugConfig = 'ai:*';
|
|
@@ -1,87 +1,101 @@
|
|
|
1
|
-
import {
|
|
2
|
-
decideModelConfigFromEnv,
|
|
3
|
-
decideModelConfigFromIntentConfig,
|
|
4
|
-
} from './decide-model-config';
|
|
5
1
|
import type { GlobalConfigManager } from './global-config-manager';
|
|
2
|
+
import { decideModelConfigFromIntentConfig } from './parse-model-config';
|
|
6
3
|
|
|
7
|
-
import type {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
export type TIntentConfigMap = Record<
|
|
4
|
+
import type {
|
|
5
|
+
CreateOpenAIClientFn,
|
|
6
|
+
IModelConfig,
|
|
12
7
|
TIntent,
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
TModelConfig,
|
|
9
|
+
} from './types';
|
|
15
10
|
|
|
16
11
|
export class ModelConfigManager {
|
|
17
12
|
private modelConfigMap: Record<TIntent, IModelConfig> | undefined = undefined;
|
|
18
13
|
|
|
19
|
-
|
|
20
|
-
|
|
14
|
+
private isInitialized = false;
|
|
15
|
+
|
|
16
|
+
// once modelConfig is set, isolatedMode will be true
|
|
17
|
+
// modelConfigMap will only depend on provided config and not be affected by process.env
|
|
21
18
|
private isolatedMode = false;
|
|
22
19
|
|
|
23
20
|
private globalConfigManager: GlobalConfigManager | undefined = undefined;
|
|
24
21
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
22
|
+
private modelConfig?: TModelConfig;
|
|
23
|
+
private createOpenAIClientFn?: CreateOpenAIClientFn;
|
|
24
|
+
|
|
25
|
+
/**
 * Store the optional explicit model configuration and the optional
 * OpenAI-client factory. Nothing is parsed here; the per-intent config map
 * is built later by `initialize()`.
 *
 * @param modelConfig - explicit model configuration; when provided,
 *   `initialize()` switches the manager to isolated mode
 * @param createOpenAIClientFn - factory attached to every resolved model
 *   config as `createOpenAIClient`
 */
constructor(
  modelConfig?: TModelConfig,
  createOpenAIClientFn?: CreateOpenAIClientFn,
) {
  this.createOpenAIClientFn = createOpenAIClientFn;
  this.modelConfig = modelConfig;
}
|
|
33
32
|
|
|
34
|
-
private
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
grounding: undefined,
|
|
39
|
-
planning: undefined,
|
|
40
|
-
};
|
|
33
|
+
/**
 * Build `modelConfigMap` for the `default`, `insight` and `planning` intents.
 *
 * The key/value source is the constructor-provided `modelConfig` (which also
 * turns on isolated mode) or, when none was given, the registered global
 * config manager's env config (an empty object if no manager is registered).
 * `insight` and `planning` fall back to the `default` config when they have
 * no config of their own.
 *
 * @throws Error when no config can be resolved for the `default` intent
 */
private initialize() {
  if (this.isInitialized) return;

  let source: Record<string, string | undefined>;
  if (!this.modelConfig) {
    source = this.globalConfigManager?.getAllEnvConfig() || {};
  } else {
    // An explicit modelConfig means process.env must no longer influence us.
    this.isolatedMode = true;
    source = this.normalizeModelConfig(this.modelConfig);
  }

  // The default intent is resolved (and validated) before the other intents.
  const base = decideModelConfigFromIntentConfig('default', source);
  if (!base) {
    throw new Error(
      'default model config is not found, which should not happen',
    );
  }

  const insight = decideModelConfigFromIntentConfig('insight', source);
  const planning = decideModelConfigFromIntentConfig('planning', source);

  // Every entry carries the same client factory on top of its parsed config.
  const attachClient = (parsed: IModelConfig): IModelConfig => ({
    ...parsed,
    createOpenAIClient: this.createOpenAIClientFn,
  });

  // Each intent keeps the timeout parsed from its own keys
  // (MIDSCENE_MODEL_TIMEOUT, MIDSCENE_INSIGHT_MODEL_TIMEOUT,
  // MIDSCENE_PLANNING_MODEL_TIMEOUT).
  this.modelConfigMap = {
    default: attachClient(base),
    insight: attachClient(insight || base),
    planning: attachClient(planning || base),
  };

  this.isInitialized = true;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Flatten a model config object into a string-valued lookup table.
 *
 * Entries whose value is `undefined` or `null` are dropped; every other
 * value is converted with `String(...)`. The result is created without a
 * prototype, so only the copied keys are present on it.
 *
 * @param config - the model config supplied to the constructor
 * @returns a prototype-less record of stringified config values
 */
private normalizeModelConfig(
  config: TModelConfig,
): Record<string, string | undefined> {
  const normalized: Record<string, string | undefined> = Object.create(null);

  for (const [name, raw] of Object.entries(config)) {
    if (raw === undefined || raw === null) {
      continue;
    }
    normalized[name] = String(raw);
  }

  return normalized;
}
|
|
86
100
|
|
|
87
101
|
/**
|
|
@@ -93,7 +107,7 @@ export class ModelConfigManager {
|
|
|
93
107
|
'ModelConfigManager work in isolated mode, so clearModelConfigMap should not be called',
|
|
94
108
|
);
|
|
95
109
|
}
|
|
96
|
-
this.
|
|
110
|
+
this.isInitialized = false;
|
|
97
111
|
}
|
|
98
112
|
|
|
99
113
|
/**
|
|
@@ -101,26 +115,16 @@ export class ModelConfigManager {
|
|
|
101
115
|
* if isolatedMode is false, modelConfigMap can be changed by process.env so we need to recalculate it when it's undefined
|
|
102
116
|
*/
|
|
103
117
|
getModelConfig(intent: TIntent): IModelConfig {
  // Lazily build the config map on first use, and again after
  // isInitialized has been reset.
  if (!this.isInitialized) this.initialize();

  const resolvedMap = this.modelConfigMap;
  if (resolvedMap) {
    return resolvedMap[intent];
  }

  throw new Error(
    'modelConfigMap is not initialized, which should not happen',
  );
}
|
|
125
129
|
|
|
126
130
|
getUploadTestServerUrl(): string | undefined {
|
|
@@ -133,12 +137,12 @@ export class ModelConfigManager {
|
|
|
133
137
|
this.globalConfigManager = globalConfigManager;
|
|
134
138
|
}
|
|
135
139
|
|
|
136
|
-
throwErrorIfNonVLModel(
|
|
137
|
-
const modelConfig = this.getModelConfig(
|
|
140
|
+
/**
 * Guard for features that need element localization: throws unless the
 * `default` intent's model config has a `vlMode` set.
 *
 * @throws Error when the default model config has no `vlMode`
 */
throwErrorIfNonVLModel() {
  const { vlMode } = this.getModelConfig('default');
  if (vlMode) {
    return;
  }

  throw new Error(
    'MIDSCENE_MODEL_FAMILY is not set to a visual language model (VL model), the element localization can not be achieved. Check your model configuration. See https://midscenejs.com/model-strategy.html',
  );
}
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_MODEL_CONFIG_KEYS,
|
|
3
|
+
type DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
|
|
4
|
+
INSIGHT_MODEL_CONFIG_KEYS,
|
|
5
|
+
PLANNING_MODEL_CONFIG_KEYS,
|
|
6
|
+
} from './constants';
|
|
7
|
+
import {
|
|
8
|
+
type IModelConfig,
|
|
9
|
+
MIDSCENE_MODEL_FAMILY,
|
|
10
|
+
MIDSCENE_OPENAI_HTTP_PROXY,
|
|
11
|
+
MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
12
|
+
MIDSCENE_OPENAI_SOCKS_PROXY,
|
|
13
|
+
MIDSCENE_USE_DOUBAO_VISION,
|
|
14
|
+
MIDSCENE_USE_GEMINI,
|
|
15
|
+
MIDSCENE_USE_QWEN3_VL,
|
|
16
|
+
MIDSCENE_USE_QWEN_VL,
|
|
17
|
+
MIDSCENE_USE_VLM_UI_TARS,
|
|
18
|
+
MODEL_FAMILY_VALUES,
|
|
19
|
+
OPENAI_API_KEY,
|
|
20
|
+
OPENAI_BASE_URL,
|
|
21
|
+
type TIntent,
|
|
22
|
+
type TModelFamily,
|
|
23
|
+
type TVlModeTypes,
|
|
24
|
+
UITarsModelVersion,
|
|
25
|
+
} from './types';
|
|
26
|
+
|
|
27
|
+
import { getDebug } from '../logger';
|
|
28
|
+
import { assert } from '../utils';
|
|
29
|
+
import { maskConfig, parseJson } from './helper';
|
|
30
|
+
import { initDebugConfig } from './init-debug';
|
|
31
|
+
|
|
32
|
+
/** Any of the key tables that name the config entries for one intent. */
type TModelConfigKeys =
  | typeof DEFAULT_MODEL_CONFIG_KEYS
  | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY
  | typeof INSIGHT_MODEL_CONFIG_KEYS
  | typeof PLANNING_MODEL_CONFIG_KEYS;

/**
 * Key table to use for each intent. The explicit annotation already fixes
 * the variable's type, so no `as const` is needed on the literal.
 */
const KEYS_MAP: Record<TIntent, TModelConfigKeys> = {
  default: DEFAULT_MODEL_CONFIG_KEYS,
  insight: INSIGHT_MODEL_CONFIG_KEYS,
  planning: PLANNING_MODEL_CONFIG_KEYS,
};
|
|
43
|
+
|
|
44
|
+
/**
 * Translate a model family into the VL mode (and, for UI-TARS, the model
 * version) used by the rest of the config.
 *
 * - no family: both fields are `undefined`
 * - `vlm-ui-tars`: UI-TARS mode, version `V1_0`
 * - `vlm-ui-tars-doubao` / `vlm-ui-tars-doubao-1.5`: UI-TARS mode,
 *   version `DOUBAO_1_5_20B`
 * - any other value listed in `MODEL_FAMILY_VALUES`: used as the VL mode as-is
 *
 * @param modelFamily - the model family value
 * @returns the resolved `vlMode` and `uiTarsVersion`
 * @throws Error when the value is not a recognised model family
 */
export const modelFamilyToVLConfig = (
  modelFamily?: TModelFamily,
): {
  vlMode?: TVlModeTypes;
  uiTarsVersion?: UITarsModelVersion;
} => {
  if (!modelFamily) {
    return { vlMode: undefined, uiTarsVersion: undefined };
  }

  // The UI-TARS variants all share one vlMode and differ only by version.
  switch (modelFamily) {
    case 'vlm-ui-tars':
      return { vlMode: 'vlm-ui-tars', uiTarsVersion: UITarsModelVersion.V1_0 };
    case 'vlm-ui-tars-doubao':
    case 'vlm-ui-tars-doubao-1.5':
      return {
        vlMode: 'vlm-ui-tars',
        uiTarsVersion: UITarsModelVersion.DOUBAO_1_5_20B,
      };
    default:
      break;
  }

  // Everything else must be a known family, which maps 1:1 onto vlMode.
  const isKnownFamily = MODEL_FAMILY_VALUES.includes(modelFamily as any);
  if (isKnownFamily) {
    return { vlMode: modelFamily as TVlModeTypes, uiTarsVersion: undefined };
  }

  throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}`);
};
|
|
82
|
+
|
|
83
|
+
/**
 * Derive a model family from the legacy `MIDSCENE_USE_*` switches.
 *
 * A switch counts as enabled when its value in `provider` is truthy. At most
 * one switch may be enabled at a time.
 *
 * @param provider - key/value source holding the legacy switches (e.g. process.env)
 * @returns the matching model family, or `undefined` when no legacy switch is set
 * @throws Error when more than one legacy switch is enabled
 */
export const legacyConfigToModelFamily = (
  provider: Record<string, string | undefined>,
): TModelFamily | undefined => {
  const doubaoFlag = provider[MIDSCENE_USE_DOUBAO_VISION];
  const qwenFlag = provider[MIDSCENE_USE_QWEN_VL];
  const qwen3Flag = provider[MIDSCENE_USE_QWEN3_VL];
  const uiTarsFlag = provider[MIDSCENE_USE_VLM_UI_TARS];
  const geminiFlag = provider[MIDSCENE_USE_GEMINI];

  // Listed in the order the names appear in the conflict message.
  const switches: Array<[string | undefined, string]> = [
    [doubaoFlag, MIDSCENE_USE_DOUBAO_VISION],
    [qwenFlag, MIDSCENE_USE_QWEN_VL],
    [qwen3Flag, MIDSCENE_USE_QWEN3_VL],
    [uiTarsFlag, MIDSCENE_USE_VLM_UI_TARS],
    [geminiFlag, MIDSCENE_USE_GEMINI],
  ];
  const enabledModes = switches
    .filter(([flagValue]) => Boolean(flagValue))
    .map(([, flagName]) => flagName);

  if (enabledModes.length > 1) {
    throw new Error(
      `Only one vision mode can be enabled at a time. Currently enabled modes: ${enabledModes.join(', ')}. Please disable all but one mode.`,
    );
  }

  // Switches whose mere presence selects a family.
  const directFamilies: Array<[string | undefined, TModelFamily]> = [
    [qwen3Flag, 'qwen3-vl'],
    [qwenFlag, 'qwen2.5-vl'],
    [doubaoFlag, 'doubao-vision'],
    [geminiFlag, 'gemini'],
  ];
  for (const [flagValue, family] of directFamilies) {
    if (flagValue) return family;
  }

  if (!uiTarsFlag) {
    return undefined;
  }

  // For UI-TARS the switch's value selects the variant.
  switch (uiTarsFlag) {
    case '1':
      return 'vlm-ui-tars';
    case 'DOUBAO':
    case 'DOUBAO-1.5':
      return 'vlm-ui-tars-doubao-1.5';
    default:
      // Any other non-empty value is treated as the generic doubao variant.
      return 'vlm-ui-tars-doubao';
  }
};
|
|
133
|
+
|
|
134
|
+
/**
 * Parse OpenAI SDK config.
 *
 * Reads the entries named by `keys` from `provider` and assembles an
 * `IModelConfig`. When `useLegacyLogic` is true, the legacy un-prefixed
 * settings (API key, base URL, proxies, extra init config and the
 * `MIDSCENE_USE_*` model switches) are used as fallbacks for entries that
 * are empty or missing.
 *
 * @param keys - key table naming the provider entries to read
 * @param provider - flat key/value source (e.g. env config)
 * @param useLegacyLogic - whether legacy settings may be used as fallbacks
 * @returns the parsed model config; `intent` is a placeholder (`'-'`) that the
 *   caller is expected to overwrite, and `timeout` is `undefined` when the
 *   configured value is missing or not a finite number
 * @throws Error propagated from `legacyConfigToModelFamily` or
 *   `modelFamilyToVLConfig` on conflicting or invalid model family settings
 */
export const parseOpenaiSdkConfig = ({
  keys,
  provider,
  useLegacyLogic = false,
}: {
  keys: TModelConfigKeys;
  provider: Record<string, string | undefined>;
  useLegacyLogic?: boolean;
}): IModelConfig => {
  initDebugConfig();
  const debugLog = getDebug('ai:config');

  debugLog('enter parseOpenaiSdkConfig with keys:', keys);

  // Legacy settings are only consulted when the caller opts in.
  const legacyAPIKey = useLegacyLogic ? provider[OPENAI_API_KEY] : undefined;
  const legacyBaseURL = useLegacyLogic ? provider[OPENAI_BASE_URL] : undefined;
  const legacySocksProxy = useLegacyLogic
    ? provider[MIDSCENE_OPENAI_SOCKS_PROXY]
    : undefined;
  const legacyHttpProxy = useLegacyLogic
    ? provider[MIDSCENE_OPENAI_HTTP_PROXY]
    : undefined;
  const legacyOpenaiExtraConfig = useLegacyLogic
    ? provider[MIDSCENE_OPENAI_INIT_CONFIG_JSON]
    : undefined;
  const legacyModelFamily = useLegacyLogic
    ? legacyConfigToModelFamily(provider)
    : undefined;

  // `||` (not `??`) is deliberate: an empty string also falls back to legacy.
  const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
  const openaiApiKey: string | undefined =
    provider[keys.openaiApiKey] || legacyAPIKey;
  const openaiBaseURL: string | undefined =
    provider[keys.openaiBaseURL] || legacyBaseURL;
  const socksProxy: string | undefined =
    provider[keys.socksProxy] || legacySocksProxy;
  const httpProxy: string | undefined =
    provider[keys.httpProxy] || legacyHttpProxy;
  const modelName: string | undefined = provider[keys.modelName];
  const openaiExtraConfig = parseJson(
    keys.openaiExtraConfig,
    provider[keys.openaiExtraConfig] || legacyOpenaiExtraConfig,
  );

  const { vlMode, uiTarsVersion } = modelFamilyToVLConfig(
    modelFamilyRaw as unknown as TModelFamily,
  );

  // Human-readable summary: UI-TARS reports its version, other VL models
  // report their mode, and non-VL models get an empty description.
  let modelDescription = '';
  if (vlMode) {
    modelDescription = uiTarsVersion
      ? `UI-TARS=${uiTarsVersion}`
      : `${vlMode} mode`;
  }

  // A non-numeric timeout string would otherwise become NaN and be handed on
  // as the model timeout; treat it as "not configured" instead.
  const timeoutRaw = provider[keys.timeout];
  let timeout: number | undefined;
  if (timeoutRaw) {
    const parsedTimeout = Number(timeoutRaw);
    if (Number.isFinite(parsedTimeout)) {
      timeout = parsedTimeout;
    } else {
      debugLog(
        `ignoring non-numeric timeout value for ${keys.timeout}:`,
        timeoutRaw,
      );
    }
  }

  return {
    socksProxy,
    httpProxy,
    vlModeRaw: vlMode,
    openaiBaseURL,
    openaiApiKey,
    openaiExtraConfig,
    vlMode,
    uiTarsModelVersion: uiTarsVersion,
    // May be undefined at runtime when the provider has no model name entry.
    modelName: modelName!,
    modelDescription,
    // Placeholder; the real intent is assigned by the caller.
    intent: '-' as any,
    timeout,
  };
};
|
|
219
|
+
|
|
220
|
+
/**
 * Resolve the model config for a single intent from a flat config map.
 *
 * Returns `undefined` when the map has no model name under the intent's
 * model-name key. Otherwise the config is parsed (legacy fallbacks are only
 * enabled for the `default` intent), tagged with the intent, logged in masked
 * form, and checked for a base URL.
 *
 * @param intent - which intent to resolve
 * @param configMap - flat key/value config source
 * @returns the resolved config, or `undefined` if the intent has no model name
 */
export const decideModelConfigFromIntentConfig = (
  intent: TIntent,
  configMap: Record<string, string | undefined>,
): IModelConfig | undefined => {
  const log = getDebug('ai:config');

  log(
    'will decideModelConfig base on agent.modelConfig()',
    intent,
    maskConfig(configMap),
  );

  const intentKeys = KEYS_MAP[intent];

  // Without a model name this intent is considered not configured.
  if (!configMap[intentKeys.modelName]) {
    log('no modelName found for intent', intent);
    return undefined;
  }

  const resolved = parseOpenaiSdkConfig({
    keys: intentKeys,
    provider: configMap,
    useLegacyLogic: intent === 'default',
  });
  // Replace the parser's placeholder intent with the real one.
  resolved.intent = intent;

  log(
    'decideModelConfig result by agent.modelConfig() with intent',
    intent,
    maskConfig({ ...resolved }),
  );

  assert(
    resolved.openaiBaseURL,
    `failed to get base URL of model (intent=${intent}). See https://midscenejs.com/model-strategy`,
  );

  // Defensive: the model name was already checked above via the same key.
  if (!resolved.modelName) {
    console.warn(
      `modelName is not set for intent ${intent}, this may cause unexpected behavior. See https://midscenejs.com/model-strategy`,
    );
  }

  return resolved;
};
|