@midscene/shared 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/dist/es/build/rspack-config.mjs +4 -0
  2. package/dist/es/constants/example-code.mjs +4 -4
  3. package/dist/es/env/constants.mjs +27 -82
  4. package/dist/es/env/global-config-manager.mjs +2 -3
  5. package/dist/es/env/helper.mjs +12 -17
  6. package/dist/es/env/init-debug.mjs +6 -6
  7. package/dist/es/env/model-config-manager.mjs +45 -65
  8. package/dist/es/env/parse-model-config.mjs +112 -0
  9. package/dist/es/env/types.mjs +70 -162
  10. package/dist/es/extractor/dom-util.mjs +10 -18
  11. package/dist/es/extractor/index.mjs +2 -3
  12. package/dist/es/extractor/locator.mjs +8 -15
  13. package/dist/es/extractor/tree.mjs +2 -5
  14. package/dist/es/extractor/util.mjs +4 -28
  15. package/dist/es/extractor/web-extractor.mjs +7 -14
  16. package/dist/es/index.mjs +2 -1
  17. package/dist/es/mcp/base-server.mjs +250 -0
  18. package/dist/es/mcp/base-tools.mjs +84 -0
  19. package/dist/es/mcp/index.mjs +5 -0
  20. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  21. package/dist/es/mcp/tool-generator.mjs +207 -0
  22. package/dist/es/mcp/types.mjs +3 -0
  23. package/dist/es/node/fs.mjs +2 -2
  24. package/dist/es/utils.mjs +2 -3
  25. package/dist/es/zod-schema-utils.mjs +54 -0
  26. package/dist/lib/baseDB.js +2 -2
  27. package/dist/lib/build/copy-static.js +4 -4
  28. package/dist/lib/build/rspack-config.js +38 -0
  29. package/dist/lib/common.js +4 -4
  30. package/dist/lib/constants/example-code.js +6 -6
  31. package/dist/lib/constants/index.js +13 -13
  32. package/dist/lib/env/basic.js +2 -2
  33. package/dist/lib/env/constants.js +32 -90
  34. package/dist/lib/env/global-config-manager.js +4 -5
  35. package/dist/lib/env/helper.js +13 -22
  36. package/dist/lib/env/index.js +24 -28
  37. package/dist/lib/env/init-debug.js +7 -7
  38. package/dist/lib/env/model-config-manager.js +47 -67
  39. package/dist/lib/env/parse-model-config.js +155 -0
  40. package/dist/lib/env/types.js +146 -379
  41. package/dist/lib/env/utils.js +4 -4
  42. package/dist/lib/extractor/constants.js +4 -4
  43. package/dist/lib/extractor/debug.js +1 -1
  44. package/dist/lib/extractor/dom-util.js +18 -26
  45. package/dist/lib/extractor/index.js +11 -21
  46. package/dist/lib/extractor/locator.js +10 -20
  47. package/dist/lib/extractor/tree.js +4 -7
  48. package/dist/lib/extractor/util.js +17 -50
  49. package/dist/lib/extractor/web-extractor.js +12 -19
  50. package/dist/lib/img/box-select.js +4 -4
  51. package/dist/lib/img/draw-box.js +2 -2
  52. package/dist/lib/img/get-jimp.js +16 -34
  53. package/dist/lib/img/get-photon.js +24 -47
  54. package/dist/lib/img/get-sharp.js +16 -34
  55. package/dist/lib/img/index.js +18 -18
  56. package/dist/lib/img/info.js +4 -4
  57. package/dist/lib/img/transform.js +10 -10
  58. package/dist/lib/index.js +8 -4
  59. package/dist/lib/logger.js +4 -4
  60. package/dist/lib/mcp/base-server.js +300 -0
  61. package/dist/lib/mcp/base-tools.js +118 -0
  62. package/dist/lib/mcp/index.js +86 -0
  63. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  64. package/dist/lib/mcp/tool-generator.js +244 -0
  65. package/dist/lib/mcp/types.js +40 -0
  66. package/dist/lib/node/fs.js +6 -6
  67. package/dist/lib/node/index.js +6 -8
  68. package/dist/lib/polyfills/async-hooks.js +2 -2
  69. package/dist/lib/polyfills/index.js +6 -8
  70. package/dist/lib/types/index.js +2 -2
  71. package/dist/lib/us-keyboard-layout.js +2 -2
  72. package/dist/lib/utils.js +13 -14
  73. package/dist/lib/zod-schema-utils.js +97 -0
  74. package/dist/types/build/rspack-config.d.ts +8 -0
  75. package/dist/types/constants/example-code.d.ts +1 -1
  76. package/dist/types/env/constants.d.ts +5 -18
  77. package/dist/types/env/global-config-manager.d.ts +1 -2
  78. package/dist/types/env/helper.d.ts +2 -4
  79. package/dist/types/env/model-config-manager.d.ts +8 -7
  80. package/dist/types/env/parse-model-config.d.ts +28 -0
  81. package/dist/types/env/types.d.ts +152 -191
  82. package/dist/types/extractor/dom-util.d.ts +2 -15
  83. package/dist/types/extractor/index.d.ts +1 -2
  84. package/dist/types/extractor/locator.d.ts +0 -1
  85. package/dist/types/extractor/tree.d.ts +1 -4
  86. package/dist/types/extractor/util.d.ts +0 -3
  87. package/dist/types/index.d.ts +1 -0
  88. package/dist/types/mcp/base-server.d.ts +77 -0
  89. package/dist/types/mcp/base-tools.d.ts +55 -0
  90. package/dist/types/mcp/index.d.ts +5 -0
  91. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  92. package/dist/types/mcp/tool-generator.d.ts +11 -0
  93. package/dist/types/mcp/types.d.ts +100 -0
  94. package/dist/types/types/index.d.ts +5 -2
  95. package/dist/types/zod-schema-utils.d.ts +23 -0
  96. package/package.json +19 -4
  97. package/src/build/rspack-config.ts +12 -0
  98. package/src/constants/example-code.ts +4 -4
  99. package/src/env/constants.ts +58 -203
  100. package/src/env/global-config-manager.ts +7 -7
  101. package/src/env/helper.ts +10 -31
  102. package/src/env/init-debug.ts +11 -6
  103. package/src/env/model-config-manager.ts +91 -87
  104. package/src/env/parse-model-config.ts +265 -0
  105. package/src/env/types.ts +212 -344
  106. package/src/extractor/dom-util.ts +15 -12
  107. package/src/extractor/index.ts +0 -3
  108. package/src/extractor/locator.ts +3 -12
  109. package/src/extractor/tree.ts +4 -4
  110. package/src/extractor/util.ts +0 -32
  111. package/src/index.ts +2 -0
  112. package/src/mcp/base-server.ts +435 -0
  113. package/src/mcp/base-tools.ts +196 -0
  114. package/src/mcp/index.ts +5 -0
  115. package/src/mcp/inject-report-html-plugin.ts +119 -0
  116. package/src/mcp/tool-generator.ts +330 -0
  117. package/src/mcp/types.ts +108 -0
  118. package/src/node/fs.ts +1 -1
  119. package/src/types/index.ts +8 -2
  120. package/src/utils.ts +1 -1
  121. package/src/zod-schema-utils.ts +133 -0
  122. package/dist/es/env/decide-model-config.mjs +0 -172
  123. package/dist/es/env/parse.mjs +0 -69
  124. package/dist/lib/env/decide-model-config.js +0 -212
  125. package/dist/lib/env/parse.js +0 -106
  126. package/dist/types/env/decide-model-config.d.ts +0 -14
  127. package/dist/types/env/parse.d.ts +0 -12
  128. package/src/env/decide-model-config.ts +0 -319
  129. package/src/env/parse.ts +0 -131
package/src/env/global-config-manager.ts CHANGED
@@ -1,4 +1,3 @@
1
- import { getDebug } from '../logger';
2
1
  import { initDebugConfig } from './init-debug';
3
2
  import type { ModelConfigManager } from './model-config-manager';
4
3
  import {
@@ -62,14 +61,16 @@ export class GlobalConfigManager {
62
61
  getEnvConfigValue(key: (typeof STRING_ENV_KEYS)[number]) {
63
62
  const allConfig = this.getAllEnvConfig();
64
63
 
65
- if (!STRING_ENV_KEYS.includes(key)) {
66
- throw new Error(`getEnvConfigValue with key ${key} is not supported.`);
67
- }
68
64
  if (key === MATCH_BY_POSITION) {
69
65
  throw new Error(
70
- 'MATCH_BY_POSITION is deprecated, use MIDSCENE_USE_VL_MODEL instead',
66
+ 'MATCH_BY_POSITION is discarded, use MIDSCENE_MODEL_FAMILY instead',
71
67
  );
72
68
  }
69
+
70
+ if (!STRING_ENV_KEYS.includes(key)) {
71
+ throw new Error(`getEnvConfigValue with key ${key} is not supported.`);
72
+ }
73
+
73
74
  const value = allConfig[key];
74
75
  this.keysHaveBeenRead[key] = true;
75
76
  if (typeof value === 'string') {
@@ -122,8 +123,7 @@ export class GlobalConfigManager {
122
123
  }
123
124
 
124
125
  /**
125
- * for overrideAIConfig
126
- * can only override keys in MODEL_ENV_KEYS
126
+ * @deprecated use the modelConfig param in Agent constructor instead
127
127
  */
128
128
  overrideAIConfig(
129
129
  newConfig: Partial<
package/src/env/helper.ts CHANGED
@@ -22,13 +22,19 @@ const maskKey = (key: string, maskChar = '*') => {
22
22
  return `${prefix}${mask}${suffix}`;
23
23
  };
24
24
 
25
- export const maskConfig = (config: IModelConfig) => {
25
+ export const maskConfig = (config: Record<string, unknown>) => {
26
26
  return Object.fromEntries(
27
27
  Object.entries(config).map(([key, value]) => {
28
- if (['openaiApiKey', 'azureOpenaiKey', 'anthropicApiKey'].includes(key)) {
28
+ if (!value) return [key, value];
29
+
30
+ if (typeof value === 'string' && /key/i.test(key)) {
29
31
  return [key, maskKey(value)];
30
- } else if (['openaiExtraConfig', 'azureExtraConfig'].includes(key)) {
31
- return [key, maskKey(JSON.stringify(value))];
32
+ }
33
+ if (typeof value === 'object') {
34
+ const valueStr = JSON.stringify(value);
35
+ if (/key/i.test(valueStr)) {
36
+ return [key, maskKey(valueStr)];
37
+ }
32
38
  }
33
39
  return [key, value];
34
40
  }),
@@ -50,30 +56,3 @@ export const parseJson = (key: string, value: string | undefined) => {
50
56
  }
51
57
  return undefined;
52
58
  };
53
-
54
- export const createAssert =
55
- (
56
- modelNameKey: string,
57
- provider: 'process.env' | 'modelConfig',
58
- modelName?: string,
59
- ) =>
60
- (value: string | undefined, key: string, modelVendorFlag?: string) => {
61
- if (modelName) {
62
- if (modelVendorFlag) {
63
- assert(
64
- value,
65
- `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} and ${modelVendorFlag} has also been specified in ${provider}, but got: ${value}. Please check your config.`,
66
- );
67
- } else {
68
- assert(
69
- value,
70
- `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} in ${provider}, but got: ${value}. Please check your config.`,
71
- );
72
- }
73
- } else {
74
- assert(
75
- value,
76
- `The ${key} must be a non-empty string, but got: ${value}. Please check your config.`,
77
- );
78
- }
79
- };
package/src/env/init-debug.ts CHANGED
@@ -1,21 +1,26 @@
1
1
  import { enableDebug } from '../logger';
2
2
  import { getBasicEnvValue } from './basic';
3
- import { MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_AI_RESPONSE } from './types';
3
+ import {
4
+ MIDSCENE_DEBUG_MODEL_PROFILE,
5
+ MIDSCENE_DEBUG_MODEL_RESPONSE,
6
+ } from './types';
4
7
 
5
8
  export const initDebugConfig = () => {
6
- const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_AI_PROFILE);
9
+ const shouldPrintTiming = getBasicEnvValue(MIDSCENE_DEBUG_MODEL_PROFILE);
7
10
  let debugConfig = '';
8
11
  if (shouldPrintTiming) {
9
12
  console.warn(
10
- 'MIDSCENE_DEBUG_AI_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',
13
+ 'MIDSCENE_DEBUG_MODEL_PROFILE is deprecated, use DEBUG=midscene:ai:profile instead',
11
14
  );
12
15
  debugConfig = 'ai:profile';
13
16
  }
14
- const shouldPrintAIResponse = getBasicEnvValue(MIDSCENE_DEBUG_AI_RESPONSE);
17
+ const shouldPrintModelResponse = getBasicEnvValue(
18
+ MIDSCENE_DEBUG_MODEL_RESPONSE,
19
+ );
15
20
 
16
- if (shouldPrintAIResponse) {
21
+ if (shouldPrintModelResponse) {
17
22
  console.warn(
18
- 'MIDSCENE_DEBUG_AI_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',
23
+ 'MIDSCENE_DEBUG_MODEL_RESPONSE is deprecated, use DEBUG=midscene:ai:response instead',
19
24
  );
20
25
  if (debugConfig) {
21
26
  debugConfig = 'ai:*';
package/src/env/model-config-manager.ts CHANGED
@@ -1,87 +1,101 @@
1
- import {
2
- decideModelConfigFromEnv,
3
- decideModelConfigFromIntentConfig,
4
- } from './decide-model-config';
5
1
  import type { GlobalConfigManager } from './global-config-manager';
2
+ import { decideModelConfigFromIntentConfig } from './parse-model-config';
6
3
 
7
- import type { IModelConfig, TIntent, TModelConfigFn } from './types';
8
-
9
- const ALL_INTENTS: TIntent[] = ['VQA', 'default', 'grounding', 'planning'];
10
-
11
- export type TIntentConfigMap = Record<
4
+ import type {
5
+ CreateOpenAIClientFn,
6
+ IModelConfig,
12
7
  TIntent,
13
- ReturnType<TModelConfigFn> | undefined
14
- >;
8
+ TModelConfig,
9
+ } from './types';
15
10
 
16
11
  export class ModelConfigManager {
17
12
  private modelConfigMap: Record<TIntent, IModelConfig> | undefined = undefined;
18
13
 
19
- // once modelConfigFn is set, isolatedMode will be true
20
- // modelConfigMap will only depend on modelConfigFn and not effect by process.env
14
+ private isInitialized = false;
15
+
16
+ // once modelConfig is set, isolatedMode will be true
17
+ // modelConfigMap will only depend on provided config and not be affected by process.env
21
18
  private isolatedMode = false;
22
19
 
23
20
  private globalConfigManager: GlobalConfigManager | undefined = undefined;
24
21
 
25
- constructor(modelConfigFn?: TModelConfigFn) {
26
- if (modelConfigFn) {
27
- this.isolatedMode = true;
28
- const intentConfigMap = this.calcIntentConfigMap(modelConfigFn);
29
- this.modelConfigMap =
30
- this.calcModelConfigMapBaseOnIntent(intentConfigMap);
31
- }
22
+ private modelConfig?: TModelConfig;
23
+ private createOpenAIClientFn?: CreateOpenAIClientFn;
24
+
25
+ constructor(
26
+ modelConfig?: TModelConfig,
27
+ createOpenAIClientFn?: CreateOpenAIClientFn,
28
+ ) {
29
+ this.modelConfig = modelConfig;
30
+ this.createOpenAIClientFn = createOpenAIClientFn;
32
31
  }
33
32
 
34
- private calcIntentConfigMap(modelConfigFn: TModelConfigFn): TIntentConfigMap {
35
- const intentConfigMap: TIntentConfigMap = {
36
- VQA: undefined,
37
- default: undefined,
38
- grounding: undefined,
39
- planning: undefined,
40
- };
33
+ private initialize() {
34
+ if (this.isInitialized) {
35
+ return;
36
+ }
41
37
 
42
- for (const i of ALL_INTENTS) {
43
- const result = modelConfigFn({ intent: i });
44
- if (!result) {
45
- throw new Error(
46
- `The agent has an option named modelConfig is a function, but it return ${result} when call with intent ${i}, which should be a object.`,
47
- );
48
- }
49
- intentConfigMap[i] = result;
38
+ let configMap: Record<string, string | undefined>;
39
+ if (this.modelConfig) {
40
+ this.isolatedMode = true;
41
+ configMap = this.normalizeModelConfig(this.modelConfig);
42
+ } else {
43
+ configMap = this.globalConfigManager?.getAllEnvConfig() || {};
50
44
  }
51
- return intentConfigMap;
52
- }
53
45
 
54
- private calcModelConfigMapBaseOnIntent(intentConfigMap: TIntentConfigMap) {
55
- const modelConfigMap: Record<TIntent, IModelConfig | undefined> = {
56
- VQA: undefined,
57
- default: undefined,
58
- grounding: undefined,
59
- planning: undefined,
60
- };
61
- for (const i of ALL_INTENTS) {
62
- const result = decideModelConfigFromIntentConfig(
63
- i,
64
- intentConfigMap[i] as unknown as Record<string, string | undefined>,
46
+ const defaultConfig = decideModelConfigFromIntentConfig(
47
+ 'default',
48
+ configMap,
49
+ );
50
+ if (!defaultConfig) {
51
+ throw new Error(
52
+ 'default model config is not found, which should not happen',
65
53
  );
66
- modelConfigMap[i] = result;
67
54
  }
68
- return modelConfigMap as Record<TIntent, IModelConfig>;
69
- }
70
55
 
71
- private calcModelConfigMapBaseOnEnv(
72
- allEnvConfig: Record<string, string | undefined>,
73
- ) {
74
- const modelConfigMap: Record<TIntent, IModelConfig | undefined> = {
75
- VQA: undefined,
76
- default: undefined,
77
- grounding: undefined,
78
- planning: undefined,
56
+ const insightConfig = decideModelConfigFromIntentConfig(
57
+ 'insight',
58
+ configMap,
59
+ );
60
+
61
+ const planningConfig = decideModelConfigFromIntentConfig(
62
+ 'planning',
63
+ configMap,
64
+ );
65
+
66
+ // Each intent uses its own timeout from parsed config (MIDSCENE_MODEL_TIMEOUT,
67
+ // MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_PLANNING_MODEL_TIMEOUT).
68
+ this.modelConfigMap = {
69
+ default: {
70
+ ...defaultConfig,
71
+ createOpenAIClient: this.createOpenAIClientFn,
72
+ },
73
+ insight: {
74
+ ...(insightConfig || defaultConfig),
75
+ createOpenAIClient: this.createOpenAIClientFn,
76
+ },
77
+ planning: {
78
+ ...(planningConfig || defaultConfig),
79
+ createOpenAIClient: this.createOpenAIClientFn,
80
+ },
79
81
  };
80
- for (const i of ALL_INTENTS) {
81
- const result = decideModelConfigFromEnv(i, allEnvConfig);
82
- modelConfigMap[i] = result;
83
- }
84
- return modelConfigMap as Record<TIntent, IModelConfig>;
82
+
83
+ this.isInitialized = true;
84
+ }
85
+
86
+ private normalizeModelConfig(
87
+ config: TModelConfig,
88
+ ): Record<string, string | undefined> {
89
+ return Object.entries(config).reduce<Record<string, string | undefined>>(
90
+ (acc, [key, value]) => {
91
+ if (value === undefined || value === null) {
92
+ return acc;
93
+ }
94
+ acc[key] = String(value);
95
+ return acc;
96
+ },
97
+ Object.create(null),
98
+ );
85
99
  }
86
100
 
87
101
  /**
@@ -93,7 +107,7 @@ export class ModelConfigManager {
93
107
  'ModelConfigManager work in isolated mode, so clearModelConfigMap should not be called',
94
108
  );
95
109
  }
96
- this.modelConfigMap = undefined;
110
+ this.isInitialized = false;
97
111
  }
98
112
 
99
113
  /**
@@ -101,26 +115,16 @@ export class ModelConfigManager {
101
115
  * if isolatedMode is false, modelConfigMap can be changed by process.env so we need to recalculate it when it's undefined
102
116
  */
103
117
  getModelConfig(intent: TIntent): IModelConfig {
104
- if (this.isolatedMode) {
105
- if (!this.modelConfigMap) {
106
- throw new Error(
107
- 'modelConfigMap is not initialized in isolated mode, which should not happen',
108
- );
109
- }
110
- return this.modelConfigMap[intent];
111
- } else {
112
- if (!this.modelConfigMap) {
113
- if (!this.globalConfigManager) {
114
- throw new Error(
115
- 'globalConfigManager is not registered, which should not happen',
116
- );
117
- }
118
- this.modelConfigMap = this.calcModelConfigMapBaseOnEnv(
119
- this.globalConfigManager.getAllEnvConfig(),
120
- );
121
- }
122
- return this.modelConfigMap[intent];
118
+ // check if initialized
119
+ if (!this.isInitialized) {
120
+ this.initialize();
121
+ }
122
+ if (!this.modelConfigMap) {
123
+ throw new Error(
124
+ 'modelConfigMap is not initialized, which should not happen',
125
+ );
123
126
  }
127
+ return this.modelConfigMap[intent];
124
128
  }
125
129
 
126
130
  getUploadTestServerUrl(): string | undefined {
@@ -133,12 +137,12 @@ export class ModelConfigManager {
133
137
  this.globalConfigManager = globalConfigManager;
134
138
  }
135
139
 
136
- throwErrorIfNonVLModel(intent: TIntent = 'grounding') {
137
- const modelConfig = this.getModelConfig(intent);
140
+ throwErrorIfNonVLModel() {
141
+ const modelConfig = this.getModelConfig('default');
138
142
 
139
143
  if (!modelConfig.vlMode) {
140
144
  throw new Error(
141
- 'No visual language model (VL model) detected for the current scenario. Element localization may be inaccurate. Please verify your model configuration. Learn more: https://midscenejs.com/choose-a-model',
145
+ 'MIDSCENE_MODEL_FAMILY is not set to a visual language model (VL model), the element localization can not be achieved. Check your model configuration. See https://midscenejs.com/model-strategy.html',
142
146
  );
143
147
  }
144
148
  }
package/src/env/parse-model-config.ts ADDED
@@ -0,0 +1,265 @@
1
+ import {
2
+ DEFAULT_MODEL_CONFIG_KEYS,
3
+ type DEFAULT_MODEL_CONFIG_KEYS_LEGACY,
4
+ INSIGHT_MODEL_CONFIG_KEYS,
5
+ PLANNING_MODEL_CONFIG_KEYS,
6
+ } from './constants';
7
+ import {
8
+ type IModelConfig,
9
+ MIDSCENE_MODEL_FAMILY,
10
+ MIDSCENE_OPENAI_HTTP_PROXY,
11
+ MIDSCENE_OPENAI_INIT_CONFIG_JSON,
12
+ MIDSCENE_OPENAI_SOCKS_PROXY,
13
+ MIDSCENE_USE_DOUBAO_VISION,
14
+ MIDSCENE_USE_GEMINI,
15
+ MIDSCENE_USE_QWEN3_VL,
16
+ MIDSCENE_USE_QWEN_VL,
17
+ MIDSCENE_USE_VLM_UI_TARS,
18
+ MODEL_FAMILY_VALUES,
19
+ OPENAI_API_KEY,
20
+ OPENAI_BASE_URL,
21
+ type TIntent,
22
+ type TModelFamily,
23
+ type TVlModeTypes,
24
+ UITarsModelVersion,
25
+ } from './types';
26
+
27
+ import { getDebug } from '../logger';
28
+ import { assert } from '../utils';
29
+ import { maskConfig, parseJson } from './helper';
30
+ import { initDebugConfig } from './init-debug';
31
+
32
+ type TModelConfigKeys =
33
+ | typeof INSIGHT_MODEL_CONFIG_KEYS
34
+ | typeof PLANNING_MODEL_CONFIG_KEYS
35
+ | typeof DEFAULT_MODEL_CONFIG_KEYS
36
+ | typeof DEFAULT_MODEL_CONFIG_KEYS_LEGACY;
37
+
38
+ const KEYS_MAP: Record<TIntent, TModelConfigKeys> = {
39
+ insight: INSIGHT_MODEL_CONFIG_KEYS,
40
+ planning: PLANNING_MODEL_CONFIG_KEYS,
41
+ default: DEFAULT_MODEL_CONFIG_KEYS,
42
+ } as const;
43
+
44
+ /**
45
+ * Convert model family to VL configuration
46
+ * @param modelFamily - The model family value
47
+ * @returns Object containing vlMode and uiTarsVersion
48
+ */
49
+ export const modelFamilyToVLConfig = (
50
+ modelFamily?: TModelFamily,
51
+ ): {
52
+ vlMode?: TVlModeTypes;
53
+ uiTarsVersion?: UITarsModelVersion;
54
+ } => {
55
+ if (!modelFamily) {
56
+ return { vlMode: undefined, uiTarsVersion: undefined };
57
+ }
58
+
59
+ // UI-TARS variants with version handling
60
+ if (modelFamily === 'vlm-ui-tars') {
61
+ return { vlMode: 'vlm-ui-tars', uiTarsVersion: UITarsModelVersion.V1_0 };
62
+ }
63
+
64
+ if (
65
+ modelFamily === 'vlm-ui-tars-doubao' ||
66
+ modelFamily === 'vlm-ui-tars-doubao-1.5'
67
+ ) {
68
+ return {
69
+ vlMode: 'vlm-ui-tars',
70
+ uiTarsVersion: UITarsModelVersion.DOUBAO_1_5_20B,
71
+ };
72
+ }
73
+
74
+ // Check if the modelFamily is valid
75
+ if (!MODEL_FAMILY_VALUES.includes(modelFamily as any)) {
76
+ throw new Error(`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}`);
77
+ }
78
+
79
+ // For other model families, they directly map to vlMode
80
+ return { vlMode: modelFamily as TVlModeTypes, uiTarsVersion: undefined };
81
+ };
82
+
83
+ /**
84
+ * Convert legacy environment variables to model family
85
+ * @param provider - Environment variable provider (e.g., process.env)
86
+ * @returns The corresponding model family value, or undefined if no legacy config is found
87
+ */
88
+ export const legacyConfigToModelFamily = (
89
+ provider: Record<string, string | undefined>,
90
+ ): TModelFamily | undefined => {
91
+ // Step 1: Parse legacy environment variables to get vlMode and uiTarsVersion
92
+ const isDoubao = provider[MIDSCENE_USE_DOUBAO_VISION];
93
+ const isQwen = provider[MIDSCENE_USE_QWEN_VL];
94
+ const isQwen3 = provider[MIDSCENE_USE_QWEN3_VL];
95
+ const isUiTars = provider[MIDSCENE_USE_VLM_UI_TARS];
96
+ const isGemini = provider[MIDSCENE_USE_GEMINI];
97
+
98
+ const enabledModes = [
99
+ isDoubao && MIDSCENE_USE_DOUBAO_VISION,
100
+ isQwen && MIDSCENE_USE_QWEN_VL,
101
+ isQwen3 && MIDSCENE_USE_QWEN3_VL,
102
+ isUiTars && MIDSCENE_USE_VLM_UI_TARS,
103
+ isGemini && MIDSCENE_USE_GEMINI,
104
+ ].filter(Boolean);
105
+
106
+ if (enabledModes.length > 1) {
107
+ throw new Error(
108
+ `Only one vision mode can be enabled at a time. Currently enabled modes: ${enabledModes.join(', ')}. Please disable all but one mode.`,
109
+ );
110
+ }
111
+
112
+ // Step 2: Map to model family based on detected mode
113
+ // Simple modes that directly map to model family
114
+ if (isQwen3) return 'qwen3-vl';
115
+ if (isQwen) return 'qwen2.5-vl';
116
+ if (isDoubao) return 'doubao-vision';
117
+ if (isGemini) return 'gemini';
118
+
119
+ // UI-TARS with version detection
120
+ if (isUiTars) {
121
+ if (isUiTars === '1') {
122
+ return 'vlm-ui-tars';
123
+ } else if (isUiTars === 'DOUBAO' || isUiTars === 'DOUBAO-1.5') {
124
+ return 'vlm-ui-tars-doubao-1.5';
125
+ } else {
126
+ // Handle other UI-TARS versions
127
+ return 'vlm-ui-tars-doubao';
128
+ }
129
+ }
130
+
131
+ return undefined;
132
+ };
133
+
134
+ /**
135
+ * Parse OpenAI SDK config
136
+ */
137
+ export const parseOpenaiSdkConfig = ({
138
+ keys,
139
+ provider,
140
+ useLegacyLogic = false,
141
+ }: {
142
+ keys: TModelConfigKeys;
143
+ provider: Record<string, string | undefined>;
144
+ useLegacyLogic?: boolean;
145
+ }): IModelConfig => {
146
+ initDebugConfig();
147
+ const debugLog = getDebug('ai:config');
148
+
149
+ debugLog('enter parseOpenaiSdkConfig with keys:', keys);
150
+
151
+ const legacyAPIKey = useLegacyLogic ? provider[OPENAI_API_KEY] : undefined;
152
+ const legacyBaseURL = useLegacyLogic ? provider[OPENAI_BASE_URL] : undefined;
153
+ const legacySocksProxy = useLegacyLogic
154
+ ? provider[MIDSCENE_OPENAI_SOCKS_PROXY]
155
+ : undefined;
156
+ const legacyHttpProxy = useLegacyLogic
157
+ ? provider[MIDSCENE_OPENAI_HTTP_PROXY]
158
+ : undefined;
159
+ const legacyOpenaiExtraConfig = useLegacyLogic
160
+ ? provider[MIDSCENE_OPENAI_INIT_CONFIG_JSON]
161
+ : undefined;
162
+ const legacyModelFamily = useLegacyLogic
163
+ ? legacyConfigToModelFamily(provider)
164
+ : undefined;
165
+
166
+ const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
167
+ const openaiApiKey: string | undefined =
168
+ provider[keys.openaiApiKey] || legacyAPIKey;
169
+ const openaiBaseURL: string | undefined =
170
+ provider[keys.openaiBaseURL] || legacyBaseURL;
171
+ const socksProxy: string | undefined =
172
+ provider[keys.socksProxy] || legacySocksProxy;
173
+ const httpProxy: string | undefined =
174
+ provider[keys.httpProxy] || legacyHttpProxy;
175
+ const modelName: string | undefined = provider[keys.modelName];
176
+ const openaiExtraConfigStr: string | undefined =
177
+ provider[keys.openaiExtraConfig];
178
+ const openaiExtraConfig = parseJson(
179
+ keys.openaiExtraConfig,
180
+ openaiExtraConfigStr || legacyOpenaiExtraConfig,
181
+ );
182
+
183
+ const { vlMode, uiTarsVersion } = modelFamilyToVLConfig(
184
+ modelFamilyRaw as unknown as TModelFamily,
185
+ );
186
+
187
+ const getModelDescription = (
188
+ vlMode: TVlModeTypes | undefined,
189
+ uiTarsVersion: UITarsModelVersion | undefined,
190
+ ) => {
191
+ if (vlMode) {
192
+ if (uiTarsVersion) {
193
+ return `UI-TARS=${uiTarsVersion}`;
194
+ } else {
195
+ return `${vlMode} mode`;
196
+ }
197
+ }
198
+ return '';
199
+ };
200
+ const modelDescription = getModelDescription(vlMode, uiTarsVersion);
201
+
202
+ return {
203
+ socksProxy,
204
+ httpProxy,
205
+ vlModeRaw: vlMode,
206
+ openaiBaseURL,
207
+ openaiApiKey,
208
+ openaiExtraConfig,
209
+ vlMode,
210
+ uiTarsModelVersion: uiTarsVersion,
211
+ modelName: modelName!,
212
+ modelDescription,
213
+ intent: '-' as any,
214
+ timeout: provider[keys.timeout]
215
+ ? Number(provider[keys.timeout])
216
+ : undefined,
217
+ };
218
+ };
219
+
220
+ export const decideModelConfigFromIntentConfig = (
221
+ intent: TIntent,
222
+ configMap: Record<string, string | undefined>,
223
+ ): IModelConfig | undefined => {
224
+ const debugLog = getDebug('ai:config');
225
+
226
+ debugLog(
227
+ 'will decideModelConfig base on agent.modelConfig()',
228
+ intent,
229
+ maskConfig(configMap),
230
+ );
231
+
232
+ const keysForFn = KEYS_MAP[intent];
233
+ const modelName = configMap[keysForFn.modelName];
234
+
235
+ if (!modelName) {
236
+ debugLog('no modelName found for intent', intent);
237
+ return undefined;
238
+ }
239
+
240
+ const finalResult = parseOpenaiSdkConfig({
241
+ keys: keysForFn,
242
+ provider: configMap,
243
+ useLegacyLogic: intent === 'default',
244
+ });
245
+ finalResult.intent = intent;
246
+
247
+ debugLog(
248
+ 'decideModelConfig result by agent.modelConfig() with intent',
249
+ intent,
250
+ maskConfig({ ...finalResult }),
251
+ );
252
+
253
+ assert(
254
+ finalResult.openaiBaseURL,
255
+ `failed to get base URL of model (intent=${intent}). See https://midscenejs.com/model-strategy`,
256
+ );
257
+
258
+ if (!finalResult.modelName) {
259
+ console.warn(
260
+ `modelName is not set for intent ${intent}, this may cause unexpected behavior. See https://midscenejs.com/model-strategy`,
261
+ );
262
+ }
263
+
264
+ return finalResult;
265
+ };