@midscene/shared 1.0.1-beta-20251022061922.0 → 1.0.1-beta-20251024063839.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/env/types.ts CHANGED
@@ -1,10 +1,10 @@
1
1
  // config keys
2
- export const MIDSCENE_OPENAI_INIT_CONFIG_JSON =
3
- 'MIDSCENE_OPENAI_INIT_CONFIG_JSON';
2
+ export const MIDSCENE_MODEL_INIT_CONFIG_JSON =
3
+ 'MIDSCENE_MODEL_INIT_CONFIG_JSON';
4
4
  export const MIDSCENE_MODEL_NAME = 'MIDSCENE_MODEL_NAME';
5
5
  export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
6
- export const MIDSCENE_DEBUG_AI_PROFILE = 'MIDSCENE_DEBUG_AI_PROFILE';
7
- export const MIDSCENE_DEBUG_AI_RESPONSE = 'MIDSCENE_DEBUG_AI_RESPONSE';
6
+ export const MIDSCENE_DEBUG_MODEL_PROFILE = 'MIDSCENE_DEBUG_MODEL_PROFILE';
7
+ export const MIDSCENE_DEBUG_MODEL_RESPONSE = 'MIDSCENE_DEBUG_MODEL_RESPONSE';
8
8
  export const MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG =
9
9
  'MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG';
10
10
  export const MIDSCENE_DEBUG_MODE = 'MIDSCENE_DEBUG_MODE';
@@ -15,9 +15,20 @@ export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
15
15
  export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
16
16
  export const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
17
17
 
18
- export const MIDSCENE_OPENAI_SOCKS_PROXY = 'MIDSCENE_OPENAI_SOCKS_PROXY';
19
- export const MIDSCENE_OPENAI_HTTP_PROXY = 'MIDSCENE_OPENAI_HTTP_PROXY';
18
+ export const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
19
+ export const MIDSCENE_MODEL_HTTP_PROXY = 'MIDSCENE_MODEL_HTTP_PROXY';
20
+
21
+ // New primary names for public API
22
+ export const MODEL_API_KEY = 'MODEL_API_KEY';
23
+ export const MODEL_BASE_URL = 'MODEL_BASE_URL';
24
+
25
+ /**
26
+ * @deprecated Use MODEL_API_KEY instead. This is kept for backward compatibility.
27
+ */
20
28
  export const OPENAI_API_KEY = 'OPENAI_API_KEY';
29
+ /**
30
+ * @deprecated Use MODEL_BASE_URL instead. This is kept for backward compatibility.
31
+ */
21
32
  export const OPENAI_BASE_URL = 'OPENAI_BASE_URL';
22
33
  export const OPENAI_MAX_TOKENS = 'OPENAI_MAX_TOKENS';
23
34
 
@@ -50,48 +61,48 @@ export const MIDSCENE_CACHE_MAX_FILENAME_LENGTH =
50
61
  export const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
51
62
 
52
63
  // default new
53
- export const MIDSCENE_OPENAI_BASE_URL = 'MIDSCENE_OPENAI_BASE_URL';
54
- export const MIDSCENE_OPENAI_API_KEY = 'MIDSCENE_OPENAI_API_KEY';
55
- export const MIDSCENE_VL_MODE = 'MIDSCENE_VL_MODE';
64
+ export const MIDSCENE_MODEL_BASE_URL = 'MIDSCENE_MODEL_BASE_URL';
65
+ export const MIDSCENE_MODEL_API_KEY = 'MIDSCENE_MODEL_API_KEY';
66
+ export const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
56
67
 
57
68
  // VQA
58
69
  export const MIDSCENE_VQA_MODEL_NAME = 'MIDSCENE_VQA_MODEL_NAME';
59
- export const MIDSCENE_VQA_OPENAI_SOCKS_PROXY =
60
- 'MIDSCENE_VQA_OPENAI_SOCKS_PROXY';
61
- export const MIDSCENE_VQA_OPENAI_HTTP_PROXY = 'MIDSCENE_VQA_OPENAI_HTTP_PROXY';
62
- export const MIDSCENE_VQA_OPENAI_BASE_URL = 'MIDSCENE_VQA_OPENAI_BASE_URL';
63
- export const MIDSCENE_VQA_OPENAI_API_KEY = 'MIDSCENE_VQA_OPENAI_API_KEY';
64
- export const MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON =
65
- 'MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON';
66
- export const MIDSCENE_VQA_VL_MODE = 'MIDSCENE_VQA_VL_MODE';
70
+ export const MIDSCENE_VQA_MODEL_SOCKS_PROXY = 'MIDSCENE_VQA_MODEL_SOCKS_PROXY';
71
+ export const MIDSCENE_VQA_MODEL_HTTP_PROXY = 'MIDSCENE_VQA_MODEL_HTTP_PROXY';
72
+ export const MIDSCENE_VQA_MODEL_BASE_URL = 'MIDSCENE_VQA_MODEL_BASE_URL';
73
+ export const MIDSCENE_VQA_MODEL_API_KEY = 'MIDSCENE_VQA_MODEL_API_KEY';
74
+ export const MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON =
75
+ 'MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON';
76
+ export const MIDSCENE_VQA_LOCATOR_MODE = 'MIDSCENE_VQA_LOCATOR_MODE';
67
77
 
68
78
  // PLANNING
69
79
  export const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
70
- export const MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY =
71
- 'MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY';
72
- export const MIDSCENE_PLANNING_OPENAI_HTTP_PROXY =
73
- 'MIDSCENE_PLANNING_OPENAI_HTTP_PROXY';
74
- export const MIDSCENE_PLANNING_OPENAI_BASE_URL =
75
- 'MIDSCENE_PLANNING_OPENAI_BASE_URL';
76
- export const MIDSCENE_PLANNING_OPENAI_API_KEY =
77
- 'MIDSCENE_PLANNING_OPENAI_API_KEY';
78
- export const MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON =
79
- 'MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON';
80
- export const MIDSCENE_PLANNING_VL_MODE = 'MIDSCENE_PLANNING_VL_MODE';
80
+ export const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY =
81
+ 'MIDSCENE_PLANNING_MODEL_SOCKS_PROXY';
82
+ export const MIDSCENE_PLANNING_MODEL_HTTP_PROXY =
83
+ 'MIDSCENE_PLANNING_MODEL_HTTP_PROXY';
84
+ export const MIDSCENE_PLANNING_MODEL_BASE_URL =
85
+ 'MIDSCENE_PLANNING_MODEL_BASE_URL';
86
+ export const MIDSCENE_PLANNING_MODEL_API_KEY =
87
+ 'MIDSCENE_PLANNING_MODEL_API_KEY';
88
+ export const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON =
89
+ 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
90
+ export const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
81
91
 
82
92
  // GROUNDING
83
93
  export const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
84
- export const MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY =
85
- 'MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY';
86
- export const MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY =
87
- 'MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY';
88
- export const MIDSCENE_GROUNDING_OPENAI_BASE_URL =
89
- 'MIDSCENE_GROUNDING_OPENAI_BASE_URL';
90
- export const MIDSCENE_GROUNDING_OPENAI_API_KEY =
91
- 'MIDSCENE_GROUNDING_OPENAI_API_KEY';
92
- export const MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON =
93
- 'MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON';
94
- export const MIDSCENE_GROUNDING_VL_MODE = 'MIDSCENE_GROUNDING_VL_MODE';
94
+ export const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY =
95
+ 'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
96
+ export const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY =
97
+ 'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
98
+ export const MIDSCENE_GROUNDING_MODEL_BASE_URL =
99
+ 'MIDSCENE_GROUNDING_MODEL_BASE_URL';
100
+ export const MIDSCENE_GROUNDING_MODEL_API_KEY =
101
+ 'MIDSCENE_GROUNDING_MODEL_API_KEY';
102
+ export const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON =
103
+ 'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
104
+ export const MIDSCENE_GROUNDING_LOCATOR_MODE =
105
+ 'MIDSCENE_GROUNDING_LOCATOR_MODE';
95
106
 
96
107
  /**
97
108
  * env keys declared but unused
@@ -104,8 +115,8 @@ export const UNUSED_ENV_KEYS = [MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG];
104
115
  */
105
116
  export const BASIC_ENV_KEYS = [
106
117
  MIDSCENE_DEBUG_MODE,
107
- MIDSCENE_DEBUG_AI_PROFILE,
108
- MIDSCENE_DEBUG_AI_RESPONSE,
118
+ MIDSCENE_DEBUG_MODEL_PROFILE,
119
+ MIDSCENE_DEBUG_MODEL_RESPONSE,
109
120
  MIDSCENE_RUN_DIR,
110
121
  ] as const;
111
122
 
@@ -156,45 +167,47 @@ export const GLOBAL_ENV_KEYS = [
156
167
  export const MODEL_ENV_KEYS = [
157
168
  // model default
158
169
  MIDSCENE_MODEL_NAME,
159
- MIDSCENE_OPENAI_INIT_CONFIG_JSON,
160
- MIDSCENE_OPENAI_API_KEY,
161
- MIDSCENE_OPENAI_BASE_URL,
162
- MIDSCENE_OPENAI_SOCKS_PROXY,
163
- MIDSCENE_OPENAI_HTTP_PROXY,
170
+ MIDSCENE_MODEL_INIT_CONFIG_JSON,
171
+ MIDSCENE_MODEL_API_KEY,
172
+ MIDSCENE_MODEL_BASE_URL,
173
+ MIDSCENE_MODEL_SOCKS_PROXY,
174
+ MIDSCENE_MODEL_HTTP_PROXY,
164
175
  MIDSCENE_USE_VLM_UI_TARS,
165
176
  MIDSCENE_USE_QWEN_VL,
166
177
  MIDSCENE_USE_QWEN3_VL,
167
178
  MIDSCENE_USE_DOUBAO_VISION,
168
179
  MIDSCENE_USE_GEMINI,
169
180
  MIDSCENE_USE_VL_MODEL,
170
- MIDSCENE_VL_MODE,
181
+ MIDSCENE_LOCATOR_MODE,
171
182
  // model default legacy
172
183
  OPENAI_API_KEY,
173
184
  OPENAI_BASE_URL,
185
+ MODEL_API_KEY,
186
+ MODEL_BASE_URL,
174
187
  // VQA
175
188
  MIDSCENE_VQA_MODEL_NAME,
176
- MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
177
- MIDSCENE_VQA_OPENAI_HTTP_PROXY,
178
- MIDSCENE_VQA_OPENAI_BASE_URL,
179
- MIDSCENE_VQA_OPENAI_API_KEY,
180
- MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
181
- MIDSCENE_VQA_VL_MODE,
189
+ MIDSCENE_VQA_MODEL_SOCKS_PROXY,
190
+ MIDSCENE_VQA_MODEL_HTTP_PROXY,
191
+ MIDSCENE_VQA_MODEL_BASE_URL,
192
+ MIDSCENE_VQA_MODEL_API_KEY,
193
+ MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
194
+ MIDSCENE_VQA_LOCATOR_MODE,
182
195
  // PLANNING
183
196
  MIDSCENE_PLANNING_MODEL_NAME,
184
- MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
185
- MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
186
- MIDSCENE_PLANNING_OPENAI_BASE_URL,
187
- MIDSCENE_PLANNING_OPENAI_API_KEY,
188
- MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
189
- MIDSCENE_PLANNING_VL_MODE,
197
+ MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
198
+ MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
199
+ MIDSCENE_PLANNING_MODEL_BASE_URL,
200
+ MIDSCENE_PLANNING_MODEL_API_KEY,
201
+ MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
202
+ MIDSCENE_PLANNING_LOCATOR_MODE,
190
203
  // GROUNDING
191
204
  MIDSCENE_GROUNDING_MODEL_NAME,
192
- MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
193
- MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
194
- MIDSCENE_GROUNDING_OPENAI_BASE_URL,
195
- MIDSCENE_GROUNDING_OPENAI_API_KEY,
196
- MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
197
- MIDSCENE_GROUNDING_VL_MODE,
205
+ MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
206
+ MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
207
+ MIDSCENE_GROUNDING_MODEL_BASE_URL,
208
+ MIDSCENE_GROUNDING_MODEL_API_KEY,
209
+ MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
210
+ MIDSCENE_GROUNDING_LOCATOR_MODE,
198
211
  ] as const;
199
212
 
200
213
  export const ALL_ENV_KEYS = [
@@ -227,14 +240,14 @@ export interface IModelConfigForVQA {
227
240
  // model name
228
241
  [MIDSCENE_VQA_MODEL_NAME]: string;
229
242
  // proxy
230
- [MIDSCENE_VQA_OPENAI_SOCKS_PROXY]?: string;
231
- [MIDSCENE_VQA_OPENAI_HTTP_PROXY]?: string;
243
+ [MIDSCENE_VQA_MODEL_SOCKS_PROXY]?: string;
244
+ [MIDSCENE_VQA_MODEL_HTTP_PROXY]?: string;
232
245
  // OpenAI
233
- [MIDSCENE_VQA_OPENAI_BASE_URL]?: string;
234
- [MIDSCENE_VQA_OPENAI_API_KEY]?: string;
235
- [MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON]?: string;
246
+ [MIDSCENE_VQA_MODEL_BASE_URL]?: string;
247
+ [MIDSCENE_VQA_MODEL_API_KEY]?: string;
248
+ [MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON]?: string;
236
249
  // extra
237
- [MIDSCENE_VQA_VL_MODE]?: TVlModeValues;
250
+ [MIDSCENE_VQA_LOCATOR_MODE]?: TVlModeValues;
238
251
  }
239
252
 
240
253
  /**
@@ -243,7 +256,7 @@ export interface IModelConfigForVQA {
243
256
  * IMPORTANT: Planning MUST use a vision language model (VL mode).
244
257
  * DOM-based planning is not supported.
245
258
  *
246
- * Required: MIDSCENE_PLANNING_VL_MODE must be set to one of:
259
+ * Required: MIDSCENE_PLANNING_LOCATOR_MODE must be set to one of:
247
260
  * - 'qwen-vl'
248
261
  * - 'qwen3-vl'
249
262
  * - 'gemini'
@@ -256,56 +269,56 @@ export interface IModelConfigForPlanning {
256
269
  // model name
257
270
  [MIDSCENE_PLANNING_MODEL_NAME]: string;
258
271
  // proxy
259
- [MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY]?: string;
260
- [MIDSCENE_PLANNING_OPENAI_HTTP_PROXY]?: string;
272
+ [MIDSCENE_PLANNING_MODEL_SOCKS_PROXY]?: string;
273
+ [MIDSCENE_PLANNING_MODEL_HTTP_PROXY]?: string;
261
274
  // OpenAI
262
- [MIDSCENE_PLANNING_OPENAI_BASE_URL]?: string;
263
- [MIDSCENE_PLANNING_OPENAI_API_KEY]?: string;
264
- [MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON]?: string;
275
+ [MIDSCENE_PLANNING_MODEL_BASE_URL]?: string;
276
+ [MIDSCENE_PLANNING_MODEL_API_KEY]?: string;
277
+ [MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON]?: string;
265
278
  // extra
266
- [MIDSCENE_PLANNING_VL_MODE]?: TVlModeValues;
279
+ [MIDSCENE_PLANNING_LOCATOR_MODE]?: TVlModeValues;
267
280
  }
268
281
 
269
282
  export interface IModeConfigForGrounding {
270
283
  // model name
271
284
  [MIDSCENE_GROUNDING_MODEL_NAME]: string;
272
285
  // proxy
273
- [MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY]?: string;
274
- [MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY]?: string;
286
+ [MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY]?: string;
287
+ [MIDSCENE_GROUNDING_MODEL_HTTP_PROXY]?: string;
275
288
  // OpenAI
276
- [MIDSCENE_GROUNDING_OPENAI_BASE_URL]?: string;
277
- [MIDSCENE_GROUNDING_OPENAI_API_KEY]?: string;
278
- [MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON]?: string;
289
+ [MIDSCENE_GROUNDING_MODEL_BASE_URL]?: string;
290
+ [MIDSCENE_GROUNDING_MODEL_API_KEY]?: string;
291
+ [MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON]?: string;
279
292
  // extra
280
- [MIDSCENE_GROUNDING_VL_MODE]?: TVlModeValues;
293
+ [MIDSCENE_GROUNDING_LOCATOR_MODE]?: TVlModeValues;
281
294
  }
282
295
 
283
296
  export interface IModelConfigForDefault {
284
297
  // model name
285
298
  [MIDSCENE_MODEL_NAME]: string;
286
299
  // proxy
287
- [MIDSCENE_OPENAI_SOCKS_PROXY]?: string;
288
- [MIDSCENE_OPENAI_HTTP_PROXY]?: string;
300
+ [MIDSCENE_MODEL_SOCKS_PROXY]?: string;
301
+ [MIDSCENE_MODEL_HTTP_PROXY]?: string;
289
302
  // OpenAI
290
- [MIDSCENE_OPENAI_BASE_URL]?: string;
291
- [MIDSCENE_OPENAI_API_KEY]?: string;
292
- [MIDSCENE_OPENAI_INIT_CONFIG_JSON]?: string;
303
+ [MIDSCENE_MODEL_BASE_URL]?: string;
304
+ [MIDSCENE_MODEL_API_KEY]?: string;
305
+ [MIDSCENE_MODEL_INIT_CONFIG_JSON]?: string;
293
306
  // extra
294
- [MIDSCENE_VL_MODE]?: TVlModeValues;
307
+ [MIDSCENE_LOCATOR_MODE]?: TVlModeValues;
295
308
  }
296
309
 
297
310
  export interface IModelConfigForDefaultLegacy {
298
311
  // model name
299
312
  [MIDSCENE_MODEL_NAME]: string;
300
313
  // proxy
301
- [MIDSCENE_OPENAI_SOCKS_PROXY]?: string;
302
- [MIDSCENE_OPENAI_HTTP_PROXY]?: string;
314
+ [MIDSCENE_MODEL_SOCKS_PROXY]?: string;
315
+ [MIDSCENE_MODEL_HTTP_PROXY]?: string;
303
316
  // OpenAI
304
317
  [OPENAI_BASE_URL]?: string;
305
318
  [OPENAI_API_KEY]?: string;
306
- [MIDSCENE_OPENAI_INIT_CONFIG_JSON]?: string;
319
+ [MIDSCENE_MODEL_INIT_CONFIG_JSON]?: string;
307
320
  // extra
308
- [MIDSCENE_VL_MODE]?: TVlModeValues;
321
+ [MIDSCENE_LOCATOR_MODE]?: TVlModeValues;
309
322
  }
310
323
 
311
324
  /**
@@ -316,7 +329,11 @@ export interface IModelConfigForDefaultLegacy {
316
329
  */
317
330
  export type TIntent = 'VQA' | 'planning' | 'grounding' | 'default';
318
331
 
319
- export type TModelConfigFn = (options: {
332
+ /**
333
+ * Internal type with intent parameter for ModelConfigManager
334
+ * @internal
335
+ */
336
+ export type TModelConfigFnInternal = (options: {
320
337
  intent: TIntent;
321
338
  }) =>
322
339
  | IModelConfigForVQA
@@ -324,6 +341,16 @@ export type TModelConfigFn = (options: {
324
341
  | IModeConfigForGrounding
325
342
  | IModelConfigForDefault;
326
343
 
344
+ /**
345
+ * User-facing model config function type
346
+ * Users return config objects without needing to know about intent parameter
347
+ */
348
+ export type TModelConfigFn = () =>
349
+ | IModelConfigForVQA
350
+ | IModelConfigForPlanning
351
+ | IModeConfigForGrounding
352
+ | IModelConfigForDefault;
353
+
327
354
  export enum UITarsModelVersion {
328
355
  V1_0 = '1.0',
329
356
  V1_5 = '1.5',