@midscene/shared 1.0.1-beta-20251024063839.0 → 1.0.1-beta-20251024064637.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/dist/es/env/constants.mjs +28 -28
  2. package/dist/es/env/decide-model-config.mjs +2 -5
  3. package/dist/es/env/init-debug.mjs +6 -6
  4. package/dist/es/env/model-config-manager.mjs +1 -2
  5. package/dist/es/env/types.mjs +53 -57
  6. package/dist/es/extractor/dom-util.mjs +7 -9
  7. package/dist/es/extractor/index.mjs +1 -2
  8. package/dist/es/extractor/tree.mjs +1 -3
  9. package/dist/es/extractor/util.mjs +1 -25
  10. package/dist/es/node/fs.mjs +2 -2
  11. package/dist/lib/env/constants.js +27 -27
  12. package/dist/lib/env/decide-model-config.js +2 -5
  13. package/dist/lib/env/init-debug.js +5 -5
  14. package/dist/lib/env/model-config-manager.js +1 -2
  15. package/dist/lib/env/types.js +130 -140
  16. package/dist/lib/extractor/dom-util.js +7 -9
  17. package/dist/lib/extractor/index.js +7 -14
  18. package/dist/lib/extractor/tree.js +1 -3
  19. package/dist/lib/extractor/util.js +0 -33
  20. package/dist/lib/node/fs.js +2 -2
  21. package/dist/types/env/model-config-manager.d.ts +2 -2
  22. package/dist/types/env/types.d.ts +59 -76
  23. package/dist/types/extractor/dom-util.d.ts +2 -15
  24. package/dist/types/extractor/index.d.ts +0 -1
  25. package/dist/types/extractor/tree.d.ts +1 -4
  26. package/dist/types/extractor/util.d.ts +0 -3
  27. package/dist/types/types/index.d.ts +6 -2
  28. package/package.json +1 -1
  29. package/src/env/constants.ts +52 -54
  30. package/src/env/decide-model-config.ts +2 -20
  31. package/src/env/init-debug.ts +6 -11
  32. package/src/env/model-config-manager.ts +3 -9
  33. package/src/env/types.ts +95 -122
  34. package/src/extractor/dom-util.ts +12 -8
  35. package/src/extractor/index.ts +0 -2
  36. package/src/extractor/locator.ts +0 -1
  37. package/src/extractor/tree.ts +4 -4
  38. package/src/extractor/util.ts +0 -32
  39. package/src/node/fs.ts +1 -1
  40. package/src/types/index.ts +9 -2
package/src/env/types.ts CHANGED
@@ -1,10 +1,10 @@
1
1
  // config keys
2
- export const MIDSCENE_MODEL_INIT_CONFIG_JSON =
3
- 'MIDSCENE_MODEL_INIT_CONFIG_JSON';
2
+ export const MIDSCENE_OPENAI_INIT_CONFIG_JSON =
3
+ 'MIDSCENE_OPENAI_INIT_CONFIG_JSON';
4
4
  export const MIDSCENE_MODEL_NAME = 'MIDSCENE_MODEL_NAME';
5
5
  export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
6
- export const MIDSCENE_DEBUG_MODEL_PROFILE = 'MIDSCENE_DEBUG_MODEL_PROFILE';
7
- export const MIDSCENE_DEBUG_MODEL_RESPONSE = 'MIDSCENE_DEBUG_MODEL_RESPONSE';
6
+ export const MIDSCENE_DEBUG_AI_PROFILE = 'MIDSCENE_DEBUG_AI_PROFILE';
7
+ export const MIDSCENE_DEBUG_AI_RESPONSE = 'MIDSCENE_DEBUG_AI_RESPONSE';
8
8
  export const MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG =
9
9
  'MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG';
10
10
  export const MIDSCENE_DEBUG_MODE = 'MIDSCENE_DEBUG_MODE';
@@ -15,20 +15,9 @@ export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
15
15
  export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
16
16
  export const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
17
17
 
18
- export const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
19
- export const MIDSCENE_MODEL_HTTP_PROXY = 'MIDSCENE_MODEL_HTTP_PROXY';
20
-
21
- // New primary names for public API
22
- export const MODEL_API_KEY = 'MODEL_API_KEY';
23
- export const MODEL_BASE_URL = 'MODEL_BASE_URL';
24
-
25
- /**
26
- * @deprecated Use MODEL_API_KEY instead. This is kept for backward compatibility.
27
- */
18
+ export const MIDSCENE_OPENAI_SOCKS_PROXY = 'MIDSCENE_OPENAI_SOCKS_PROXY';
19
+ export const MIDSCENE_OPENAI_HTTP_PROXY = 'MIDSCENE_OPENAI_HTTP_PROXY';
28
20
  export const OPENAI_API_KEY = 'OPENAI_API_KEY';
29
- /**
30
- * @deprecated Use MODEL_BASE_URL instead. This is kept for backward compatibility.
31
- */
32
21
  export const OPENAI_BASE_URL = 'OPENAI_BASE_URL';
33
22
  export const OPENAI_MAX_TOKENS = 'OPENAI_MAX_TOKENS';
34
23
 
@@ -61,48 +50,48 @@ export const MIDSCENE_CACHE_MAX_FILENAME_LENGTH =
61
50
  export const MIDSCENE_RUN_DIR = 'MIDSCENE_RUN_DIR';
62
51
 
63
52
  // default new
64
- export const MIDSCENE_MODEL_BASE_URL = 'MIDSCENE_MODEL_BASE_URL';
65
- export const MIDSCENE_MODEL_API_KEY = 'MIDSCENE_MODEL_API_KEY';
66
- export const MIDSCENE_LOCATOR_MODE = 'MIDSCENE_LOCATOR_MODE';
53
+ export const MIDSCENE_OPENAI_BASE_URL = 'MIDSCENE_OPENAI_BASE_URL';
54
+ export const MIDSCENE_OPENAI_API_KEY = 'MIDSCENE_OPENAI_API_KEY';
55
+ export const MIDSCENE_VL_MODE = 'MIDSCENE_VL_MODE';
67
56
 
68
57
  // VQA
69
58
  export const MIDSCENE_VQA_MODEL_NAME = 'MIDSCENE_VQA_MODEL_NAME';
70
- export const MIDSCENE_VQA_MODEL_SOCKS_PROXY = 'MIDSCENE_VQA_MODEL_SOCKS_PROXY';
71
- export const MIDSCENE_VQA_MODEL_HTTP_PROXY = 'MIDSCENE_VQA_MODEL_HTTP_PROXY';
72
- export const MIDSCENE_VQA_MODEL_BASE_URL = 'MIDSCENE_VQA_MODEL_BASE_URL';
73
- export const MIDSCENE_VQA_MODEL_API_KEY = 'MIDSCENE_VQA_MODEL_API_KEY';
74
- export const MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON =
75
- 'MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON';
76
- export const MIDSCENE_VQA_LOCATOR_MODE = 'MIDSCENE_VQA_LOCATOR_MODE';
59
+ export const MIDSCENE_VQA_OPENAI_SOCKS_PROXY =
60
+ 'MIDSCENE_VQA_OPENAI_SOCKS_PROXY';
61
+ export const MIDSCENE_VQA_OPENAI_HTTP_PROXY = 'MIDSCENE_VQA_OPENAI_HTTP_PROXY';
62
+ export const MIDSCENE_VQA_OPENAI_BASE_URL = 'MIDSCENE_VQA_OPENAI_BASE_URL';
63
+ export const MIDSCENE_VQA_OPENAI_API_KEY = 'MIDSCENE_VQA_OPENAI_API_KEY';
64
+ export const MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON =
65
+ 'MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON';
66
+ export const MIDSCENE_VQA_VL_MODE = 'MIDSCENE_VQA_VL_MODE';
77
67
 
78
68
  // PLANNING
79
69
  export const MIDSCENE_PLANNING_MODEL_NAME = 'MIDSCENE_PLANNING_MODEL_NAME';
80
- export const MIDSCENE_PLANNING_MODEL_SOCKS_PROXY =
81
- 'MIDSCENE_PLANNING_MODEL_SOCKS_PROXY';
82
- export const MIDSCENE_PLANNING_MODEL_HTTP_PROXY =
83
- 'MIDSCENE_PLANNING_MODEL_HTTP_PROXY';
84
- export const MIDSCENE_PLANNING_MODEL_BASE_URL =
85
- 'MIDSCENE_PLANNING_MODEL_BASE_URL';
86
- export const MIDSCENE_PLANNING_MODEL_API_KEY =
87
- 'MIDSCENE_PLANNING_MODEL_API_KEY';
88
- export const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON =
89
- 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
90
- export const MIDSCENE_PLANNING_LOCATOR_MODE = 'MIDSCENE_PLANNING_LOCATOR_MODE';
70
+ export const MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY =
71
+ 'MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY';
72
+ export const MIDSCENE_PLANNING_OPENAI_HTTP_PROXY =
73
+ 'MIDSCENE_PLANNING_OPENAI_HTTP_PROXY';
74
+ export const MIDSCENE_PLANNING_OPENAI_BASE_URL =
75
+ 'MIDSCENE_PLANNING_OPENAI_BASE_URL';
76
+ export const MIDSCENE_PLANNING_OPENAI_API_KEY =
77
+ 'MIDSCENE_PLANNING_OPENAI_API_KEY';
78
+ export const MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON =
79
+ 'MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON';
80
+ export const MIDSCENE_PLANNING_VL_MODE = 'MIDSCENE_PLANNING_VL_MODE';
91
81
 
92
82
  // GROUNDING
93
83
  export const MIDSCENE_GROUNDING_MODEL_NAME = 'MIDSCENE_GROUNDING_MODEL_NAME';
94
- export const MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY =
95
- 'MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY';
96
- export const MIDSCENE_GROUNDING_MODEL_HTTP_PROXY =
97
- 'MIDSCENE_GROUNDING_MODEL_HTTP_PROXY';
98
- export const MIDSCENE_GROUNDING_MODEL_BASE_URL =
99
- 'MIDSCENE_GROUNDING_MODEL_BASE_URL';
100
- export const MIDSCENE_GROUNDING_MODEL_API_KEY =
101
- 'MIDSCENE_GROUNDING_MODEL_API_KEY';
102
- export const MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON =
103
- 'MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON';
104
- export const MIDSCENE_GROUNDING_LOCATOR_MODE =
105
- 'MIDSCENE_GROUNDING_LOCATOR_MODE';
84
+ export const MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY =
85
+ 'MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY';
86
+ export const MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY =
87
+ 'MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY';
88
+ export const MIDSCENE_GROUNDING_OPENAI_BASE_URL =
89
+ 'MIDSCENE_GROUNDING_OPENAI_BASE_URL';
90
+ export const MIDSCENE_GROUNDING_OPENAI_API_KEY =
91
+ 'MIDSCENE_GROUNDING_OPENAI_API_KEY';
92
+ export const MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON =
93
+ 'MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON';
94
+ export const MIDSCENE_GROUNDING_VL_MODE = 'MIDSCENE_GROUNDING_VL_MODE';
106
95
 
107
96
  /**
108
97
  * env keys declared but unused
@@ -115,8 +104,8 @@ export const UNUSED_ENV_KEYS = [MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG];
115
104
  */
116
105
  export const BASIC_ENV_KEYS = [
117
106
  MIDSCENE_DEBUG_MODE,
118
- MIDSCENE_DEBUG_MODEL_PROFILE,
119
- MIDSCENE_DEBUG_MODEL_RESPONSE,
107
+ MIDSCENE_DEBUG_AI_PROFILE,
108
+ MIDSCENE_DEBUG_AI_RESPONSE,
120
109
  MIDSCENE_RUN_DIR,
121
110
  ] as const;
122
111
 
@@ -167,47 +156,45 @@ export const GLOBAL_ENV_KEYS = [
167
156
  export const MODEL_ENV_KEYS = [
168
157
  // model default
169
158
  MIDSCENE_MODEL_NAME,
170
- MIDSCENE_MODEL_INIT_CONFIG_JSON,
171
- MIDSCENE_MODEL_API_KEY,
172
- MIDSCENE_MODEL_BASE_URL,
173
- MIDSCENE_MODEL_SOCKS_PROXY,
174
- MIDSCENE_MODEL_HTTP_PROXY,
159
+ MIDSCENE_OPENAI_INIT_CONFIG_JSON,
160
+ MIDSCENE_OPENAI_API_KEY,
161
+ MIDSCENE_OPENAI_BASE_URL,
162
+ MIDSCENE_OPENAI_SOCKS_PROXY,
163
+ MIDSCENE_OPENAI_HTTP_PROXY,
175
164
  MIDSCENE_USE_VLM_UI_TARS,
176
165
  MIDSCENE_USE_QWEN_VL,
177
166
  MIDSCENE_USE_QWEN3_VL,
178
167
  MIDSCENE_USE_DOUBAO_VISION,
179
168
  MIDSCENE_USE_GEMINI,
180
169
  MIDSCENE_USE_VL_MODEL,
181
- MIDSCENE_LOCATOR_MODE,
170
+ MIDSCENE_VL_MODE,
182
171
  // model default legacy
183
172
  OPENAI_API_KEY,
184
173
  OPENAI_BASE_URL,
185
- MODEL_API_KEY,
186
- MODEL_BASE_URL,
187
174
  // VQA
188
175
  MIDSCENE_VQA_MODEL_NAME,
189
- MIDSCENE_VQA_MODEL_SOCKS_PROXY,
190
- MIDSCENE_VQA_MODEL_HTTP_PROXY,
191
- MIDSCENE_VQA_MODEL_BASE_URL,
192
- MIDSCENE_VQA_MODEL_API_KEY,
193
- MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON,
194
- MIDSCENE_VQA_LOCATOR_MODE,
176
+ MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
177
+ MIDSCENE_VQA_OPENAI_HTTP_PROXY,
178
+ MIDSCENE_VQA_OPENAI_BASE_URL,
179
+ MIDSCENE_VQA_OPENAI_API_KEY,
180
+ MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
181
+ MIDSCENE_VQA_VL_MODE,
195
182
  // PLANNING
196
183
  MIDSCENE_PLANNING_MODEL_NAME,
197
- MIDSCENE_PLANNING_MODEL_SOCKS_PROXY,
198
- MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
199
- MIDSCENE_PLANNING_MODEL_BASE_URL,
200
- MIDSCENE_PLANNING_MODEL_API_KEY,
201
- MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
202
- MIDSCENE_PLANNING_LOCATOR_MODE,
184
+ MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY,
185
+ MIDSCENE_PLANNING_OPENAI_HTTP_PROXY,
186
+ MIDSCENE_PLANNING_OPENAI_BASE_URL,
187
+ MIDSCENE_PLANNING_OPENAI_API_KEY,
188
+ MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON,
189
+ MIDSCENE_PLANNING_VL_MODE,
203
190
  // GROUNDING
204
191
  MIDSCENE_GROUNDING_MODEL_NAME,
205
- MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY,
206
- MIDSCENE_GROUNDING_MODEL_HTTP_PROXY,
207
- MIDSCENE_GROUNDING_MODEL_BASE_URL,
208
- MIDSCENE_GROUNDING_MODEL_API_KEY,
209
- MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON,
210
- MIDSCENE_GROUNDING_LOCATOR_MODE,
192
+ MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY,
193
+ MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY,
194
+ MIDSCENE_GROUNDING_OPENAI_BASE_URL,
195
+ MIDSCENE_GROUNDING_OPENAI_API_KEY,
196
+ MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON,
197
+ MIDSCENE_GROUNDING_VL_MODE,
211
198
  ] as const;
212
199
 
213
200
  export const ALL_ENV_KEYS = [
@@ -240,14 +227,14 @@ export interface IModelConfigForVQA {
240
227
  // model name
241
228
  [MIDSCENE_VQA_MODEL_NAME]: string;
242
229
  // proxy
243
- [MIDSCENE_VQA_MODEL_SOCKS_PROXY]?: string;
244
- [MIDSCENE_VQA_MODEL_HTTP_PROXY]?: string;
230
+ [MIDSCENE_VQA_OPENAI_SOCKS_PROXY]?: string;
231
+ [MIDSCENE_VQA_OPENAI_HTTP_PROXY]?: string;
245
232
  // OpenAI
246
- [MIDSCENE_VQA_MODEL_BASE_URL]?: string;
247
- [MIDSCENE_VQA_MODEL_API_KEY]?: string;
248
- [MIDSCENE_VQA_MODEL_INIT_CONFIG_JSON]?: string;
233
+ [MIDSCENE_VQA_OPENAI_BASE_URL]?: string;
234
+ [MIDSCENE_VQA_OPENAI_API_KEY]?: string;
235
+ [MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON]?: string;
249
236
  // extra
250
- [MIDSCENE_VQA_LOCATOR_MODE]?: TVlModeValues;
237
+ [MIDSCENE_VQA_VL_MODE]?: TVlModeValues;
251
238
  }
252
239
 
253
240
  /**
@@ -256,7 +243,7 @@ export interface IModelConfigForVQA {
256
243
  * IMPORTANT: Planning MUST use a vision language model (VL mode).
257
244
  * DOM-based planning is not supported.
258
245
  *
259
- * Required: MIDSCENE_PLANNING_LOCATOR_MODE must be set to one of:
246
+ * Required: MIDSCENE_PLANNING_VL_MODE must be set to one of:
260
247
  * - 'qwen-vl'
261
248
  * - 'qwen3-vl'
262
249
  * - 'gemini'
@@ -269,56 +256,56 @@ export interface IModelConfigForPlanning {
269
256
  // model name
270
257
  [MIDSCENE_PLANNING_MODEL_NAME]: string;
271
258
  // proxy
272
- [MIDSCENE_PLANNING_MODEL_SOCKS_PROXY]?: string;
273
- [MIDSCENE_PLANNING_MODEL_HTTP_PROXY]?: string;
259
+ [MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY]?: string;
260
+ [MIDSCENE_PLANNING_OPENAI_HTTP_PROXY]?: string;
274
261
  // OpenAI
275
- [MIDSCENE_PLANNING_MODEL_BASE_URL]?: string;
276
- [MIDSCENE_PLANNING_MODEL_API_KEY]?: string;
277
- [MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON]?: string;
262
+ [MIDSCENE_PLANNING_OPENAI_BASE_URL]?: string;
263
+ [MIDSCENE_PLANNING_OPENAI_API_KEY]?: string;
264
+ [MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON]?: string;
278
265
  // extra
279
- [MIDSCENE_PLANNING_LOCATOR_MODE]?: TVlModeValues;
266
+ [MIDSCENE_PLANNING_VL_MODE]?: TVlModeValues;
280
267
  }
281
268
 
282
269
  export interface IModeConfigForGrounding {
283
270
  // model name
284
271
  [MIDSCENE_GROUNDING_MODEL_NAME]: string;
285
272
  // proxy
286
- [MIDSCENE_GROUNDING_MODEL_SOCKS_PROXY]?: string;
287
- [MIDSCENE_GROUNDING_MODEL_HTTP_PROXY]?: string;
273
+ [MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY]?: string;
274
+ [MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY]?: string;
288
275
  // OpenAI
289
- [MIDSCENE_GROUNDING_MODEL_BASE_URL]?: string;
290
- [MIDSCENE_GROUNDING_MODEL_API_KEY]?: string;
291
- [MIDSCENE_GROUNDING_MODEL_INIT_CONFIG_JSON]?: string;
276
+ [MIDSCENE_GROUNDING_OPENAI_BASE_URL]?: string;
277
+ [MIDSCENE_GROUNDING_OPENAI_API_KEY]?: string;
278
+ [MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON]?: string;
292
279
  // extra
293
- [MIDSCENE_GROUNDING_LOCATOR_MODE]?: TVlModeValues;
280
+ [MIDSCENE_GROUNDING_VL_MODE]?: TVlModeValues;
294
281
  }
295
282
 
296
283
  export interface IModelConfigForDefault {
297
284
  // model name
298
285
  [MIDSCENE_MODEL_NAME]: string;
299
286
  // proxy
300
- [MIDSCENE_MODEL_SOCKS_PROXY]?: string;
301
- [MIDSCENE_MODEL_HTTP_PROXY]?: string;
287
+ [MIDSCENE_OPENAI_SOCKS_PROXY]?: string;
288
+ [MIDSCENE_OPENAI_HTTP_PROXY]?: string;
302
289
  // OpenAI
303
- [MIDSCENE_MODEL_BASE_URL]?: string;
304
- [MIDSCENE_MODEL_API_KEY]?: string;
305
- [MIDSCENE_MODEL_INIT_CONFIG_JSON]?: string;
290
+ [MIDSCENE_OPENAI_BASE_URL]?: string;
291
+ [MIDSCENE_OPENAI_API_KEY]?: string;
292
+ [MIDSCENE_OPENAI_INIT_CONFIG_JSON]?: string;
306
293
  // extra
307
- [MIDSCENE_LOCATOR_MODE]?: TVlModeValues;
294
+ [MIDSCENE_VL_MODE]?: TVlModeValues;
308
295
  }
309
296
 
310
297
  export interface IModelConfigForDefaultLegacy {
311
298
  // model name
312
299
  [MIDSCENE_MODEL_NAME]: string;
313
300
  // proxy
314
- [MIDSCENE_MODEL_SOCKS_PROXY]?: string;
315
- [MIDSCENE_MODEL_HTTP_PROXY]?: string;
301
+ [MIDSCENE_OPENAI_SOCKS_PROXY]?: string;
302
+ [MIDSCENE_OPENAI_HTTP_PROXY]?: string;
316
303
  // OpenAI
317
304
  [OPENAI_BASE_URL]?: string;
318
305
  [OPENAI_API_KEY]?: string;
319
- [MIDSCENE_MODEL_INIT_CONFIG_JSON]?: string;
306
+ [MIDSCENE_OPENAI_INIT_CONFIG_JSON]?: string;
320
307
  // extra
321
- [MIDSCENE_LOCATOR_MODE]?: TVlModeValues;
308
+ [MIDSCENE_VL_MODE]?: TVlModeValues;
322
309
  }
323
310
 
324
311
  /**
@@ -329,11 +316,7 @@ export interface IModelConfigForDefaultLegacy {
329
316
  */
330
317
  export type TIntent = 'VQA' | 'planning' | 'grounding' | 'default';
331
318
 
332
- /**
333
- * Internal type with intent parameter for ModelConfigManager
334
- * @internal
335
- */
336
- export type TModelConfigFnInternal = (options: {
319
+ export type TModelConfigFn = (options: {
337
320
  intent: TIntent;
338
321
  }) =>
339
322
  | IModelConfigForVQA
@@ -341,16 +324,6 @@ export type TModelConfigFnInternal = (options: {
341
324
  | IModeConfigForGrounding
342
325
  | IModelConfigForDefault;
343
326
 
344
- /**
345
- * User-facing model config function type
346
- * Users return config objects without needing to know about intent parameter
347
- */
348
- export type TModelConfigFn = () =>
349
- | IModelConfigForVQA
350
- | IModelConfigForPlanning
351
- | IModeConfigForGrounding
352
- | IModelConfigForDefault;
353
-
354
327
  export enum UITarsModelVersion {
355
328
  V1_0 = '1.0',
356
329
  V1_5 = '1.5',
@@ -1,4 +1,4 @@
1
- import { NodeType } from '../constants';
1
+ import type { LocateResultElement, Rect } from '../types';
2
2
  import { generateHashId } from '../utils';
3
3
 
4
4
  export function isFormElement(node: globalThis.Node) {
@@ -132,20 +132,24 @@ function includeBaseElement(node: globalThis.Node) {
132
132
  return false;
133
133
  }
134
134
 
135
- export function generateElementByPosition(position: { x: number; y: number }) {
135
+ export function generateElementByPosition(position: {
136
+ x: number;
137
+ y: number;
138
+ }): LocateResultElement {
139
+ const edgeSize = 8;
136
140
  const rect = {
137
- left: Math.max(position.x - 4, 0),
138
- top: Math.max(position.y - 4, 0),
139
- width: 8,
140
- height: 8,
141
+ left: Math.round(Math.max(position.x - edgeSize / 2, 0)),
142
+ top: Math.round(Math.max(position.y - edgeSize / 2, 0)),
143
+ width: edgeSize,
144
+ height: edgeSize,
141
145
  };
142
146
  const id = generateHashId(rect);
143
147
  const element = {
144
148
  id,
145
- attributes: { nodeType: NodeType.POSITION },
146
149
  rect,
147
150
  content: '',
148
- center: [position.x, position.y],
151
+ center: [position.x, position.y] as [number, number],
152
+ isOrderSensitive: false, // actually it's 'unknown'
149
153
  };
150
154
 
151
155
  return element;
@@ -35,8 +35,6 @@ export { extractTreeNode as webExtractNodeTree } from './web-extractor';
35
35
 
36
36
  export { extractTreeNodeAsString as webExtractNodeTreeAsString } from './web-extractor';
37
37
 
38
- export { setNodeHashCacheListOnWindow, getNodeFromCacheList } from './util';
39
-
40
38
  export {
41
39
  getXpathsByPoint,
42
40
  getNodeInfoByXpath,
@@ -1,7 +1,6 @@
1
1
  import type { ElementInfo } from '.';
2
2
  import type { Point } from '../types';
3
3
  import { isSvgElement } from './dom-util';
4
- import { getNodeFromCacheList } from './util';
5
4
  import { getRect, isElementPartiallyInViewport } from './util';
6
5
  import { collectElementInfo } from './web-extractor';
7
6
 
@@ -45,7 +45,7 @@ export function trimAttributes(
45
45
  res[currentKey] = truncateText(attributeVal, truncateTextLength);
46
46
  return res;
47
47
  },
48
- {} as BaseElement['attributes'],
48
+ {} as Record<string, string>,
49
49
  );
50
50
  return tailorAttributes;
51
51
  }
@@ -106,8 +106,8 @@ export function descriptionOfTree<
106
106
  .replace(/\sNode$/, '')
107
107
  .toLowerCase();
108
108
  }
109
- const markerId = node.node.indexId;
110
- const markerIdString = markerId ? `markerId="${markerId}"` : '';
109
+ // const markerId = node.node.indexId;
110
+ // const markerIdString = markerId ? `markerId="${markerId}"` : '';
111
111
  const rectAttribute = node.node.rect
112
112
  ? {
113
113
  left: node.node.rect.left,
@@ -116,7 +116,7 @@ export function descriptionOfTree<
116
116
  height: node.node.rect.height,
117
117
  }
118
118
  : {};
119
- before = `<${nodeTypeString} id="${node.node.id}" ${markerIdString} ${attributesString(trimAttributes(node.node.attributes || {}, truncateTextLength))} ${attributesString(rectAttribute)}>`;
119
+ before = `<${nodeTypeString} id="${node.node.id}" ${attributesString(trimAttributes(node.node.attributes || {}, truncateTextLength))} ${attributesString(rectAttribute)}>`;
120
120
  const content = truncateText(node.node.content, truncateTextLength);
121
121
  contentWithIndent = content ? `\n${indentStr} ${content}` : '';
122
122
  after = `</${nodeTypeString}>`;
@@ -399,42 +399,10 @@ export function midsceneGenerateHash(
399
399
  ): string {
400
400
  const slicedHash = generateHashId(rect, content);
401
401
 
402
- if (node) {
403
- if (!(window as any).midsceneNodeHashCacheList) {
404
- setNodeHashCacheListOnWindow();
405
- }
406
-
407
- setNodeToCacheList(node, slicedHash);
408
- }
409
-
410
402
  // Returns the first 10 characters as a short hash
411
403
  return slicedHash;
412
404
  }
413
405
 
414
- export function setNodeHashCacheListOnWindow() {
415
- if (typeof window !== 'undefined') {
416
- (window as any).midsceneNodeHashCacheList = [];
417
- }
418
- }
419
-
420
- export function setNodeToCacheList(node: globalThis.Node, id: string) {
421
- if (typeof window !== 'undefined') {
422
- if (getNodeFromCacheList(id)) {
423
- return;
424
- }
425
- (window as any).midsceneNodeHashCacheList?.push({ node, id });
426
- }
427
- }
428
-
429
- export function getNodeFromCacheList(id: string) {
430
- if (typeof window !== 'undefined') {
431
- return (window as any).midsceneNodeHashCacheList?.find(
432
- (item: { node: Node; id: string }) => item.id === id,
433
- )?.node;
434
- }
435
- return null;
436
- }
437
-
438
406
  export function generateId(numberId: number) {
439
407
  // const letters = 'ABCDEFGHIJKLMNPRSTUVXYZ';
440
408
  // const numbers = '0123456789';
package/src/node/fs.ts CHANGED
@@ -75,7 +75,7 @@ export async function getExtraReturnLogic(tree = false) {
75
75
  return null;
76
76
  }
77
77
 
78
- const elementInfosScriptContent = `${getElementInfosScriptContent()}midscene_element_inspector.setNodeHashCacheListOnWindow();`;
78
+ const elementInfosScriptContent = `${getElementInfosScriptContent()};`;
79
79
 
80
80
  if (tree) {
81
81
  return `${elementInfosScriptContent}midscene_element_inspector.webExtractNodeTree()`;
@@ -17,7 +17,7 @@ export type Rect = Point & Size & { zoom?: number };
17
17
  export abstract class BaseElement {
18
18
  abstract id: string;
19
19
 
20
- abstract indexId?: number; // markerId for web
20
+ // abstract indexId?: number; // markerId for web
21
21
 
22
22
  abstract attributes: {
23
23
  nodeType: NodeType;
@@ -30,7 +30,7 @@ export abstract class BaseElement {
30
30
 
31
31
  abstract center: [number, number];
32
32
 
33
- abstract xpaths?: string[];
33
+ // abstract xpaths?: string[];
34
34
 
35
35
  abstract isVisible: boolean;
36
36
  }
@@ -45,3 +45,10 @@ export interface ElementTreeNode<
45
45
  export interface WebElementInfo extends ElementInfo {
46
46
  zoom: number;
47
47
  }
48
+
49
+ export type LocateResultElement = {
50
+ center: [number, number];
51
+ rect: Rect;
52
+ id: string;
53
+ isOrderSensitive?: boolean;
54
+ };