@midscene/shared 0.28.12-beta-20250924093113.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, UITarsModelVersion, VL_MODE_RAW_VALID_VALUES } from "./types.mjs";
1
+ import { MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, UITarsModelVersion, VL_MODE_RAW_VALID_VALUES } from "./types.mjs";
2
2
  const parseVlModeAndUiTarsModelVersionFromRawValue = (vlModeRaw)=>{
3
3
  if (!vlModeRaw) return {
4
4
  vlMode: void 0,
@@ -22,21 +22,15 @@ const parseVlModeAndUiTarsModelVersionFromRawValue = (vlModeRaw)=>{
22
22
  const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
23
23
  const isDoubao = provider[MIDSCENE_USE_DOUBAO_VISION];
24
24
  const isQwen = provider[MIDSCENE_USE_QWEN_VL];
25
- const isQwen3 = provider[MIDSCENE_USE_QWEN3_VL];
26
25
  const isUiTars = provider[MIDSCENE_USE_VLM_UI_TARS];
27
26
  const isGemini = provider[MIDSCENE_USE_GEMINI];
28
27
  const enabledModes = [
29
28
  isDoubao && MIDSCENE_USE_DOUBAO_VISION,
30
29
  isQwen && MIDSCENE_USE_QWEN_VL,
31
- isQwen3 && MIDSCENE_USE_QWEN3_VL,
32
30
  isUiTars && MIDSCENE_USE_VLM_UI_TARS,
33
31
  isGemini && MIDSCENE_USE_GEMINI
34
32
  ].filter(Boolean);
35
33
  if (enabledModes.length > 1) throw new Error(`Only one vision mode can be enabled at a time. Currently enabled modes: ${enabledModes.join(', ')}. Please disable all but one mode.`);
36
- if (isQwen3) return {
37
- vlMode: 'qwen3-vl',
38
- uiTarsVersion: void 0
39
- };
40
34
  if (isQwen) return {
41
35
  vlMode: 'qwen-vl',
42
36
  uiTarsVersion: void 0
@@ -22,7 +22,6 @@ const MIDSCENE_ANDROID_IME_STRATEGY = 'MIDSCENE_ANDROID_IME_STRATEGY';
22
22
  const MIDSCENE_CACHE = 'MIDSCENE_CACHE';
23
23
  const MIDSCENE_USE_VLM_UI_TARS = 'MIDSCENE_USE_VLM_UI_TARS';
24
24
  const MIDSCENE_USE_QWEN_VL = 'MIDSCENE_USE_QWEN_VL';
25
- const MIDSCENE_USE_QWEN3_VL = 'MIDSCENE_USE_QWEN3_VL';
26
25
  const MIDSCENE_USE_DOUBAO_VISION = 'MIDSCENE_USE_DOUBAO_VISION';
27
26
  const MIDSCENE_USE_GEMINI = 'MIDSCENE_USE_GEMINI';
28
27
  const MIDSCENE_USE_VL_MODEL = 'MIDSCENE_USE_VL_MODEL';
@@ -154,7 +153,6 @@ const MODEL_ENV_KEYS = [
154
153
  MIDSCENE_USE_ANTHROPIC_SDK,
155
154
  MIDSCENE_USE_VLM_UI_TARS,
156
155
  MIDSCENE_USE_QWEN_VL,
157
- MIDSCENE_USE_QWEN3_VL,
158
156
  MIDSCENE_USE_DOUBAO_VISION,
159
157
  MIDSCENE_USE_GEMINI,
160
158
  MIDSCENE_USE_VL_MODEL,
@@ -241,9 +239,8 @@ const VL_MODE_RAW_VALID_VALUES = [
241
239
  'doubao-vision',
242
240
  'gemini',
243
241
  'qwen-vl',
244
- 'qwen3-vl',
245
242
  'vlm-ui-tars',
246
243
  'vlm-ui-tars-doubao',
247
244
  'vlm-ui-tars-doubao-1.5'
248
245
  ];
249
- export { ALL_ENV_KEYS, ANTHROPIC_API_KEY, AZURE_OPENAI_API_VERSION, AZURE_OPENAI_DEPLOYMENT, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_KEY, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_ANTHROPIC_API_KEY, MIDSCENE_API_TYPE, MIDSCENE_AZURE_OPENAI_API_VERSION, MIDSCENE_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_AZURE_OPENAI_ENDPOINT, MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_AZURE_OPENAI_KEY, MIDSCENE_AZURE_OPENAI_SCOPE, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_AI_RESPONSE, MIDSCENE_DEBUG_MODE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_GROUNDING_ANTHROPIC_API_KEY, MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION, MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT, MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_AZURE_OPENAI_KEY, MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE, MIDSCENE_GROUNDING_MODEL_NAME, MIDSCENE_GROUNDING_OPENAI_API_KEY, MIDSCENE_GROUNDING_OPENAI_BASE_URL, MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY, MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY, MIDSCENE_GROUNDING_OPENAI_USE_AZURE, MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK, MIDSCENE_GROUNDING_USE_AZURE_OPENAI, MIDSCENE_GROUNDING_VL_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_NAME, MIDSCENE_OPENAI_API_KEY, MIDSCENE_OPENAI_BASE_URL, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_OPENAI_USE_AZURE, MIDSCENE_PLANNING_ANTHROPIC_API_KEY, MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION, MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT, MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_PLANNING_AZURE_OPENAI_KEY, MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_OPENAI_API_KEY, MIDSCENE_PLANNING_OPENAI_BASE_URL, MIDSCENE_PLANNING_OPENAI_HTTP_PROXY, MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON, MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_OPENAI_USE_AZURE, MIDSCENE_PLANNING_USE_ANTHROPIC_SDK, MIDSCENE_PLANNING_USE_AZURE_OPENAI, MIDSCENE_PLANNING_VL_MODE, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_ANTHROPIC_SDK, MIDSCENE_USE_AZURE_OPENAI, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MIDSCENE_VL_MODE, MIDSCENE_VQA_ANTHROPIC_API_KEY, MIDSCENE_VQA_AZURE_OPENAI_API_VERSION, MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT, MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_VQA_AZURE_OPENAI_KEY, MIDSCENE_VQA_AZURE_OPENAI_SCOPE, MIDSCENE_VQA_MODEL_NAME, MIDSCENE_VQA_OPENAI_API_KEY, MIDSCENE_VQA_OPENAI_BASE_URL, MIDSCENE_VQA_OPENAI_HTTP_PROXY, MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON, MIDSCENE_VQA_OPENAI_SOCKS_PROXY, MIDSCENE_VQA_OPENAI_USE_AZURE, MIDSCENE_VQA_USE_ANTHROPIC_SDK, MIDSCENE_VQA_USE_AZURE_OPENAI, MIDSCENE_VQA_VL_MODE, MODEL_ENV_KEYS, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, OPENAI_USE_AZURE, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
246
+ export { ALL_ENV_KEYS, ANTHROPIC_API_KEY, AZURE_OPENAI_API_VERSION, AZURE_OPENAI_DEPLOYMENT, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_KEY, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_ANTHROPIC_API_KEY, MIDSCENE_API_TYPE, MIDSCENE_AZURE_OPENAI_API_VERSION, MIDSCENE_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_AZURE_OPENAI_ENDPOINT, MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_AZURE_OPENAI_KEY, MIDSCENE_AZURE_OPENAI_SCOPE, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_AI_PROFILE, MIDSCENE_DEBUG_AI_RESPONSE, MIDSCENE_DEBUG_MODE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_GROUNDING_ANTHROPIC_API_KEY, MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION, MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT, MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_AZURE_OPENAI_KEY, MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE, MIDSCENE_GROUNDING_MODEL_NAME, MIDSCENE_GROUNDING_OPENAI_API_KEY, MIDSCENE_GROUNDING_OPENAI_BASE_URL, MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY, MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON, MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY, MIDSCENE_GROUNDING_OPENAI_USE_AZURE, MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK, MIDSCENE_GROUNDING_USE_AZURE_OPENAI, MIDSCENE_GROUNDING_VL_MODE, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_NAME, MIDSCENE_OPENAI_API_KEY, MIDSCENE_OPENAI_BASE_URL, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_OPENAI_USE_AZURE, MIDSCENE_PLANNING_ANTHROPIC_API_KEY, MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION, MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT, MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_PLANNING_AZURE_OPENAI_KEY, MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_OPENAI_API_KEY, MIDSCENE_PLANNING_OPENAI_BASE_URL, MIDSCENE_PLANNING_OPENAI_HTTP_PROXY, MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON, MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_OPENAI_USE_AZURE, MIDSCENE_PLANNING_USE_ANTHROPIC_SDK, MIDSCENE_PLANNING_USE_AZURE_OPENAI, MIDSCENE_PLANNING_VL_MODE, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_ANTHROPIC_SDK, MIDSCENE_USE_AZURE_OPENAI, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MIDSCENE_VL_MODE, MIDSCENE_VQA_ANTHROPIC_API_KEY, MIDSCENE_VQA_AZURE_OPENAI_API_VERSION, MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT, MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT, MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_VQA_AZURE_OPENAI_KEY, MIDSCENE_VQA_AZURE_OPENAI_SCOPE, MIDSCENE_VQA_MODEL_NAME, MIDSCENE_VQA_OPENAI_API_KEY, MIDSCENE_VQA_OPENAI_BASE_URL, MIDSCENE_VQA_OPENAI_HTTP_PROXY, MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON, MIDSCENE_VQA_OPENAI_SOCKS_PROXY, MIDSCENE_VQA_OPENAI_USE_AZURE, MIDSCENE_VQA_USE_ANTHROPIC_SDK, MIDSCENE_VQA_USE_AZURE_OPENAI, MIDSCENE_VQA_VL_MODE, MODEL_ENV_KEYS, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, OPENAI_USE_AZURE, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
@@ -106,46 +106,26 @@ async function paddingToMatchBlock(image, blockSize = 28) {
106
106
  const { width, height } = image.bitmap;
107
107
  const targetWidth = Math.ceil(width / blockSize) * blockSize;
108
108
  const targetHeight = Math.ceil(height / blockSize) * blockSize;
109
- if (targetWidth === width && targetHeight === height) return {
110
- width,
111
- height,
112
- image
113
- };
109
+ if (targetWidth === width && targetHeight === height) return image;
114
110
  const Jimp = await get_jimp();
115
111
  const paddedImage = new Jimp(targetWidth, targetHeight, 0xffffffff);
116
112
  paddedImage.composite(image, 0, 0);
117
- return {
118
- width: targetWidth,
119
- height: targetHeight,
120
- image: paddedImage
121
- };
113
+ return paddedImage;
122
114
  }
123
115
  async function paddingToMatchBlockByBase64(imageBase64, blockSize = 28) {
124
116
  const jimpImage = await jimpFromBase64(imageBase64);
125
- const paddedResult = await paddingToMatchBlock(jimpImage, blockSize);
126
- return {
127
- width: paddedResult.width,
128
- height: paddedResult.height,
129
- imageBase64: await jimpToBase64(paddedResult.image)
130
- };
117
+ const paddedImage = await paddingToMatchBlock(jimpImage, blockSize);
118
+ return jimpToBase64(paddedImage);
131
119
  }
132
120
  async function cropByRect(imageBase64, rect, paddingImage) {
133
121
  const jimpImage = await jimpFromBase64(imageBase64);
134
122
  const { left, top, width, height } = rect;
135
123
  jimpImage.crop(left, top, width, height);
136
124
  if (paddingImage) {
137
- const paddedResult = await paddingToMatchBlock(jimpImage);
138
- return {
139
- width: paddedResult.width,
140
- height: paddedResult.height,
141
- imageBase64: await jimpToBase64(paddedResult.image)
142
- };
125
+ const paddedImage = await paddingToMatchBlock(jimpImage);
126
+ return jimpToBase64(paddedImage);
143
127
  }
144
- return {
145
- width: jimpImage.bitmap.width,
146
- height: jimpImage.bitmap.height,
147
- imageBase64: await jimpToBase64(jimpImage)
148
- };
128
+ return jimpToBase64(jimpImage);
149
129
  }
150
130
  async function jimpToBase64(image) {
151
131
  const Jimp = await get_jimp();
@@ -51,21 +51,15 @@ const parseVlModeAndUiTarsModelVersionFromRawValue = (vlModeRaw)=>{
51
51
  const parseVlModeAndUiTarsFromGlobalConfig = (provider)=>{
52
52
  const isDoubao = provider[external_types_js_namespaceObject.MIDSCENE_USE_DOUBAO_VISION];
53
53
  const isQwen = provider[external_types_js_namespaceObject.MIDSCENE_USE_QWEN_VL];
54
- const isQwen3 = provider[external_types_js_namespaceObject.MIDSCENE_USE_QWEN3_VL];
55
54
  const isUiTars = provider[external_types_js_namespaceObject.MIDSCENE_USE_VLM_UI_TARS];
56
55
  const isGemini = provider[external_types_js_namespaceObject.MIDSCENE_USE_GEMINI];
57
56
  const enabledModes = [
58
57
  isDoubao && external_types_js_namespaceObject.MIDSCENE_USE_DOUBAO_VISION,
59
58
  isQwen && external_types_js_namespaceObject.MIDSCENE_USE_QWEN_VL,
60
- isQwen3 && external_types_js_namespaceObject.MIDSCENE_USE_QWEN3_VL,
61
59
  isUiTars && external_types_js_namespaceObject.MIDSCENE_USE_VLM_UI_TARS,
62
60
  isGemini && external_types_js_namespaceObject.MIDSCENE_USE_GEMINI
63
61
  ].filter(Boolean);
64
62
  if (enabledModes.length > 1) throw new Error(`Only one vision mode can be enabled at a time. Currently enabled modes: ${enabledModes.join(', ')}. Please disable all but one mode.`);
65
- if (isQwen3) return {
66
- vlMode: 'qwen3-vl',
67
- uiTarsVersion: void 0
68
- };
69
63
  if (isQwen) return {
70
64
  vlMode: 'qwen-vl',
71
65
  uiTarsVersion: void 0
@@ -107,7 +107,6 @@ __webpack_require__.d(__webpack_exports__, {
107
107
  MIDSCENE_USE_AZURE_OPENAI: ()=>MIDSCENE_USE_AZURE_OPENAI,
108
108
  MIDSCENE_USE_DOUBAO_VISION: ()=>MIDSCENE_USE_DOUBAO_VISION,
109
109
  MIDSCENE_USE_GEMINI: ()=>MIDSCENE_USE_GEMINI,
110
- MIDSCENE_USE_QWEN3_VL: ()=>MIDSCENE_USE_QWEN3_VL,
111
110
  MIDSCENE_USE_QWEN_VL: ()=>MIDSCENE_USE_QWEN_VL,
112
111
  MIDSCENE_USE_VLM_UI_TARS: ()=>MIDSCENE_USE_VLM_UI_TARS,
113
112
  MIDSCENE_USE_VL_MODEL: ()=>MIDSCENE_USE_VL_MODEL,
@@ -164,7 +163,6 @@ const MIDSCENE_ANDROID_IME_STRATEGY = 'MIDSCENE_ANDROID_IME_STRATEGY';
164
163
  const MIDSCENE_CACHE = 'MIDSCENE_CACHE';
165
164
  const MIDSCENE_USE_VLM_UI_TARS = 'MIDSCENE_USE_VLM_UI_TARS';
166
165
  const MIDSCENE_USE_QWEN_VL = 'MIDSCENE_USE_QWEN_VL';
167
- const MIDSCENE_USE_QWEN3_VL = 'MIDSCENE_USE_QWEN3_VL';
168
166
  const MIDSCENE_USE_DOUBAO_VISION = 'MIDSCENE_USE_DOUBAO_VISION';
169
167
  const MIDSCENE_USE_GEMINI = 'MIDSCENE_USE_GEMINI';
170
168
  const MIDSCENE_USE_VL_MODEL = 'MIDSCENE_USE_VL_MODEL';
@@ -296,7 +294,6 @@ const MODEL_ENV_KEYS = [
296
294
  MIDSCENE_USE_ANTHROPIC_SDK,
297
295
  MIDSCENE_USE_VLM_UI_TARS,
298
296
  MIDSCENE_USE_QWEN_VL,
299
- MIDSCENE_USE_QWEN3_VL,
300
297
  MIDSCENE_USE_DOUBAO_VISION,
301
298
  MIDSCENE_USE_GEMINI,
302
299
  MIDSCENE_USE_VL_MODEL,
@@ -383,7 +380,6 @@ const VL_MODE_RAW_VALID_VALUES = [
383
380
  'doubao-vision',
384
381
  'gemini',
385
382
  'qwen-vl',
386
- 'qwen3-vl',
387
383
  'vlm-ui-tars',
388
384
  'vlm-ui-tars-doubao',
389
385
  'vlm-ui-tars-doubao-1.5'
@@ -471,7 +467,6 @@ exports.MIDSCENE_USE_ANTHROPIC_SDK = __webpack_exports__.MIDSCENE_USE_ANTHROPIC_
471
467
  exports.MIDSCENE_USE_AZURE_OPENAI = __webpack_exports__.MIDSCENE_USE_AZURE_OPENAI;
472
468
  exports.MIDSCENE_USE_DOUBAO_VISION = __webpack_exports__.MIDSCENE_USE_DOUBAO_VISION;
473
469
  exports.MIDSCENE_USE_GEMINI = __webpack_exports__.MIDSCENE_USE_GEMINI;
474
- exports.MIDSCENE_USE_QWEN3_VL = __webpack_exports__.MIDSCENE_USE_QWEN3_VL;
475
470
  exports.MIDSCENE_USE_QWEN_VL = __webpack_exports__.MIDSCENE_USE_QWEN_VL;
476
471
  exports.MIDSCENE_USE_VLM_UI_TARS = __webpack_exports__.MIDSCENE_USE_VLM_UI_TARS;
477
472
  exports.MIDSCENE_USE_VL_MODEL = __webpack_exports__.MIDSCENE_USE_VL_MODEL;
@@ -587,7 +582,6 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
587
582
  "MIDSCENE_USE_AZURE_OPENAI",
588
583
  "MIDSCENE_USE_DOUBAO_VISION",
589
584
  "MIDSCENE_USE_GEMINI",
590
- "MIDSCENE_USE_QWEN3_VL",
591
585
  "MIDSCENE_USE_QWEN_VL",
592
586
  "MIDSCENE_USE_VLM_UI_TARS",
593
587
  "MIDSCENE_USE_VL_MODEL",
@@ -161,46 +161,26 @@ async function paddingToMatchBlock(image, blockSize = 28) {
161
161
  const { width, height } = image.bitmap;
162
162
  const targetWidth = Math.ceil(width / blockSize) * blockSize;
163
163
  const targetHeight = Math.ceil(height / blockSize) * blockSize;
164
- if (targetWidth === width && targetHeight === height) return {
165
- width,
166
- height,
167
- image
168
- };
164
+ if (targetWidth === width && targetHeight === height) return image;
169
165
  const Jimp = await external_get_jimp_js_default()();
170
166
  const paddedImage = new Jimp(targetWidth, targetHeight, 0xffffffff);
171
167
  paddedImage.composite(image, 0, 0);
172
- return {
173
- width: targetWidth,
174
- height: targetHeight,
175
- image: paddedImage
176
- };
168
+ return paddedImage;
177
169
  }
178
170
  async function paddingToMatchBlockByBase64(imageBase64, blockSize = 28) {
179
171
  const jimpImage = await jimpFromBase64(imageBase64);
180
- const paddedResult = await paddingToMatchBlock(jimpImage, blockSize);
181
- return {
182
- width: paddedResult.width,
183
- height: paddedResult.height,
184
- imageBase64: await jimpToBase64(paddedResult.image)
185
- };
172
+ const paddedImage = await paddingToMatchBlock(jimpImage, blockSize);
173
+ return jimpToBase64(paddedImage);
186
174
  }
187
175
  async function cropByRect(imageBase64, rect, paddingImage) {
188
176
  const jimpImage = await jimpFromBase64(imageBase64);
189
177
  const { left, top, width, height } = rect;
190
178
  jimpImage.crop(left, top, width, height);
191
179
  if (paddingImage) {
192
- const paddedResult = await paddingToMatchBlock(jimpImage);
193
- return {
194
- width: paddedResult.width,
195
- height: paddedResult.height,
196
- imageBase64: await jimpToBase64(paddedResult.image)
197
- };
180
+ const paddedImage = await paddingToMatchBlock(jimpImage);
181
+ return jimpToBase64(paddedImage);
198
182
  }
199
- return {
200
- width: jimpImage.bitmap.width,
201
- height: jimpImage.bitmap.height,
202
- imageBase64: await jimpToBase64(jimpImage)
203
- };
183
+ return jimpToBase64(jimpImage);
204
184
  }
205
185
  async function jimpToBase64(image) {
206
186
  const Jimp = await external_get_jimp_js_default()();
@@ -22,7 +22,6 @@ export declare const MIDSCENE_ANDROID_IME_STRATEGY = "MIDSCENE_ANDROID_IME_STRAT
22
22
  export declare const MIDSCENE_CACHE = "MIDSCENE_CACHE";
23
23
  export declare const MIDSCENE_USE_VLM_UI_TARS = "MIDSCENE_USE_VLM_UI_TARS";
24
24
  export declare const MIDSCENE_USE_QWEN_VL = "MIDSCENE_USE_QWEN_VL";
25
- export declare const MIDSCENE_USE_QWEN3_VL = "MIDSCENE_USE_QWEN3_VL";
26
25
  export declare const MIDSCENE_USE_DOUBAO_VISION = "MIDSCENE_USE_DOUBAO_VISION";
27
26
  export declare const MIDSCENE_USE_GEMINI = "MIDSCENE_USE_GEMINI";
28
27
  export declare const MIDSCENE_USE_VL_MODEL = "MIDSCENE_USE_VL_MODEL";
@@ -126,12 +125,12 @@ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_LANG
126
125
  * Can be override by both agent.modelConfig and overrideAIConfig
127
126
  * Can only be access after agent.constructor
128
127
  */
129
- export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_API_KEY", "MIDSCENE_OPENAI_BASE_URL", "MIDSCENE_OPENAI_USE_AZURE", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_USE_AZURE_OPENAI", "MIDSCENE_AZURE_OPENAI_SCOPE", "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_USE_ANTHROPIC_SDK", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "ANTHROPIC_API_KEY", "MIDSCENE_AZURE_OPENAI_ENDPOINT", "MIDSCENE_AZURE_OPENAI_KEY", "MIDSCENE_AZURE_OPENAI_API_VERSION", "MIDSCENE_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VL_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_USE_AZURE", "ANTHROPIC_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY", "AZURE_OPENAI_API_VERSION", "AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_OPENAI_SOCKS_PROXY", "MIDSCENE_VQA_OPENAI_HTTP_PROXY", "MIDSCENE_VQA_OPENAI_BASE_URL", "MIDSCENE_VQA_OPENAI_API_KEY", "MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_OPENAI_USE_AZURE", "MIDSCENE_VQA_USE_AZURE_OPENAI", "MIDSCENE_VQA_AZURE_OPENAI_SCOPE", "MIDSCENE_VQA_AZURE_OPENAI_KEY", "MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT", "MIDSCENE_VQA_AZURE_OPENAI_API_VERSION", "MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_USE_ANTHROPIC_SDK", "MIDSCENE_VQA_ANTHROPIC_API_KEY", "MIDSCENE_VQA_VL_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY", "MIDSCENE_PLANNING_OPENAI_HTTP_PROXY", "MIDSCENE_PLANNING_OPENAI_BASE_URL", "MIDSCENE_PLANNING_OPENAI_API_KEY", "MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_OPENAI_USE_AZURE", "MIDSCENE_PLANNING_USE_AZURE_OPENAI", "MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE", "MIDSCENE_PLANNING_AZURE_OPENAI_KEY", "MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION", "MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_USE_ANTHROPIC_SDK", "MIDSCENE_PLANNING_ANTHROPIC_API_KEY", "MIDSCENE_PLANNING_VL_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY", "MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY", "MIDSCENE_GROUNDING_OPENAI_BASE_URL", "MIDSCENE_GROUNDING_OPENAI_API_KEY", "MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_OPENAI_USE_AZURE", "MIDSCENE_GROUNDING_USE_AZURE_OPENAI", "MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE", "MIDSCENE_GROUNDING_AZURE_OPENAI_KEY", "MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION", "MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK", "MIDSCENE_GROUNDING_ANTHROPIC_API_KEY", "MIDSCENE_GROUNDING_VL_MODE"];
130
- export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_AI_PROFILE", "MIDSCENE_DEBUG_AI_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_API_KEY", "MIDSCENE_OPENAI_BASE_URL", "MIDSCENE_OPENAI_USE_AZURE", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_USE_AZURE_OPENAI", "MIDSCENE_AZURE_OPENAI_SCOPE", "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_USE_ANTHROPIC_SDK", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "ANTHROPIC_API_KEY", "MIDSCENE_AZURE_OPENAI_ENDPOINT", "MIDSCENE_AZURE_OPENAI_KEY", "MIDSCENE_AZURE_OPENAI_API_VERSION", "MIDSCENE_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VL_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_USE_AZURE", "ANTHROPIC_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY", "AZURE_OPENAI_API_VERSION", "AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_OPENAI_SOCKS_PROXY", "MIDSCENE_VQA_OPENAI_HTTP_PROXY", "MIDSCENE_VQA_OPENAI_BASE_URL", "MIDSCENE_VQA_OPENAI_API_KEY", "MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_OPENAI_USE_AZURE", "MIDSCENE_VQA_USE_AZURE_OPENAI", "MIDSCENE_VQA_AZURE_OPENAI_SCOPE", "MIDSCENE_VQA_AZURE_OPENAI_KEY", "MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT", "MIDSCENE_VQA_AZURE_OPENAI_API_VERSION", "MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_USE_ANTHROPIC_SDK", "MIDSCENE_VQA_ANTHROPIC_API_KEY", "MIDSCENE_VQA_VL_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY", "MIDSCENE_PLANNING_OPENAI_HTTP_PROXY", "MIDSCENE_PLANNING_OPENAI_BASE_URL", "MIDSCENE_PLANNING_OPENAI_API_KEY", "MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_OPENAI_USE_AZURE", "MIDSCENE_PLANNING_USE_AZURE_OPENAI", "MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE", "MIDSCENE_PLANNING_AZURE_OPENAI_KEY", "MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION", "MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_USE_ANTHROPIC_SDK", "MIDSCENE_PLANNING_ANTHROPIC_API_KEY", "MIDSCENE_PLANNING_VL_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY", "MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY", "MIDSCENE_GROUNDING_OPENAI_BASE_URL", "MIDSCENE_GROUNDING_OPENAI_API_KEY", "MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_OPENAI_USE_AZURE", "MIDSCENE_GROUNDING_USE_AZURE_OPENAI", "MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE", "MIDSCENE_GROUNDING_AZURE_OPENAI_KEY", "MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION", "MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK", "MIDSCENE_GROUNDING_ANTHROPIC_API_KEY", "MIDSCENE_GROUNDING_VL_MODE"];
128
+ export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_API_KEY", "MIDSCENE_OPENAI_BASE_URL", "MIDSCENE_OPENAI_USE_AZURE", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_USE_AZURE_OPENAI", "MIDSCENE_AZURE_OPENAI_SCOPE", "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_USE_ANTHROPIC_SDK", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "ANTHROPIC_API_KEY", "MIDSCENE_AZURE_OPENAI_ENDPOINT", "MIDSCENE_AZURE_OPENAI_KEY", "MIDSCENE_AZURE_OPENAI_API_VERSION", "MIDSCENE_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VL_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_USE_AZURE", "ANTHROPIC_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY", "AZURE_OPENAI_API_VERSION", "AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_OPENAI_SOCKS_PROXY", "MIDSCENE_VQA_OPENAI_HTTP_PROXY", "MIDSCENE_VQA_OPENAI_BASE_URL", "MIDSCENE_VQA_OPENAI_API_KEY", "MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_OPENAI_USE_AZURE", "MIDSCENE_VQA_USE_AZURE_OPENAI", "MIDSCENE_VQA_AZURE_OPENAI_SCOPE", "MIDSCENE_VQA_AZURE_OPENAI_KEY", "MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT", "MIDSCENE_VQA_AZURE_OPENAI_API_VERSION", "MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_USE_ANTHROPIC_SDK", "MIDSCENE_VQA_ANTHROPIC_API_KEY", "MIDSCENE_VQA_VL_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY", "MIDSCENE_PLANNING_OPENAI_HTTP_PROXY", "MIDSCENE_PLANNING_OPENAI_BASE_URL", "MIDSCENE_PLANNING_OPENAI_API_KEY", "MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_OPENAI_USE_AZURE", "MIDSCENE_PLANNING_USE_AZURE_OPENAI", "MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE", "MIDSCENE_PLANNING_AZURE_OPENAI_KEY", "MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION", "MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_USE_ANTHROPIC_SDK", "MIDSCENE_PLANNING_ANTHROPIC_API_KEY", "MIDSCENE_PLANNING_VL_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY", "MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY", "MIDSCENE_GROUNDING_OPENAI_BASE_URL", "MIDSCENE_GROUNDING_OPENAI_API_KEY", "MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_OPENAI_USE_AZURE", "MIDSCENE_GROUNDING_USE_AZURE_OPENAI", "MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE", "MIDSCENE_GROUNDING_AZURE_OPENAI_KEY", "MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION", "MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK", "MIDSCENE_GROUNDING_ANTHROPIC_API_KEY", "MIDSCENE_GROUNDING_VL_MODE"];
129
+ export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_AI_PROFILE", "MIDSCENE_DEBUG_AI_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_API_KEY", "MIDSCENE_OPENAI_BASE_URL", "MIDSCENE_OPENAI_USE_AZURE", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_USE_AZURE_OPENAI", "MIDSCENE_AZURE_OPENAI_SCOPE", "MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_USE_ANTHROPIC_SDK", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "ANTHROPIC_API_KEY", "MIDSCENE_AZURE_OPENAI_ENDPOINT", "MIDSCENE_AZURE_OPENAI_KEY", "MIDSCENE_AZURE_OPENAI_API_VERSION", "MIDSCENE_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VL_MODE", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_USE_AZURE", "ANTHROPIC_API_KEY", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_KEY", "AZURE_OPENAI_API_VERSION", "AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_MODEL_NAME", "MIDSCENE_VQA_OPENAI_SOCKS_PROXY", "MIDSCENE_VQA_OPENAI_HTTP_PROXY", "MIDSCENE_VQA_OPENAI_BASE_URL", "MIDSCENE_VQA_OPENAI_API_KEY", "MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_OPENAI_USE_AZURE", "MIDSCENE_VQA_USE_AZURE_OPENAI", "MIDSCENE_VQA_AZURE_OPENAI_SCOPE", "MIDSCENE_VQA_AZURE_OPENAI_KEY", "MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT", "MIDSCENE_VQA_AZURE_OPENAI_API_VERSION", "MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_VQA_USE_ANTHROPIC_SDK", "MIDSCENE_VQA_ANTHROPIC_API_KEY", "MIDSCENE_VQA_VL_MODE", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_OPENAI_SOCKS_PROXY", "MIDSCENE_PLANNING_OPENAI_HTTP_PROXY", "MIDSCENE_PLANNING_OPENAI_BASE_URL", "MIDSCENE_PLANNING_OPENAI_API_KEY", "MIDSCENE_PLANNING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_OPENAI_USE_AZURE", "MIDSCENE_PLANNING_USE_AZURE_OPENAI", "MIDSCENE_PLANNING_AZURE_OPENAI_SCOPE", "MIDSCENE_PLANNING_AZURE_OPENAI_KEY", "MIDSCENE_PLANNING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_PLANNING_AZURE_OPENAI_API_VERSION", "MIDSCENE_PLANNING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_PLANNING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_USE_ANTHROPIC_SDK", "MIDSCENE_PLANNING_ANTHROPIC_API_KEY", "MIDSCENE_PLANNING_VL_MODE", "MIDSCENE_GROUNDING_MODEL_NAME", "MIDSCENE_GROUNDING_OPENAI_SOCKS_PROXY", "MIDSCENE_GROUNDING_OPENAI_HTTP_PROXY", "MIDSCENE_GROUNDING_OPENAI_BASE_URL", "MIDSCENE_GROUNDING_OPENAI_API_KEY", "MIDSCENE_GROUNDING_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_OPENAI_USE_AZURE", "MIDSCENE_GROUNDING_USE_AZURE_OPENAI", "MIDSCENE_GROUNDING_AZURE_OPENAI_SCOPE", "MIDSCENE_GROUNDING_AZURE_OPENAI_KEY", "MIDSCENE_GROUNDING_AZURE_OPENAI_ENDPOINT", "MIDSCENE_GROUNDING_AZURE_OPENAI_API_VERSION", "MIDSCENE_GROUNDING_AZURE_OPENAI_DEPLOYMENT", "MIDSCENE_GROUNDING_AZURE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_GROUNDING_USE_ANTHROPIC_SDK", "MIDSCENE_GROUNDING_ANTHROPIC_API_KEY", "MIDSCENE_GROUNDING_VL_MODE"];
131
130
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
132
131
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
133
- export type TVlModeValues = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
134
- export type TVlModeTypes = 'qwen-vl' | 'qwen3-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars';
132
+ export type TVlModeValues = 'qwen-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5';
133
+ export type TVlModeTypes = 'qwen-vl' | 'doubao-vision' | 'gemini' | 'vlm-ui-tars';
135
134
  export interface IModelConfigForVQA {
136
135
  [MIDSCENE_VQA_MODEL_NAME]: string;
137
136
  [MIDSCENE_VQA_OPENAI_SOCKS_PROXY]?: string;
@@ -49,21 +49,9 @@ export declare function zoomForGPT4o(originalWidth: number, originalHeight: numb
49
49
  height: number;
50
50
  };
51
51
  export declare function jimpFromBase64(base64: string): Promise<Jimp>;
52
- export declare function paddingToMatchBlock(image: Jimp, blockSize?: number): Promise<{
53
- width: number;
54
- height: number;
55
- image: Jimp;
56
- }>;
57
- export declare function paddingToMatchBlockByBase64(imageBase64: string, blockSize?: number): Promise<{
58
- width: number;
59
- height: number;
60
- imageBase64: string;
61
- }>;
62
- export declare function cropByRect(imageBase64: string, rect: Rect, paddingImage: boolean): Promise<{
63
- width: number;
64
- height: number;
65
- imageBase64: string;
66
- }>;
52
+ export declare function paddingToMatchBlock(image: Jimp, blockSize?: number): Promise<Jimp>;
53
+ export declare function paddingToMatchBlockByBase64(imageBase64: string, blockSize?: number): Promise<string>;
54
+ export declare function cropByRect(imageBase64: string, rect: Rect, paddingImage: boolean): Promise<string>;
67
55
  export declare function jimpToBase64(image: Jimp): Promise<string>;
68
56
  export declare const httpImg2Base64: (url: string) => Promise<string>;
69
57
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "0.28.12-beta-20250924093113.0",
3
+ "version": "0.29.0",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
package/src/env/parse.ts CHANGED
@@ -1,7 +1,6 @@
1
1
  import {
2
2
  MIDSCENE_USE_DOUBAO_VISION,
3
3
  MIDSCENE_USE_GEMINI,
4
- MIDSCENE_USE_QWEN3_VL,
5
4
  MIDSCENE_USE_QWEN_VL,
6
5
  MIDSCENE_USE_VLM_UI_TARS,
7
6
  type TVlModeTypes,
@@ -59,14 +58,12 @@ export const parseVlModeAndUiTarsFromGlobalConfig = (
59
58
  } => {
60
59
  const isDoubao = provider[MIDSCENE_USE_DOUBAO_VISION];
61
60
  const isQwen = provider[MIDSCENE_USE_QWEN_VL];
62
- const isQwen3 = provider[MIDSCENE_USE_QWEN3_VL];
63
61
  const isUiTars = provider[MIDSCENE_USE_VLM_UI_TARS];
64
62
  const isGemini = provider[MIDSCENE_USE_GEMINI];
65
63
 
66
64
  const enabledModes = [
67
65
  isDoubao && MIDSCENE_USE_DOUBAO_VISION,
68
66
  isQwen && MIDSCENE_USE_QWEN_VL,
69
- isQwen3 && MIDSCENE_USE_QWEN3_VL,
70
67
  isUiTars && MIDSCENE_USE_VLM_UI_TARS,
71
68
  isGemini && MIDSCENE_USE_GEMINI,
72
69
  ].filter(Boolean);
@@ -77,13 +74,6 @@ export const parseVlModeAndUiTarsFromGlobalConfig = (
77
74
  );
78
75
  }
79
76
 
80
- if (isQwen3) {
81
- return {
82
- vlMode: 'qwen3-vl',
83
- uiTarsVersion: undefined,
84
- };
85
- }
86
-
87
77
  if (isQwen) {
88
78
  return {
89
79
  vlMode: 'qwen-vl',
package/src/env/types.ts CHANGED
@@ -29,7 +29,6 @@ export const MIDSCENE_ANDROID_IME_STRATEGY = 'MIDSCENE_ANDROID_IME_STRATEGY';
29
29
  export const MIDSCENE_CACHE = 'MIDSCENE_CACHE';
30
30
  export const MIDSCENE_USE_VLM_UI_TARS = 'MIDSCENE_USE_VLM_UI_TARS';
31
31
  export const MIDSCENE_USE_QWEN_VL = 'MIDSCENE_USE_QWEN_VL';
32
- export const MIDSCENE_USE_QWEN3_VL = 'MIDSCENE_USE_QWEN3_VL';
33
32
  export const MIDSCENE_USE_DOUBAO_VISION = 'MIDSCENE_USE_DOUBAO_VISION';
34
33
  export const MIDSCENE_USE_GEMINI = 'MIDSCENE_USE_GEMINI';
35
34
  export const MIDSCENE_USE_VL_MODEL = 'MIDSCENE_USE_VL_MODEL';
@@ -243,7 +242,6 @@ export const MODEL_ENV_KEYS = [
243
242
  MIDSCENE_USE_ANTHROPIC_SDK,
244
243
  MIDSCENE_USE_VLM_UI_TARS,
245
244
  MIDSCENE_USE_QWEN_VL,
246
- MIDSCENE_USE_QWEN3_VL,
247
245
  MIDSCENE_USE_DOUBAO_VISION,
248
246
  MIDSCENE_USE_GEMINI,
249
247
  MIDSCENE_USE_VL_MODEL,
@@ -330,7 +328,6 @@ export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
330
328
 
331
329
  export type TVlModeValues =
332
330
  | 'qwen-vl'
333
- | 'qwen3-vl'
334
331
  | 'doubao-vision'
335
332
  | 'gemini'
336
333
  | 'vlm-ui-tars'
@@ -339,7 +336,6 @@ export type TVlModeValues =
339
336
 
340
337
  export type TVlModeTypes =
341
338
  | 'qwen-vl'
342
- | 'qwen3-vl'
343
339
  | 'doubao-vision'
344
340
  | 'gemini'
345
341
  | 'vlm-ui-tars';
@@ -501,7 +497,6 @@ export const VL_MODE_RAW_VALID_VALUES: TVlModeValues[] = [
501
497
  'doubao-vision',
502
498
  'gemini',
503
499
  'qwen-vl',
504
- 'qwen3-vl',
505
500
  'vlm-ui-tars',
506
501
  'vlm-ui-tars-doubao',
507
502
  'vlm-ui-tars-doubao-1.5',
@@ -228,18 +228,14 @@ export async function jimpFromBase64(base64: string): Promise<Jimp> {
228
228
  export async function paddingToMatchBlock(
229
229
  image: Jimp,
230
230
  blockSize = 28,
231
- ): Promise<{
232
- width: number;
233
- height: number;
234
- image: Jimp;
235
- }> {
231
+ ): Promise<Jimp> {
236
232
  const { width, height } = image.bitmap;
237
233
 
238
234
  const targetWidth = Math.ceil(width / blockSize) * blockSize;
239
235
  const targetHeight = Math.ceil(height / blockSize) * blockSize;
240
236
 
241
237
  if (targetWidth === width && targetHeight === height) {
242
- return { width, height, image };
238
+ return image;
243
239
  }
244
240
 
245
241
  const Jimp = await getJimp();
@@ -247,52 +243,31 @@ export async function paddingToMatchBlock(
247
243
 
248
244
  // Composite the original image onto the new canvas
249
245
  paddedImage.composite(image, 0, 0);
250
- return { width: targetWidth, height: targetHeight, image: paddedImage };
246
+ return paddedImage;
251
247
  }
252
248
 
253
249
  export async function paddingToMatchBlockByBase64(
254
250
  imageBase64: string,
255
251
  blockSize = 28,
256
- ): Promise<{
257
- width: number;
258
- height: number;
259
- imageBase64: string;
260
- }> {
252
+ ): Promise<string> {
261
253
  const jimpImage = await jimpFromBase64(imageBase64);
262
- const paddedResult = await paddingToMatchBlock(jimpImage, blockSize);
263
- return {
264
- width: paddedResult.width,
265
- height: paddedResult.height,
266
- imageBase64: await jimpToBase64(paddedResult.image),
267
- };
254
+ const paddedImage = await paddingToMatchBlock(jimpImage, blockSize);
255
+ return jimpToBase64(paddedImage);
268
256
  }
269
-
270
257
  export async function cropByRect(
271
258
  imageBase64: string,
272
259
  rect: Rect,
273
260
  paddingImage: boolean,
274
- ): Promise<{
275
- width: number;
276
- height: number;
277
- imageBase64: string;
278
- }> {
261
+ ): Promise<string> {
279
262
  const jimpImage = await jimpFromBase64(imageBase64);
280
263
  const { left, top, width, height } = rect;
281
264
  jimpImage.crop(left, top, width, height);
282
265
 
283
266
  if (paddingImage) {
284
- const paddedResult = await paddingToMatchBlock(jimpImage);
285
- return {
286
- width: paddedResult.width,
287
- height: paddedResult.height,
288
- imageBase64: await jimpToBase64(paddedResult.image),
289
- };
267
+ const paddedImage = await paddingToMatchBlock(jimpImage);
268
+ return jimpToBase64(paddedImage);
290
269
  }
291
- return {
292
- width: jimpImage.bitmap.width,
293
- height: jimpImage.bitmap.height,
294
- imageBase64: await jimpToBase64(jimpImage),
295
- };
270
+ return jimpToBase64(jimpImage);
296
271
  }
297
272
 
298
273
  export async function jimpToBase64(image: Jimp): Promise<string> {