@midscene/shared 1.8.5-beta-20260525033347.0 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import { getKeyAliases } from "../key-alias-utils.mjs";
3
+ import { CLIError } from "./cli-error.mjs";
3
4
  function parseValue(raw) {
4
5
  if (raw.startsWith('{') || raw.startsWith('[')) try {
5
6
  return JSON.parse(raw);
@@ -23,7 +24,20 @@ function walkCliArgs(args, setArgValue) {
23
24
  function parseCliArgs(args) {
24
25
  const result = {};
25
26
  walkCliArgs(args, (key, value)=>{
26
- result[key] = value;
27
+ const existing = result[key];
28
+ if (void 0 === existing) {
29
+ result[key] = value;
30
+ return;
31
+ }
32
+ if (Array.isArray(existing)) {
33
+ existing.push(value);
34
+ result[key] = existing;
35
+ return;
36
+ }
37
+ result[key] = [
38
+ existing,
39
+ value
40
+ ];
27
41
  });
28
42
  return result;
29
43
  }
@@ -92,4 +106,29 @@ function formatCliValidationError(scriptName, commandName, def, rawArgs) {
92
106
  const optionName = 'string' == typeof issue?.path[0] ? `--${issue.path[0]}` : 'CLI arguments';
93
107
  return `Invalid value for "${optionName}" in ${scriptName} ${commandName}: ${issue?.message ?? parsed.error.message}`;
94
108
  }
95
- export { formatCliValidationError, getCliOptionDisplay, parseCliArgs, parseValue };
109
+ function canonicalizeCliArgKeys(scriptName, commandName, def, rawArgs) {
110
+ if (0 === Object.keys(def.schema).length) return rawArgs;
111
+ const result = {
112
+ ...rawArgs
113
+ };
114
+ for (const schemaKey of Object.keys(def.schema)){
115
+ const cliOption = def.cli?.options?.[schemaKey];
116
+ const acceptedSpellings = getAcceptedCliOptionNames(schemaKey, cliOption);
117
+ let chosenSpelling;
118
+ let chosenValue;
119
+ for (const spelling of acceptedSpellings)if (spelling !== schemaKey) {
120
+ if (spelling in result) {
121
+ if (void 0 !== chosenSpelling) throw new CLIError(`Conflicting CLI options "--${chosenSpelling}" and "--${spelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`);
122
+ chosenSpelling = spelling;
123
+ chosenValue = result[spelling];
124
+ }
125
+ }
126
+ if (void 0 !== chosenSpelling) {
127
+ if (schemaKey in result && result[schemaKey] !== chosenValue) throw new CLIError(`Conflicting CLI options "--${schemaKey}" and "--${chosenSpelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`);
128
+ result[schemaKey] = chosenValue;
129
+ delete result[chosenSpelling];
130
+ }
131
+ }
132
+ return result;
133
+ }
134
+ export { canonicalizeCliArgKeys, formatCliValidationError, getCliOptionDisplay, parseCliArgs, parseValue };
@@ -3,7 +3,7 @@ import { tmpdir } from "node:os";
3
3
  import { join } from "node:path";
4
4
  import dotenv from "dotenv";
5
5
  import { getDebug } from "../logger.mjs";
6
- import { formatCliValidationError, getCliOptionDisplay, parseCliArgs, parseValue } from "./cli-args.mjs";
6
+ import { canonicalizeCliArgKeys, formatCliValidationError, getCliOptionDisplay, parseCliArgs, parseValue } from "./cli-args.mjs";
7
7
  import { CLIError, reportCLIError } from "./cli-error.mjs";
8
8
  const debug = getDebug('cli-runner');
9
9
  function outputContentItem(item, isError) {
@@ -112,8 +112,9 @@ async function runToolsCLI(tools, scriptName, options) {
112
112
  }
113
113
  const cliValidationError = formatCliValidationError(scriptName, match.name, match.def, parsedArgs);
114
114
  if (cliValidationError) throw new CLIError(cliValidationError);
115
- debug('command: %s, args: %s', match.name, JSON.stringify(parsedArgs));
116
- const result = await match.def.handler(parsedArgs);
115
+ const handlerArgs = canonicalizeCliArgKeys(scriptName, match.name, match.def, parsedArgs);
116
+ debug('command: %s, args: %s', match.name, JSON.stringify(handlerArgs));
117
+ const result = await match.def.handler(handlerArgs);
117
118
  debug('command %s completed, isError: %s', match.name, result.isError ?? false);
118
119
  outputResult(result);
119
120
  await tools.destroy();
@@ -5,7 +5,7 @@ const PLAYWRIGHT_EXAMPLE_CODE = `
5
5
  IMPORTANT: Follow these exact type signatures for AI functions:
6
6
 
7
7
  // Type signatures for AI functions:
8
- aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
8
+ aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<string | undefined>
9
9
  aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
10
10
  aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
11
11
  aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
@@ -1,4 +1,4 @@
1
- import { MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
1
+ import { MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_MAX_TOKENS, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_MAX_TOKENS, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS } from "./types.mjs";
2
2
  const INSIGHT_MODEL_CONFIG_KEYS = {
3
3
  modelName: MIDSCENE_INSIGHT_MODEL_NAME,
4
4
  socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
@@ -7,6 +7,7 @@ const INSIGHT_MODEL_CONFIG_KEYS = {
7
7
  openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
8
8
  openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
9
9
  extraBody: MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON,
10
+ maxTokens: MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
10
11
  modelFamily: MIDSCENE_INSIGHT_MODEL_FAMILY,
11
12
  timeout: MIDSCENE_INSIGHT_MODEL_TIMEOUT,
12
13
  temperature: MIDSCENE_INSIGHT_MODEL_TEMPERATURE,
@@ -24,6 +25,7 @@ const PLANNING_MODEL_CONFIG_KEYS = {
24
25
  openaiApiKey: MIDSCENE_PLANNING_MODEL_API_KEY,
25
26
  openaiExtraConfig: MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
26
27
  extraBody: MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON,
28
+ maxTokens: MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
27
29
  modelFamily: MIDSCENE_PLANNING_MODEL_FAMILY,
28
30
  timeout: MIDSCENE_PLANNING_MODEL_TIMEOUT,
29
31
  temperature: MIDSCENE_PLANNING_MODEL_TEMPERATURE,
@@ -41,6 +43,7 @@ const DEFAULT_MODEL_CONFIG_KEYS = {
41
43
  openaiApiKey: MIDSCENE_MODEL_API_KEY,
42
44
  openaiExtraConfig: MIDSCENE_MODEL_INIT_CONFIG_JSON,
43
45
  extraBody: MIDSCENE_MODEL_EXTRA_BODY_JSON,
46
+ maxTokens: MIDSCENE_MODEL_MAX_TOKENS,
44
47
  modelFamily: MIDSCENE_MODEL_FAMILY,
45
48
  timeout: MIDSCENE_MODEL_TIMEOUT,
46
49
  temperature: MIDSCENE_MODEL_TEMPERATURE,
@@ -58,6 +61,7 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
58
61
  openaiApiKey: OPENAI_API_KEY,
59
62
  openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
60
63
  extraBody: MIDSCENE_MODEL_EXTRA_BODY_JSON,
64
+ maxTokens: OPENAI_MAX_TOKENS,
61
65
  modelFamily: 'DEFAULT_MODEL_CONFIG_KEYS has no modelFamily key',
62
66
  timeout: MIDSCENE_MODEL_TIMEOUT,
63
67
  temperature: MIDSCENE_MODEL_TEMPERATURE,
@@ -1,11 +1,11 @@
1
1
  import { DEFAULT_MODEL_CONFIG_KEYS, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from "./constants.mjs";
2
- import { MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, OPENAI_API_KEY, OPENAI_BASE_URL, UITarsModelVersion } from "./types.mjs";
2
+ import { MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, UITarsModelVersion } from "./types.mjs";
3
3
  import { getDebug } from "../logger.mjs";
4
4
  import { assert } from "../utils.mjs";
5
5
  import { maskConfig, parseJson } from "./helper.mjs";
6
6
  import { initDebugConfig } from "./init-debug.mjs";
7
7
  const MODEL_CONFIG_DOC_URL = 'https://midscenejs.com/model-common-config.html';
8
- const getCurrentVersion = ()=>"1.8.5-beta-20260525033347.0";
8
+ const getCurrentVersion = ()=>"1.8.5";
9
9
  const getInvalidModelFamilyMessage = (modelFamily)=>`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}. Current version v${getCurrentVersion()} accepts the following model families: ${MODEL_FAMILY_VALUES.join(', ')}. You can also visit ${MODEL_CONFIG_DOC_URL} for the latest configuration information.`;
10
10
  const KEYS_MAP = {
11
11
  insight: INSIGHT_MODEL_CONFIG_KEYS,
@@ -65,6 +65,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
65
65
  const legacySocksProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_SOCKS_PROXY] : void 0;
66
66
  const legacyHttpProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_HTTP_PROXY] : void 0;
67
67
  const legacyOpenaiExtraConfig = useLegacyLogic ? provider[MIDSCENE_OPENAI_INIT_CONFIG_JSON] : void 0;
68
+ const legacyMaxTokens = useLegacyLogic ? provider[OPENAI_MAX_TOKENS] : void 0;
68
69
  const legacyModelFamily = useLegacyLogic ? legacyConfigToModelFamily(provider) : void 0;
69
70
  const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
70
71
  const openaiApiKey = provider[keys.openaiApiKey] || legacyAPIKey;
@@ -76,6 +77,13 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
76
77
  const openaiExtraConfig = parseJson(keys.openaiExtraConfig, openaiExtraConfigStr || legacyOpenaiExtraConfig);
77
78
  const extraBodyStr = provider[keys.extraBody];
78
79
  const extraBody = parseJson(keys.extraBody, extraBodyStr);
80
+ const maxTokensStr = provider[keys.maxTokens] || legacyMaxTokens;
81
+ const maxTokens = (()=>{
82
+ const val = maxTokensStr?.trim();
83
+ if (!val) return;
84
+ const num = Number(val);
85
+ return Number.isFinite(num) ? num : void 0;
86
+ })();
79
87
  const temperature = provider[keys.temperature] ? Number(provider[keys.temperature]) : 0;
80
88
  const modelFamily = modelFamilyRaw;
81
89
  validateModelFamily(modelFamily);
@@ -88,6 +96,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
88
96
  openaiApiKey,
89
97
  openaiExtraConfig: normalizeOpenaiExtraConfig(openaiExtraConfig),
90
98
  extraBody,
99
+ maxTokens,
91
100
  modelFamily,
92
101
  uiTarsModelVersion,
93
102
  modelName: modelName,
@@ -57,6 +57,7 @@ const MIDSCENE_INSIGHT_MODEL_BASE_URL = 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
57
57
  const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
58
58
  const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
59
59
  const MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON = 'MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON';
60
+ const MIDSCENE_INSIGHT_MODEL_MAX_TOKENS = 'MIDSCENE_INSIGHT_MODEL_MAX_TOKENS';
60
61
  const MIDSCENE_INSIGHT_MODEL_TIMEOUT = 'MIDSCENE_INSIGHT_MODEL_TIMEOUT';
61
62
  const MIDSCENE_INSIGHT_MODEL_TEMPERATURE = 'MIDSCENE_INSIGHT_MODEL_TEMPERATURE';
62
63
  const MIDSCENE_INSIGHT_MODEL_RETRY_COUNT = 'MIDSCENE_INSIGHT_MODEL_RETRY_COUNT';
@@ -72,6 +73,7 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
72
73
  const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
73
74
  const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
74
75
  const MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON = 'MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON';
76
+ const MIDSCENE_PLANNING_MODEL_MAX_TOKENS = 'MIDSCENE_PLANNING_MODEL_MAX_TOKENS';
75
77
  const MIDSCENE_PLANNING_MODEL_TIMEOUT = 'MIDSCENE_PLANNING_MODEL_TIMEOUT';
76
78
  const MIDSCENE_PLANNING_MODEL_TEMPERATURE = 'MIDSCENE_PLANNING_MODEL_TEMPERATURE';
77
79
  const MIDSCENE_PLANNING_MODEL_RETRY_COUNT = 'MIDSCENE_PLANNING_MODEL_RETRY_COUNT';
@@ -99,13 +101,10 @@ const BOOLEAN_ENV_KEYS = [
99
101
  MIDSCENE_REPORT_QUIET
100
102
  ];
101
103
  const NUMBER_ENV_KEYS = [
102
- MIDSCENE_MODEL_MAX_TOKENS,
103
104
  MIDSCENE_CACHE_MAX_FILENAME_LENGTH,
104
105
  MIDSCENE_REPLANNING_CYCLE_LIMIT
105
106
  ];
106
107
  const STRING_ENV_KEYS = [
107
- MIDSCENE_MODEL_MAX_TOKENS,
108
- OPENAI_MAX_TOKENS,
109
108
  MIDSCENE_ADB_PATH,
110
109
  MIDSCENE_ADB_REMOTE_HOST,
111
110
  MIDSCENE_ADB_REMOTE_PORT,
@@ -131,6 +130,7 @@ const MODEL_ENV_KEYS = [
131
130
  MIDSCENE_MODEL_BASE_URL,
132
131
  MIDSCENE_MODEL_SOCKS_PROXY,
133
132
  MIDSCENE_MODEL_HTTP_PROXY,
133
+ MIDSCENE_MODEL_MAX_TOKENS,
134
134
  MIDSCENE_MODEL_TIMEOUT,
135
135
  MIDSCENE_MODEL_TEMPERATURE,
136
136
  MIDSCENE_MODEL_RETRY_COUNT,
@@ -146,6 +146,7 @@ const MODEL_ENV_KEYS = [
146
146
  MIDSCENE_USE_VL_MODEL,
147
147
  OPENAI_API_KEY,
148
148
  OPENAI_BASE_URL,
149
+ OPENAI_MAX_TOKENS,
149
150
  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
150
151
  MIDSCENE_OPENAI_HTTP_PROXY,
151
152
  MIDSCENE_OPENAI_SOCKS_PROXY,
@@ -156,6 +157,7 @@ const MODEL_ENV_KEYS = [
156
157
  MIDSCENE_INSIGHT_MODEL_API_KEY,
157
158
  MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
158
159
  MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON,
160
+ MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
159
161
  MIDSCENE_INSIGHT_MODEL_TIMEOUT,
160
162
  MIDSCENE_INSIGHT_MODEL_TEMPERATURE,
161
163
  MIDSCENE_INSIGHT_MODEL_RETRY_COUNT,
@@ -171,6 +173,7 @@ const MODEL_ENV_KEYS = [
171
173
  MIDSCENE_PLANNING_MODEL_API_KEY,
172
174
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
173
175
  MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON,
176
+ MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
174
177
  MIDSCENE_PLANNING_MODEL_TIMEOUT,
175
178
  MIDSCENE_PLANNING_MODEL_TEMPERATURE,
176
179
  MIDSCENE_PLANNING_MODEL_RETRY_COUNT,
@@ -210,4 +213,4 @@ var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
210
213
  UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
211
214
  return UITarsModelVersion;
212
215
  }({});
213
- export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_CLASS_OVERRIDE, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS };
216
+ export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_MAX_TOKENS, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_CLASS_OVERRIDE, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_MAX_TOKENS, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS };
@@ -2,6 +2,7 @@ import { parseBase64 } from "@midscene/shared/img";
2
2
  import { z } from "zod";
3
3
  import { getZodDescription, getZodTypeName, isMidsceneLocatorField, unwrapZodField } from "../zod-schema-utils.mjs";
4
4
  import { getErrorMessage } from "./error-formatter.mjs";
5
+ import { composeUserPrompt, promptInputExtraSchema } from "./user-prompt.mjs";
5
6
  function describeActionForMCP(action) {
6
7
  const actionDesc = action.description || `Execute ${action.name} action`;
7
8
  if (!action.paramSchema) return `${action.name} action, ${actionDesc}`;
@@ -336,6 +337,9 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
336
337
  const agent = await getAgent(args);
337
338
  const screenshot = await agent.page?.screenshotBase64();
338
339
  if (!screenshot) return createErrorResult('Screenshot not available');
340
+ await agent.recordToReport?.('take_screenshot', {
341
+ screenshotBase64: screenshot
342
+ });
339
343
  const { mimeType, body } = parseBase64(screenshot);
340
344
  return {
341
345
  content: [
@@ -369,15 +373,7 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
369
373
  const result = await agent.aiAction(prompt, {
370
374
  deepThink: false
371
375
  });
372
- const screenshotResult = await captureScreenshotResult(agent, 'act');
373
- if (result) {
374
- const message = 'string' == typeof result ? result : JSON.stringify(result);
375
- screenshotResult.content.unshift({
376
- type: 'text',
377
- text: `Task finished, message: ${message}`
378
- });
379
- }
380
- return screenshotResult;
376
+ return await captureScreenshotResult(agent, 'act', result);
381
377
  } catch (error) {
382
378
  const errorMessage = getErrorMessage(error);
383
379
  console.error('Error executing act:', errorMessage);
@@ -390,6 +386,7 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
390
386
  description: 'Assert a natural language statement against the current page/screen.',
391
387
  schema: {
392
388
  prompt: z.string().describe('Natural language assertion to verify, e.g. "there is a login button visible"'),
389
+ ...promptInputExtraSchema,
393
390
  ...initArgSchema
394
391
  },
395
392
  cli: mergeToolCliMetadata(void 0, initArgCliMetadata),
@@ -398,7 +395,13 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
398
395
  try {
399
396
  const agent = await getAgent(args);
400
397
  if (!agent.aiAssert) return createErrorResult('assert is not supported by this agent');
401
- await agent.aiAssert(prompt);
398
+ const userPrompt = composeUserPrompt({
399
+ prompt,
400
+ image: args.image,
401
+ imageName: args.imageName,
402
+ convertHttpImage2Base64: args.convertHttpImage2Base64
403
+ });
404
+ await agent.aiAssert(userPrompt);
402
405
  return {
403
406
  content: [
404
407
  {
@@ -416,4 +419,4 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
416
419
  }
417
420
  ];
418
421
  }
419
- export { generateCommonTools, generateToolsFromActionSpace };
422
+ export { composeUserPrompt, generateCommonTools, generateToolsFromActionSpace };
@@ -0,0 +1,66 @@
1
+ import { z } from "zod";
2
+ function normalizeStringList(raw, fieldName) {
3
+ if (null == raw) return [];
4
+ if ('string' == typeof raw) {
5
+ const trimmed = raw.trim();
6
+ return trimmed ? [
7
+ trimmed
8
+ ] : [];
9
+ }
10
+ if (Array.isArray(raw)) return raw.map((item, index)=>{
11
+ if ('string' != typeof item) throw new Error(`${fieldName}[${index}]: expected a string.`);
12
+ return item.trim();
13
+ });
14
+ throw new Error(`${fieldName}: expected a string or string array, got ${typeof raw}.`);
15
+ }
16
+ function composeImages(input) {
17
+ const urls = normalizeStringList(input.image, 'image');
18
+ const names = normalizeStringList(input.imageName, 'imageName');
19
+ if (urls.length !== names.length) throw new Error(`image/imageName: expected the same number of --image and --image-name values, got ${urls.length} image(s) and ${names.length} image name(s).`);
20
+ return urls.map((url, index)=>({
21
+ name: names[index],
22
+ url
23
+ }));
24
+ }
25
+ function coerceBoolean(value) {
26
+ if (null == value) return;
27
+ if ('boolean' == typeof value) return value;
28
+ if ('string' == typeof value) {
29
+ const trimmed = value.trim();
30
+ if (!trimmed) return;
31
+ const v = trimmed.toLowerCase();
32
+ if ('true' === v || '1' === v) return true;
33
+ if ('false' === v || '0' === v) return false;
34
+ throw new Error(`convertHttpImage2Base64: expected "true", "false", "1", or "0"; got ${JSON.stringify(value)}.`);
35
+ }
36
+ throw new Error(`convertHttpImage2Base64: expected a boolean, got ${typeof value}.`);
37
+ }
38
+ function composeUserPrompt(input) {
39
+ const images = composeImages({
40
+ image: input.image,
41
+ imageName: input.imageName
42
+ });
43
+ const convertFlag = coerceBoolean(input.convertHttpImage2Base64);
44
+ if (0 === images.length && void 0 === convertFlag) return input.prompt;
45
+ const payload = {
46
+ prompt: input.prompt
47
+ };
48
+ if (images.length > 0) payload.images = images;
49
+ if (void 0 !== convertFlag) payload.convertHttpImage2Base64 = convertFlag;
50
+ return payload;
51
+ }
52
+ const promptInputExtraSchema = {
53
+ image: z.union([
54
+ z.string(),
55
+ z.array(z.string())
56
+ ]).optional().describe('Reference image URL/path. Repeat --image for multiple images.'),
57
+ imageName: z.union([
58
+ z.string(),
59
+ z.array(z.string())
60
+ ]).optional().describe('Reference image name. Repeat --image-name; must align with --image order.'),
61
+ convertHttpImage2Base64: z.union([
62
+ z.boolean(),
63
+ z.string()
64
+ ]).optional().describe('If true, convert http(s) image URLs to base64 before sending to the model.')
65
+ };
66
+ export { composeUserPrompt, promptInputExtraSchema };
@@ -24,13 +24,15 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
+ canonicalizeCliArgKeys: ()=>canonicalizeCliArgKeys,
27
28
  parseCliArgs: ()=>parseCliArgs,
28
- parseValue: ()=>parseValue,
29
29
  formatCliValidationError: ()=>formatCliValidationError,
30
+ parseValue: ()=>parseValue,
30
31
  getCliOptionDisplay: ()=>getCliOptionDisplay
31
32
  });
32
33
  const external_zod_namespaceObject = require("zod");
33
34
  const external_key_alias_utils_js_namespaceObject = require("../key-alias-utils.js");
35
+ const external_cli_error_js_namespaceObject = require("./cli-error.js");
34
36
  function parseValue(raw) {
35
37
  if (raw.startsWith('{') || raw.startsWith('[')) try {
36
38
  return JSON.parse(raw);
@@ -54,7 +56,20 @@ function walkCliArgs(args, setArgValue) {
54
56
  function parseCliArgs(args) {
55
57
  const result = {};
56
58
  walkCliArgs(args, (key, value)=>{
57
- result[key] = value;
59
+ const existing = result[key];
60
+ if (void 0 === existing) {
61
+ result[key] = value;
62
+ return;
63
+ }
64
+ if (Array.isArray(existing)) {
65
+ existing.push(value);
66
+ result[key] = existing;
67
+ return;
68
+ }
69
+ result[key] = [
70
+ existing,
71
+ value
72
+ ];
58
73
  });
59
74
  return result;
60
75
  }
@@ -123,11 +138,38 @@ function formatCliValidationError(scriptName, commandName, def, rawArgs) {
123
138
  const optionName = 'string' == typeof issue?.path[0] ? `--${issue.path[0]}` : 'CLI arguments';
124
139
  return `Invalid value for "${optionName}" in ${scriptName} ${commandName}: ${issue?.message ?? parsed.error.message}`;
125
140
  }
141
+ function canonicalizeCliArgKeys(scriptName, commandName, def, rawArgs) {
142
+ if (0 === Object.keys(def.schema).length) return rawArgs;
143
+ const result = {
144
+ ...rawArgs
145
+ };
146
+ for (const schemaKey of Object.keys(def.schema)){
147
+ const cliOption = def.cli?.options?.[schemaKey];
148
+ const acceptedSpellings = getAcceptedCliOptionNames(schemaKey, cliOption);
149
+ let chosenSpelling;
150
+ let chosenValue;
151
+ for (const spelling of acceptedSpellings)if (spelling !== schemaKey) {
152
+ if (spelling in result) {
153
+ if (void 0 !== chosenSpelling) throw new external_cli_error_js_namespaceObject.CLIError(`Conflicting CLI options "--${chosenSpelling}" and "--${spelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`);
154
+ chosenSpelling = spelling;
155
+ chosenValue = result[spelling];
156
+ }
157
+ }
158
+ if (void 0 !== chosenSpelling) {
159
+ if (schemaKey in result && result[schemaKey] !== chosenValue) throw new external_cli_error_js_namespaceObject.CLIError(`Conflicting CLI options "--${schemaKey}" and "--${chosenSpelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`);
160
+ result[schemaKey] = chosenValue;
161
+ delete result[chosenSpelling];
162
+ }
163
+ }
164
+ return result;
165
+ }
166
+ exports.canonicalizeCliArgKeys = __webpack_exports__.canonicalizeCliArgKeys;
126
167
  exports.formatCliValidationError = __webpack_exports__.formatCliValidationError;
127
168
  exports.getCliOptionDisplay = __webpack_exports__.getCliOptionDisplay;
128
169
  exports.parseCliArgs = __webpack_exports__.parseCliArgs;
129
170
  exports.parseValue = __webpack_exports__.parseValue;
130
171
  for(var __rspack_i in __webpack_exports__)if (-1 === [
172
+ "canonicalizeCliArgKeys",
131
173
  "formatCliValidationError",
132
174
  "getCliOptionDisplay",
133
175
  "parseCliArgs",
@@ -155,8 +155,9 @@ async function runToolsCLI(tools, scriptName, options) {
155
155
  }
156
156
  const cliValidationError = (0, external_cli_args_js_namespaceObject.formatCliValidationError)(scriptName, match.name, match.def, parsedArgs);
157
157
  if (cliValidationError) throw new external_cli_error_js_namespaceObject.CLIError(cliValidationError);
158
- debug('command: %s, args: %s', match.name, JSON.stringify(parsedArgs));
159
- const result = await match.def.handler(parsedArgs);
158
+ const handlerArgs = (0, external_cli_args_js_namespaceObject.canonicalizeCliArgKeys)(scriptName, match.name, match.def, parsedArgs);
159
+ debug('command: %s, args: %s', match.name, JSON.stringify(handlerArgs));
160
+ const result = await match.def.handler(handlerArgs);
160
161
  debug('command %s completed, isError: %s', match.name, result.isError ?? false);
161
162
  outputResult(result);
162
163
  await tools.destroy();
@@ -34,7 +34,7 @@ const PLAYWRIGHT_EXAMPLE_CODE = `
34
34
  IMPORTANT: Follow these exact type signatures for AI functions:
35
35
 
36
36
  // Type signatures for AI functions:
37
- aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
37
+ aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<string | undefined>
38
38
  aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
39
39
  aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
40
40
  aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
@@ -38,6 +38,7 @@ const INSIGHT_MODEL_CONFIG_KEYS = {
38
38
  openaiApiKey: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_API_KEY,
39
39
  openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
40
40
  extraBody: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON,
41
+ maxTokens: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
41
42
  modelFamily: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_FAMILY,
42
43
  timeout: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_TIMEOUT,
43
44
  temperature: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_TEMPERATURE,
@@ -55,6 +56,7 @@ const PLANNING_MODEL_CONFIG_KEYS = {
55
56
  openaiApiKey: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_API_KEY,
56
57
  openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
57
58
  extraBody: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON,
59
+ maxTokens: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
58
60
  modelFamily: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_FAMILY,
59
61
  timeout: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_TIMEOUT,
60
62
  temperature: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_TEMPERATURE,
@@ -72,6 +74,7 @@ const DEFAULT_MODEL_CONFIG_KEYS = {
72
74
  openaiApiKey: external_types_js_namespaceObject.MIDSCENE_MODEL_API_KEY,
73
75
  openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_MODEL_INIT_CONFIG_JSON,
74
76
  extraBody: external_types_js_namespaceObject.MIDSCENE_MODEL_EXTRA_BODY_JSON,
77
+ maxTokens: external_types_js_namespaceObject.MIDSCENE_MODEL_MAX_TOKENS,
75
78
  modelFamily: external_types_js_namespaceObject.MIDSCENE_MODEL_FAMILY,
76
79
  timeout: external_types_js_namespaceObject.MIDSCENE_MODEL_TIMEOUT,
77
80
  temperature: external_types_js_namespaceObject.MIDSCENE_MODEL_TEMPERATURE,
@@ -89,6 +92,7 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
89
92
  openaiApiKey: external_types_js_namespaceObject.OPENAI_API_KEY,
90
93
  openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_OPENAI_INIT_CONFIG_JSON,
91
94
  extraBody: external_types_js_namespaceObject.MIDSCENE_MODEL_EXTRA_BODY_JSON,
95
+ maxTokens: external_types_js_namespaceObject.OPENAI_MAX_TOKENS,
92
96
  modelFamily: 'DEFAULT_MODEL_CONFIG_KEYS has no modelFamily key',
93
97
  timeout: external_types_js_namespaceObject.MIDSCENE_MODEL_TIMEOUT,
94
98
  temperature: external_types_js_namespaceObject.MIDSCENE_MODEL_TEMPERATURE,
@@ -37,7 +37,7 @@ const external_utils_js_namespaceObject = require("../utils.js");
37
37
  const external_helper_js_namespaceObject = require("./helper.js");
38
38
  const external_init_debug_js_namespaceObject = require("./init-debug.js");
39
39
  const MODEL_CONFIG_DOC_URL = 'https://midscenejs.com/model-common-config.html';
40
- const getCurrentVersion = ()=>"1.8.5-beta-20260525033347.0";
40
+ const getCurrentVersion = ()=>"1.8.5";
41
41
  const getInvalidModelFamilyMessage = (modelFamily)=>`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}. Current version v${getCurrentVersion()} accepts the following model families: ${external_types_js_namespaceObject.MODEL_FAMILY_VALUES.join(', ')}. You can also visit ${MODEL_CONFIG_DOC_URL} for the latest configuration information.`;
42
42
  const KEYS_MAP = {
43
43
  insight: external_constants_js_namespaceObject.INSIGHT_MODEL_CONFIG_KEYS,
@@ -97,6 +97,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
97
97
  const legacySocksProxy = useLegacyLogic ? provider[external_types_js_namespaceObject.MIDSCENE_OPENAI_SOCKS_PROXY] : void 0;
98
98
  const legacyHttpProxy = useLegacyLogic ? provider[external_types_js_namespaceObject.MIDSCENE_OPENAI_HTTP_PROXY] : void 0;
99
99
  const legacyOpenaiExtraConfig = useLegacyLogic ? provider[external_types_js_namespaceObject.MIDSCENE_OPENAI_INIT_CONFIG_JSON] : void 0;
100
+ const legacyMaxTokens = useLegacyLogic ? provider[external_types_js_namespaceObject.OPENAI_MAX_TOKENS] : void 0;
100
101
  const legacyModelFamily = useLegacyLogic ? legacyConfigToModelFamily(provider) : void 0;
101
102
  const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
102
103
  const openaiApiKey = provider[keys.openaiApiKey] || legacyAPIKey;
@@ -108,6 +109,13 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
108
109
  const openaiExtraConfig = (0, external_helper_js_namespaceObject.parseJson)(keys.openaiExtraConfig, openaiExtraConfigStr || legacyOpenaiExtraConfig);
109
110
  const extraBodyStr = provider[keys.extraBody];
110
111
  const extraBody = (0, external_helper_js_namespaceObject.parseJson)(keys.extraBody, extraBodyStr);
112
+ const maxTokensStr = provider[keys.maxTokens] || legacyMaxTokens;
113
+ const maxTokens = (()=>{
114
+ const val = maxTokensStr?.trim();
115
+ if (!val) return;
116
+ const num = Number(val);
117
+ return Number.isFinite(num) ? num : void 0;
118
+ })();
111
119
  const temperature = provider[keys.temperature] ? Number(provider[keys.temperature]) : 0;
112
120
  const modelFamily = modelFamilyRaw;
113
121
  validateModelFamily(modelFamily);
@@ -120,6 +128,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
120
128
  openaiApiKey,
121
129
  openaiExtraConfig: normalizeOpenaiExtraConfig(openaiExtraConfig),
122
130
  extraBody,
131
+ maxTokens,
123
132
  modelFamily,
124
133
  uiTarsModelVersion,
125
134
  modelName: modelName,