@midscene/shared 1.8.5-beta-20260525033347.0 → 1.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/cli/cli-args.mjs +41 -2
- package/dist/es/cli/cli-runner.mjs +4 -3
- package/dist/es/constants/example-code.mjs +1 -1
- package/dist/es/env/constants.mjs +5 -1
- package/dist/es/env/parse-model-config.mjs +11 -2
- package/dist/es/env/types.mjs +7 -4
- package/dist/es/mcp/tool-generator.mjs +14 -11
- package/dist/es/mcp/user-prompt.mjs +66 -0
- package/dist/lib/cli/cli-args.js +44 -2
- package/dist/lib/cli/cli-runner.js +3 -2
- package/dist/lib/constants/example-code.js +1 -1
- package/dist/lib/env/constants.js +4 -0
- package/dist/lib/env/parse-model-config.js +10 -1
- package/dist/lib/env/types.js +12 -3
- package/dist/lib/mcp/tool-generator.js +17 -11
- package/dist/lib/mcp/user-prompt.js +103 -0
- package/dist/types/cli/cli-args.d.ts +8 -0
- package/dist/types/constants/example-code.d.ts +1 -1
- package/dist/types/env/constants.d.ts +1 -0
- package/dist/types/env/types.d.ts +15 -5
- package/dist/types/mcp/tool-generator.d.ts +2 -0
- package/dist/types/mcp/types.d.ts +21 -1
- package/dist/types/mcp/user-prompt.d.ts +13 -0
- package/package.json +1 -1
- package/src/cli/cli-args.ts +65 -1
- package/src/cli/cli-runner.ts +10 -2
- package/src/constants/example-code.ts +1 -1
- package/src/env/constants.ts +9 -0
- package/src/env/parse-model-config.ts +12 -0
- package/src/env/types.ts +19 -3
- package/src/img/transform.ts +1 -1
- package/src/mcp/tool-generator.ts +15 -11
- package/src/mcp/types.ts +21 -1
- package/src/mcp/user-prompt.ts +102 -0
package/dist/es/cli/cli-args.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { getKeyAliases } from "../key-alias-utils.mjs";
|
|
3
|
+
import { CLIError } from "./cli-error.mjs";
|
|
3
4
|
function parseValue(raw) {
|
|
4
5
|
if (raw.startsWith('{') || raw.startsWith('[')) try {
|
|
5
6
|
return JSON.parse(raw);
|
|
@@ -23,7 +24,20 @@ function walkCliArgs(args, setArgValue) {
|
|
|
23
24
|
function parseCliArgs(args) {
|
|
24
25
|
const result = {};
|
|
25
26
|
walkCliArgs(args, (key, value)=>{
|
|
26
|
-
result[key] = value;
|
|
27
|
+
const existing = result[key];
|
|
28
|
+
if (void 0 === existing) {
|
|
29
|
+
result[key] = value;
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
32
|
+
if (Array.isArray(existing)) {
|
|
33
|
+
existing.push(value);
|
|
34
|
+
result[key] = existing;
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
result[key] = [
|
|
38
|
+
existing,
|
|
39
|
+
value
|
|
40
|
+
];
|
|
27
41
|
});
|
|
28
42
|
return result;
|
|
29
43
|
}
|
|
@@ -92,4 +106,29 @@ function formatCliValidationError(scriptName, commandName, def, rawArgs) {
|
|
|
92
106
|
const optionName = 'string' == typeof issue?.path[0] ? `--${issue.path[0]}` : 'CLI arguments';
|
|
93
107
|
return `Invalid value for "${optionName}" in ${scriptName} ${commandName}: ${issue?.message ?? parsed.error.message}`;
|
|
94
108
|
}
|
|
95
|
-
|
|
109
|
+
function canonicalizeCliArgKeys(scriptName, commandName, def, rawArgs) {
|
|
110
|
+
if (0 === Object.keys(def.schema).length) return rawArgs;
|
|
111
|
+
const result = {
|
|
112
|
+
...rawArgs
|
|
113
|
+
};
|
|
114
|
+
for (const schemaKey of Object.keys(def.schema)){
|
|
115
|
+
const cliOption = def.cli?.options?.[schemaKey];
|
|
116
|
+
const acceptedSpellings = getAcceptedCliOptionNames(schemaKey, cliOption);
|
|
117
|
+
let chosenSpelling;
|
|
118
|
+
let chosenValue;
|
|
119
|
+
for (const spelling of acceptedSpellings)if (spelling !== schemaKey) {
|
|
120
|
+
if (spelling in result) {
|
|
121
|
+
if (void 0 !== chosenSpelling) throw new CLIError(`Conflicting CLI options "--${chosenSpelling}" and "--${spelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`);
|
|
122
|
+
chosenSpelling = spelling;
|
|
123
|
+
chosenValue = result[spelling];
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
if (void 0 !== chosenSpelling) {
|
|
127
|
+
if (schemaKey in result && result[schemaKey] !== chosenValue) throw new CLIError(`Conflicting CLI options "--${schemaKey}" and "--${chosenSpelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`);
|
|
128
|
+
result[schemaKey] = chosenValue;
|
|
129
|
+
delete result[chosenSpelling];
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
return result;
|
|
133
|
+
}
|
|
134
|
+
export { canonicalizeCliArgKeys, formatCliValidationError, getCliOptionDisplay, parseCliArgs, parseValue };
|
|
@@ -3,7 +3,7 @@ import { tmpdir } from "node:os";
|
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
import dotenv from "dotenv";
|
|
5
5
|
import { getDebug } from "../logger.mjs";
|
|
6
|
-
import { formatCliValidationError, getCliOptionDisplay, parseCliArgs, parseValue } from "./cli-args.mjs";
|
|
6
|
+
import { canonicalizeCliArgKeys, formatCliValidationError, getCliOptionDisplay, parseCliArgs, parseValue } from "./cli-args.mjs";
|
|
7
7
|
import { CLIError, reportCLIError } from "./cli-error.mjs";
|
|
8
8
|
const debug = getDebug('cli-runner');
|
|
9
9
|
function outputContentItem(item, isError) {
|
|
@@ -112,8 +112,9 @@ async function runToolsCLI(tools, scriptName, options) {
|
|
|
112
112
|
}
|
|
113
113
|
const cliValidationError = formatCliValidationError(scriptName, match.name, match.def, parsedArgs);
|
|
114
114
|
if (cliValidationError) throw new CLIError(cliValidationError);
|
|
115
|
-
|
|
116
|
-
|
|
115
|
+
const handlerArgs = canonicalizeCliArgKeys(scriptName, match.name, match.def, parsedArgs);
|
|
116
|
+
debug('command: %s, args: %s', match.name, JSON.stringify(handlerArgs));
|
|
117
|
+
const result = await match.def.handler(handlerArgs);
|
|
117
118
|
debug('command %s completed, isError: %s', match.name, result.isError ?? false);
|
|
118
119
|
outputResult(result);
|
|
119
120
|
await tools.destroy();
|
|
@@ -5,7 +5,7 @@ const PLAYWRIGHT_EXAMPLE_CODE = `
|
|
|
5
5
|
IMPORTANT: Follow these exact type signatures for AI functions:
|
|
6
6
|
|
|
7
7
|
// Type signatures for AI functions:
|
|
8
|
-
aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
|
|
8
|
+
aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<string | undefined>
|
|
9
9
|
aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
10
10
|
aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
11
11
|
aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, OPENAI_API_KEY, OPENAI_BASE_URL } from "./types.mjs";
|
|
1
|
+
import { MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_MAX_TOKENS, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_MAX_TOKENS, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS } from "./types.mjs";
|
|
2
2
|
const INSIGHT_MODEL_CONFIG_KEYS = {
|
|
3
3
|
modelName: MIDSCENE_INSIGHT_MODEL_NAME,
|
|
4
4
|
socksProxy: MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY,
|
|
@@ -7,6 +7,7 @@ const INSIGHT_MODEL_CONFIG_KEYS = {
|
|
|
7
7
|
openaiApiKey: MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
8
8
|
openaiExtraConfig: MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
9
9
|
extraBody: MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON,
|
|
10
|
+
maxTokens: MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
|
|
10
11
|
modelFamily: MIDSCENE_INSIGHT_MODEL_FAMILY,
|
|
11
12
|
timeout: MIDSCENE_INSIGHT_MODEL_TIMEOUT,
|
|
12
13
|
temperature: MIDSCENE_INSIGHT_MODEL_TEMPERATURE,
|
|
@@ -24,6 +25,7 @@ const PLANNING_MODEL_CONFIG_KEYS = {
|
|
|
24
25
|
openaiApiKey: MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
25
26
|
openaiExtraConfig: MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
26
27
|
extraBody: MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON,
|
|
28
|
+
maxTokens: MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
|
|
27
29
|
modelFamily: MIDSCENE_PLANNING_MODEL_FAMILY,
|
|
28
30
|
timeout: MIDSCENE_PLANNING_MODEL_TIMEOUT,
|
|
29
31
|
temperature: MIDSCENE_PLANNING_MODEL_TEMPERATURE,
|
|
@@ -41,6 +43,7 @@ const DEFAULT_MODEL_CONFIG_KEYS = {
|
|
|
41
43
|
openaiApiKey: MIDSCENE_MODEL_API_KEY,
|
|
42
44
|
openaiExtraConfig: MIDSCENE_MODEL_INIT_CONFIG_JSON,
|
|
43
45
|
extraBody: MIDSCENE_MODEL_EXTRA_BODY_JSON,
|
|
46
|
+
maxTokens: MIDSCENE_MODEL_MAX_TOKENS,
|
|
44
47
|
modelFamily: MIDSCENE_MODEL_FAMILY,
|
|
45
48
|
timeout: MIDSCENE_MODEL_TIMEOUT,
|
|
46
49
|
temperature: MIDSCENE_MODEL_TEMPERATURE,
|
|
@@ -58,6 +61,7 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
|
|
|
58
61
|
openaiApiKey: OPENAI_API_KEY,
|
|
59
62
|
openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
60
63
|
extraBody: MIDSCENE_MODEL_EXTRA_BODY_JSON,
|
|
64
|
+
maxTokens: OPENAI_MAX_TOKENS,
|
|
61
65
|
modelFamily: 'DEFAULT_MODEL_CONFIG_KEYS has no modelFamily key',
|
|
62
66
|
timeout: MIDSCENE_MODEL_TIMEOUT,
|
|
63
67
|
temperature: MIDSCENE_MODEL_TEMPERATURE,
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { DEFAULT_MODEL_CONFIG_KEYS, INSIGHT_MODEL_CONFIG_KEYS, PLANNING_MODEL_CONFIG_KEYS } from "./constants.mjs";
|
|
2
|
-
import { MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, OPENAI_API_KEY, OPENAI_BASE_URL, UITarsModelVersion } from "./types.mjs";
|
|
2
|
+
import { MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MODEL_FAMILY_VALUES, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, UITarsModelVersion } from "./types.mjs";
|
|
3
3
|
import { getDebug } from "../logger.mjs";
|
|
4
4
|
import { assert } from "../utils.mjs";
|
|
5
5
|
import { maskConfig, parseJson } from "./helper.mjs";
|
|
6
6
|
import { initDebugConfig } from "./init-debug.mjs";
|
|
7
7
|
const MODEL_CONFIG_DOC_URL = 'https://midscenejs.com/model-common-config.html';
|
|
8
|
-
const getCurrentVersion = ()=>"1.8.5-beta-20260525033347.0";
|
|
8
|
+
const getCurrentVersion = ()=>"1.8.5";
|
|
9
9
|
const getInvalidModelFamilyMessage = (modelFamily)=>`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}. Current version v${getCurrentVersion()} accepts the following model families: ${MODEL_FAMILY_VALUES.join(', ')}. You can also visit ${MODEL_CONFIG_DOC_URL} for the latest configuration information.`;
|
|
10
10
|
const KEYS_MAP = {
|
|
11
11
|
insight: INSIGHT_MODEL_CONFIG_KEYS,
|
|
@@ -65,6 +65,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
|
|
|
65
65
|
const legacySocksProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_SOCKS_PROXY] : void 0;
|
|
66
66
|
const legacyHttpProxy = useLegacyLogic ? provider[MIDSCENE_OPENAI_HTTP_PROXY] : void 0;
|
|
67
67
|
const legacyOpenaiExtraConfig = useLegacyLogic ? provider[MIDSCENE_OPENAI_INIT_CONFIG_JSON] : void 0;
|
|
68
|
+
const legacyMaxTokens = useLegacyLogic ? provider[OPENAI_MAX_TOKENS] : void 0;
|
|
68
69
|
const legacyModelFamily = useLegacyLogic ? legacyConfigToModelFamily(provider) : void 0;
|
|
69
70
|
const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
|
|
70
71
|
const openaiApiKey = provider[keys.openaiApiKey] || legacyAPIKey;
|
|
@@ -76,6 +77,13 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
|
|
|
76
77
|
const openaiExtraConfig = parseJson(keys.openaiExtraConfig, openaiExtraConfigStr || legacyOpenaiExtraConfig);
|
|
77
78
|
const extraBodyStr = provider[keys.extraBody];
|
|
78
79
|
const extraBody = parseJson(keys.extraBody, extraBodyStr);
|
|
80
|
+
const maxTokensStr = provider[keys.maxTokens] || legacyMaxTokens;
|
|
81
|
+
const maxTokens = (()=>{
|
|
82
|
+
const val = maxTokensStr?.trim();
|
|
83
|
+
if (!val) return;
|
|
84
|
+
const num = Number(val);
|
|
85
|
+
return Number.isFinite(num) ? num : void 0;
|
|
86
|
+
})();
|
|
79
87
|
const temperature = provider[keys.temperature] ? Number(provider[keys.temperature]) : 0;
|
|
80
88
|
const modelFamily = modelFamilyRaw;
|
|
81
89
|
validateModelFamily(modelFamily);
|
|
@@ -88,6 +96,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
|
|
|
88
96
|
openaiApiKey,
|
|
89
97
|
openaiExtraConfig: normalizeOpenaiExtraConfig(openaiExtraConfig),
|
|
90
98
|
extraBody,
|
|
99
|
+
maxTokens,
|
|
91
100
|
modelFamily,
|
|
92
101
|
uiTarsModelVersion,
|
|
93
102
|
modelName: modelName,
|
package/dist/es/env/types.mjs
CHANGED
|
@@ -57,6 +57,7 @@ const MIDSCENE_INSIGHT_MODEL_BASE_URL = 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
|
|
|
57
57
|
const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
|
|
58
58
|
const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
|
|
59
59
|
const MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON = 'MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON';
|
|
60
|
+
const MIDSCENE_INSIGHT_MODEL_MAX_TOKENS = 'MIDSCENE_INSIGHT_MODEL_MAX_TOKENS';
|
|
60
61
|
const MIDSCENE_INSIGHT_MODEL_TIMEOUT = 'MIDSCENE_INSIGHT_MODEL_TIMEOUT';
|
|
61
62
|
const MIDSCENE_INSIGHT_MODEL_TEMPERATURE = 'MIDSCENE_INSIGHT_MODEL_TEMPERATURE';
|
|
62
63
|
const MIDSCENE_INSIGHT_MODEL_RETRY_COUNT = 'MIDSCENE_INSIGHT_MODEL_RETRY_COUNT';
|
|
@@ -72,6 +73,7 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
|
|
|
72
73
|
const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
|
|
73
74
|
const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
|
|
74
75
|
const MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON = 'MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON';
|
|
76
|
+
const MIDSCENE_PLANNING_MODEL_MAX_TOKENS = 'MIDSCENE_PLANNING_MODEL_MAX_TOKENS';
|
|
75
77
|
const MIDSCENE_PLANNING_MODEL_TIMEOUT = 'MIDSCENE_PLANNING_MODEL_TIMEOUT';
|
|
76
78
|
const MIDSCENE_PLANNING_MODEL_TEMPERATURE = 'MIDSCENE_PLANNING_MODEL_TEMPERATURE';
|
|
77
79
|
const MIDSCENE_PLANNING_MODEL_RETRY_COUNT = 'MIDSCENE_PLANNING_MODEL_RETRY_COUNT';
|
|
@@ -99,13 +101,10 @@ const BOOLEAN_ENV_KEYS = [
|
|
|
99
101
|
MIDSCENE_REPORT_QUIET
|
|
100
102
|
];
|
|
101
103
|
const NUMBER_ENV_KEYS = [
|
|
102
|
-
MIDSCENE_MODEL_MAX_TOKENS,
|
|
103
104
|
MIDSCENE_CACHE_MAX_FILENAME_LENGTH,
|
|
104
105
|
MIDSCENE_REPLANNING_CYCLE_LIMIT
|
|
105
106
|
];
|
|
106
107
|
const STRING_ENV_KEYS = [
|
|
107
|
-
MIDSCENE_MODEL_MAX_TOKENS,
|
|
108
|
-
OPENAI_MAX_TOKENS,
|
|
109
108
|
MIDSCENE_ADB_PATH,
|
|
110
109
|
MIDSCENE_ADB_REMOTE_HOST,
|
|
111
110
|
MIDSCENE_ADB_REMOTE_PORT,
|
|
@@ -131,6 +130,7 @@ const MODEL_ENV_KEYS = [
|
|
|
131
130
|
MIDSCENE_MODEL_BASE_URL,
|
|
132
131
|
MIDSCENE_MODEL_SOCKS_PROXY,
|
|
133
132
|
MIDSCENE_MODEL_HTTP_PROXY,
|
|
133
|
+
MIDSCENE_MODEL_MAX_TOKENS,
|
|
134
134
|
MIDSCENE_MODEL_TIMEOUT,
|
|
135
135
|
MIDSCENE_MODEL_TEMPERATURE,
|
|
136
136
|
MIDSCENE_MODEL_RETRY_COUNT,
|
|
@@ -146,6 +146,7 @@ const MODEL_ENV_KEYS = [
|
|
|
146
146
|
MIDSCENE_USE_VL_MODEL,
|
|
147
147
|
OPENAI_API_KEY,
|
|
148
148
|
OPENAI_BASE_URL,
|
|
149
|
+
OPENAI_MAX_TOKENS,
|
|
149
150
|
MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
150
151
|
MIDSCENE_OPENAI_HTTP_PROXY,
|
|
151
152
|
MIDSCENE_OPENAI_SOCKS_PROXY,
|
|
@@ -156,6 +157,7 @@ const MODEL_ENV_KEYS = [
|
|
|
156
157
|
MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
157
158
|
MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
158
159
|
MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON,
|
|
160
|
+
MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
|
|
159
161
|
MIDSCENE_INSIGHT_MODEL_TIMEOUT,
|
|
160
162
|
MIDSCENE_INSIGHT_MODEL_TEMPERATURE,
|
|
161
163
|
MIDSCENE_INSIGHT_MODEL_RETRY_COUNT,
|
|
@@ -171,6 +173,7 @@ const MODEL_ENV_KEYS = [
|
|
|
171
173
|
MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
172
174
|
MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
173
175
|
MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON,
|
|
176
|
+
MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
|
|
174
177
|
MIDSCENE_PLANNING_MODEL_TIMEOUT,
|
|
175
178
|
MIDSCENE_PLANNING_MODEL_TEMPERATURE,
|
|
176
179
|
MIDSCENE_PLANNING_MODEL_RETRY_COUNT,
|
|
@@ -210,4 +213,4 @@ var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
|
|
|
210
213
|
UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
|
|
211
214
|
return UITarsModelVersion;
|
|
212
215
|
}({});
|
|
213
|
-
export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_CLASS_OVERRIDE, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, 
MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS };
|
|
216
|
+
export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_MAX_TOKENS, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_CLASS_OVERRIDE, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_EXTRA_BODY_JSON, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_MAX_TOKENS, 
MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS };
|
|
@@ -2,6 +2,7 @@ import { parseBase64 } from "@midscene/shared/img";
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { getZodDescription, getZodTypeName, isMidsceneLocatorField, unwrapZodField } from "../zod-schema-utils.mjs";
|
|
4
4
|
import { getErrorMessage } from "./error-formatter.mjs";
|
|
5
|
+
import { composeUserPrompt, promptInputExtraSchema } from "./user-prompt.mjs";
|
|
5
6
|
function describeActionForMCP(action) {
|
|
6
7
|
const actionDesc = action.description || `Execute ${action.name} action`;
|
|
7
8
|
if (!action.paramSchema) return `${action.name} action, ${actionDesc}`;
|
|
@@ -336,6 +337,9 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
|
|
|
336
337
|
const agent = await getAgent(args);
|
|
337
338
|
const screenshot = await agent.page?.screenshotBase64();
|
|
338
339
|
if (!screenshot) return createErrorResult('Screenshot not available');
|
|
340
|
+
await agent.recordToReport?.('take_screenshot', {
|
|
341
|
+
screenshotBase64: screenshot
|
|
342
|
+
});
|
|
339
343
|
const { mimeType, body } = parseBase64(screenshot);
|
|
340
344
|
return {
|
|
341
345
|
content: [
|
|
@@ -369,15 +373,7 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
|
|
|
369
373
|
const result = await agent.aiAction(prompt, {
|
|
370
374
|
deepThink: false
|
|
371
375
|
});
|
|
372
|
-
|
|
373
|
-
if (result) {
|
|
374
|
-
const message = 'string' == typeof result ? result : JSON.stringify(result);
|
|
375
|
-
screenshotResult.content.unshift({
|
|
376
|
-
type: 'text',
|
|
377
|
-
text: `Task finished, message: ${message}`
|
|
378
|
-
});
|
|
379
|
-
}
|
|
380
|
-
return screenshotResult;
|
|
376
|
+
return await captureScreenshotResult(agent, 'act', result);
|
|
381
377
|
} catch (error) {
|
|
382
378
|
const errorMessage = getErrorMessage(error);
|
|
383
379
|
console.error('Error executing act:', errorMessage);
|
|
@@ -390,6 +386,7 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
|
|
|
390
386
|
description: 'Assert a natural language statement against the current page/screen.',
|
|
391
387
|
schema: {
|
|
392
388
|
prompt: z.string().describe('Natural language assertion to verify, e.g. "there is a login button visible"'),
|
|
389
|
+
...promptInputExtraSchema,
|
|
393
390
|
...initArgSchema
|
|
394
391
|
},
|
|
395
392
|
cli: mergeToolCliMetadata(void 0, initArgCliMetadata),
|
|
@@ -398,7 +395,13 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
|
|
|
398
395
|
try {
|
|
399
396
|
const agent = await getAgent(args);
|
|
400
397
|
if (!agent.aiAssert) return createErrorResult('assert is not supported by this agent');
|
|
401
|
-
|
|
398
|
+
const userPrompt = composeUserPrompt({
|
|
399
|
+
prompt,
|
|
400
|
+
image: args.image,
|
|
401
|
+
imageName: args.imageName,
|
|
402
|
+
convertHttpImage2Base64: args.convertHttpImage2Base64
|
|
403
|
+
});
|
|
404
|
+
await agent.aiAssert(userPrompt);
|
|
402
405
|
return {
|
|
403
406
|
content: [
|
|
404
407
|
{
|
|
@@ -416,4 +419,4 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
|
|
|
416
419
|
}
|
|
417
420
|
];
|
|
418
421
|
}
|
|
419
|
-
export { generateCommonTools, generateToolsFromActionSpace };
|
|
422
|
+
export { composeUserPrompt, generateCommonTools, generateToolsFromActionSpace };
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
function normalizeStringList(raw, fieldName) {
|
|
3
|
+
if (null == raw) return [];
|
|
4
|
+
if ('string' == typeof raw) {
|
|
5
|
+
const trimmed = raw.trim();
|
|
6
|
+
return trimmed ? [
|
|
7
|
+
trimmed
|
|
8
|
+
] : [];
|
|
9
|
+
}
|
|
10
|
+
if (Array.isArray(raw)) return raw.map((item, index)=>{
|
|
11
|
+
if ('string' != typeof item) throw new Error(`${fieldName}[${index}]: expected a string.`);
|
|
12
|
+
return item.trim();
|
|
13
|
+
});
|
|
14
|
+
throw new Error(`${fieldName}: expected a string or string array, got ${typeof raw}.`);
|
|
15
|
+
}
|
|
16
|
+
function composeImages(input) {
|
|
17
|
+
const urls = normalizeStringList(input.image, 'image');
|
|
18
|
+
const names = normalizeStringList(input.imageName, 'imageName');
|
|
19
|
+
if (urls.length !== names.length) throw new Error(`image/imageName: expected the same number of --image and --image-name values, got ${urls.length} image(s) and ${names.length} image name(s).`);
|
|
20
|
+
return urls.map((url, index)=>({
|
|
21
|
+
name: names[index],
|
|
22
|
+
url
|
|
23
|
+
}));
|
|
24
|
+
}
|
|
25
|
+
function coerceBoolean(value) {
|
|
26
|
+
if (null == value) return;
|
|
27
|
+
if ('boolean' == typeof value) return value;
|
|
28
|
+
if ('string' == typeof value) {
|
|
29
|
+
const trimmed = value.trim();
|
|
30
|
+
if (!trimmed) return;
|
|
31
|
+
const v = trimmed.toLowerCase();
|
|
32
|
+
if ('true' === v || '1' === v) return true;
|
|
33
|
+
if ('false' === v || '0' === v) return false;
|
|
34
|
+
throw new Error(`convertHttpImage2Base64: expected "true", "false", "1", or "0"; got ${JSON.stringify(value)}.`);
|
|
35
|
+
}
|
|
36
|
+
throw new Error(`convertHttpImage2Base64: expected a boolean, got ${typeof value}.`);
|
|
37
|
+
}
|
|
38
|
+
function composeUserPrompt(input) {
|
|
39
|
+
const images = composeImages({
|
|
40
|
+
image: input.image,
|
|
41
|
+
imageName: input.imageName
|
|
42
|
+
});
|
|
43
|
+
const convertFlag = coerceBoolean(input.convertHttpImage2Base64);
|
|
44
|
+
if (0 === images.length && void 0 === convertFlag) return input.prompt;
|
|
45
|
+
const payload = {
|
|
46
|
+
prompt: input.prompt
|
|
47
|
+
};
|
|
48
|
+
if (images.length > 0) payload.images = images;
|
|
49
|
+
if (void 0 !== convertFlag) payload.convertHttpImage2Base64 = convertFlag;
|
|
50
|
+
return payload;
|
|
51
|
+
}
|
|
52
|
+
const promptInputExtraSchema = {
|
|
53
|
+
image: z.union([
|
|
54
|
+
z.string(),
|
|
55
|
+
z.array(z.string())
|
|
56
|
+
]).optional().describe('Reference image URL/path. Repeat --image for multiple images.'),
|
|
57
|
+
imageName: z.union([
|
|
58
|
+
z.string(),
|
|
59
|
+
z.array(z.string())
|
|
60
|
+
]).optional().describe('Reference image name. Repeat --image-name; must align with --image order.'),
|
|
61
|
+
convertHttpImage2Base64: z.union([
|
|
62
|
+
z.boolean(),
|
|
63
|
+
z.string()
|
|
64
|
+
]).optional().describe('If true, convert http(s) image URLs to base64 before sending to the model.')
|
|
65
|
+
};
|
|
66
|
+
export { composeUserPrompt, promptInputExtraSchema };
|
package/dist/lib/cli/cli-args.js
CHANGED
|
@@ -24,13 +24,15 @@ var __webpack_require__ = {};
|
|
|
24
24
|
var __webpack_exports__ = {};
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
canonicalizeCliArgKeys: ()=>canonicalizeCliArgKeys,
|
|
27
28
|
parseCliArgs: ()=>parseCliArgs,
|
|
28
|
-
parseValue: ()=>parseValue,
|
|
29
29
|
formatCliValidationError: ()=>formatCliValidationError,
|
|
30
|
+
parseValue: ()=>parseValue,
|
|
30
31
|
getCliOptionDisplay: ()=>getCliOptionDisplay
|
|
31
32
|
});
|
|
32
33
|
const external_zod_namespaceObject = require("zod");
|
|
33
34
|
const external_key_alias_utils_js_namespaceObject = require("../key-alias-utils.js");
|
|
35
|
+
const external_cli_error_js_namespaceObject = require("./cli-error.js");
|
|
34
36
|
function parseValue(raw) {
|
|
35
37
|
if (raw.startsWith('{') || raw.startsWith('[')) try {
|
|
36
38
|
return JSON.parse(raw);
|
|
@@ -54,7 +56,20 @@ function walkCliArgs(args, setArgValue) {
|
|
|
54
56
|
/**
 * Parse raw CLI arguments into a key/value object.
 *
 * Every `(key, value)` pair reported by `walkCliArgs` is recorded. The first
 * occurrence of a key stores the value as-is; further occurrences of the same
 * key collect the values into an array in the order they appeared.
 *
 * Only own properties of the result count as "already seen". Reading
 * `result[key]` through the prototype chain would treat option names such as
 * `constructor`, `toString` or `__proto__` as pre-existing values and wrap
 * them as `[<inherited value>, value]`.
 *
 * Note: if the stored value for a key is already an array (for example
 * because the first value itself was an array), later values are appended to
 * that same array.
 *
 * @param {*} args - Raw arguments, forwarded unchanged to `walkCliArgs`.
 * @returns {Object} Map from option key to a single value or an array of values.
 */
function parseCliArgs(args) {
    const result = {};
    const hasOwn = Object.prototype.hasOwnProperty;
    // defineProperty always creates an own data property, even for "__proto__",
    // where a plain assignment would swap the object's prototype instead.
    const setOwn = (key, value)=>{
        Object.defineProperty(result, key, {
            value,
            writable: true,
            enumerable: true,
            configurable: true
        });
    };
    walkCliArgs(args, (key, value)=>{
        const existing = hasOwn.call(result, key) ? result[key] : void 0;
        if (void 0 === existing) {
            setOwn(key, value);
            return;
        }
        if (Array.isArray(existing)) {
            // Already stored under result[key]; mutating in place is enough.
            existing.push(value);
            return;
        }
        setOwn(key, [
            existing,
            value
        ]);
    });
    return result;
}
|
|
@@ -123,11 +138,38 @@ function formatCliValidationError(scriptName, commandName, def, rawArgs) {
|
|
|
123
138
|
const optionName = 'string' == typeof issue?.path[0] ? `--${issue.path[0]}` : 'CLI arguments';
|
|
124
139
|
return `Invalid value for "${optionName}" in ${scriptName} ${commandName}: ${issue?.message ?? parsed.error.message}`;
|
|
125
140
|
}
|
|
141
|
+
/**
 * Rewrite alternative CLI spellings of an option to the schema key the
 * handler expects.
 *
 * For every key in `def.schema`, the spellings returned by
 * `getAcceptedCliOptionNames` are looked up in the arguments. If one
 * alternative spelling is present, its value is moved to the schema key and
 * the alternative key is removed. `rawArgs` is never mutated; a shallow copy
 * is returned (or `rawArgs` itself when the schema has no keys).
 *
 * @param {string} scriptName - Script name, used in error messages.
 * @param {string} commandName - Command name, used in error messages.
 * @param {object} def - Tool definition; reads `def.schema` and
 *   `def.cli?.options`.
 * @param {object} rawArgs - Parsed CLI arguments.
 * @returns {object} Arguments keyed by schema key.
 * @throws {CLIError} When two alternative spellings of one option are both
 *   present, or when an alternative spelling and the schema key are both
 *   present with different values.
 */
function canonicalizeCliArgKeys(scriptName, commandName, def, rawArgs) {
    const schemaKeys = Object.keys(def.schema);
    if (schemaKeys.length === 0) {
        return rawArgs;
    }

    const canonical = {
        ...rawArgs
    };
    const conflictError = (first, second, target)=>new external_cli_error_js_namespaceObject.CLIError(`Conflicting CLI options "--${first}" and "--${second}" for ${scriptName} ${commandName}: both target "${target}". Use one spelling.`);

    for (const schemaKey of schemaKeys){
        const spellings = getAcceptedCliOptionNames(schemaKey, def.cli?.options?.[schemaKey]);

        // Find the single alternative spelling (if any) the user supplied.
        let usedAlias;
        for (const alias of spellings){
            if (alias === schemaKey || !(alias in canonical)) continue;
            if (usedAlias !== undefined) throw conflictError(usedAlias, alias, schemaKey);
            usedAlias = alias;
        }
        if (usedAlias === undefined) continue;

        const aliasValue = canonical[usedAlias];
        // Canonical key and alias together are only tolerated when they agree.
        if (schemaKey in canonical && canonical[schemaKey] !== aliasValue) throw conflictError(schemaKey, usedAlias, schemaKey);

        canonical[schemaKey] = aliasValue;
        delete canonical[usedAlias];
    }
    return canonical;
}
|
|
166
|
+
exports.canonicalizeCliArgKeys = __webpack_exports__.canonicalizeCliArgKeys;
|
|
126
167
|
exports.formatCliValidationError = __webpack_exports__.formatCliValidationError;
|
|
127
168
|
exports.getCliOptionDisplay = __webpack_exports__.getCliOptionDisplay;
|
|
128
169
|
exports.parseCliArgs = __webpack_exports__.parseCliArgs;
|
|
129
170
|
exports.parseValue = __webpack_exports__.parseValue;
|
|
130
171
|
for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
172
|
+
"canonicalizeCliArgKeys",
|
|
131
173
|
"formatCliValidationError",
|
|
132
174
|
"getCliOptionDisplay",
|
|
133
175
|
"parseCliArgs",
|
|
@@ -155,8 +155,9 @@ async function runToolsCLI(tools, scriptName, options) {
|
|
|
155
155
|
}
|
|
156
156
|
const cliValidationError = (0, external_cli_args_js_namespaceObject.formatCliValidationError)(scriptName, match.name, match.def, parsedArgs);
|
|
157
157
|
if (cliValidationError) throw new external_cli_error_js_namespaceObject.CLIError(cliValidationError);
|
|
158
|
-
|
|
159
|
-
|
|
158
|
+
const handlerArgs = (0, external_cli_args_js_namespaceObject.canonicalizeCliArgKeys)(scriptName, match.name, match.def, parsedArgs);
|
|
159
|
+
debug('command: %s, args: %s', match.name, JSON.stringify(handlerArgs));
|
|
160
|
+
const result = await match.def.handler(handlerArgs);
|
|
160
161
|
debug('command %s completed, isError: %s', match.name, result.isError ?? false);
|
|
161
162
|
outputResult(result);
|
|
162
163
|
await tools.destroy();
|
|
@@ -34,7 +34,7 @@ const PLAYWRIGHT_EXAMPLE_CODE = `
|
|
|
34
34
|
IMPORTANT: Follow these exact type signatures for AI functions:
|
|
35
35
|
|
|
36
36
|
// Type signatures for AI functions:
|
|
37
|
-
aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<
|
|
37
|
+
aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<string | undefined>
|
|
38
38
|
aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
39
39
|
aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
40
40
|
aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
@@ -38,6 +38,7 @@ const INSIGHT_MODEL_CONFIG_KEYS = {
|
|
|
38
38
|
openaiApiKey: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
39
39
|
openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
|
|
40
40
|
extraBody: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON,
|
|
41
|
+
maxTokens: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
|
|
41
42
|
modelFamily: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_FAMILY,
|
|
42
43
|
timeout: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_TIMEOUT,
|
|
43
44
|
temperature: external_types_js_namespaceObject.MIDSCENE_INSIGHT_MODEL_TEMPERATURE,
|
|
@@ -55,6 +56,7 @@ const PLANNING_MODEL_CONFIG_KEYS = {
|
|
|
55
56
|
openaiApiKey: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_API_KEY,
|
|
56
57
|
openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
|
|
57
58
|
extraBody: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON,
|
|
59
|
+
maxTokens: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
|
|
58
60
|
modelFamily: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_FAMILY,
|
|
59
61
|
timeout: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_TIMEOUT,
|
|
60
62
|
temperature: external_types_js_namespaceObject.MIDSCENE_PLANNING_MODEL_TEMPERATURE,
|
|
@@ -72,6 +74,7 @@ const DEFAULT_MODEL_CONFIG_KEYS = {
|
|
|
72
74
|
openaiApiKey: external_types_js_namespaceObject.MIDSCENE_MODEL_API_KEY,
|
|
73
75
|
openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_MODEL_INIT_CONFIG_JSON,
|
|
74
76
|
extraBody: external_types_js_namespaceObject.MIDSCENE_MODEL_EXTRA_BODY_JSON,
|
|
77
|
+
maxTokens: external_types_js_namespaceObject.MIDSCENE_MODEL_MAX_TOKENS,
|
|
75
78
|
modelFamily: external_types_js_namespaceObject.MIDSCENE_MODEL_FAMILY,
|
|
76
79
|
timeout: external_types_js_namespaceObject.MIDSCENE_MODEL_TIMEOUT,
|
|
77
80
|
temperature: external_types_js_namespaceObject.MIDSCENE_MODEL_TEMPERATURE,
|
|
@@ -89,6 +92,7 @@ const DEFAULT_MODEL_CONFIG_KEYS_LEGACY = {
|
|
|
89
92
|
openaiApiKey: external_types_js_namespaceObject.OPENAI_API_KEY,
|
|
90
93
|
openaiExtraConfig: external_types_js_namespaceObject.MIDSCENE_OPENAI_INIT_CONFIG_JSON,
|
|
91
94
|
extraBody: external_types_js_namespaceObject.MIDSCENE_MODEL_EXTRA_BODY_JSON,
|
|
95
|
+
maxTokens: external_types_js_namespaceObject.OPENAI_MAX_TOKENS,
|
|
92
96
|
modelFamily: 'DEFAULT_MODEL_CONFIG_KEYS has no modelFamily key',
|
|
93
97
|
timeout: external_types_js_namespaceObject.MIDSCENE_MODEL_TIMEOUT,
|
|
94
98
|
temperature: external_types_js_namespaceObject.MIDSCENE_MODEL_TEMPERATURE,
|
|
@@ -37,7 +37,7 @@ const external_utils_js_namespaceObject = require("../utils.js");
|
|
|
37
37
|
const external_helper_js_namespaceObject = require("./helper.js");
|
|
38
38
|
const external_init_debug_js_namespaceObject = require("./init-debug.js");
|
|
39
39
|
const MODEL_CONFIG_DOC_URL = 'https://midscenejs.com/model-common-config.html';
|
|
40
|
-
const getCurrentVersion = ()=>"1.8.5
|
|
40
|
+
const getCurrentVersion = ()=>"1.8.5";
|
|
41
41
|
const getInvalidModelFamilyMessage = (modelFamily)=>`Invalid MIDSCENE_MODEL_FAMILY value: ${modelFamily}. Current version v${getCurrentVersion()} accepts the following model families: ${external_types_js_namespaceObject.MODEL_FAMILY_VALUES.join(', ')}. You can also visit ${MODEL_CONFIG_DOC_URL} for the latest configuration information.`;
|
|
42
42
|
const KEYS_MAP = {
|
|
43
43
|
insight: external_constants_js_namespaceObject.INSIGHT_MODEL_CONFIG_KEYS,
|
|
@@ -97,6 +97,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
|
|
|
97
97
|
const legacySocksProxy = useLegacyLogic ? provider[external_types_js_namespaceObject.MIDSCENE_OPENAI_SOCKS_PROXY] : void 0;
|
|
98
98
|
const legacyHttpProxy = useLegacyLogic ? provider[external_types_js_namespaceObject.MIDSCENE_OPENAI_HTTP_PROXY] : void 0;
|
|
99
99
|
const legacyOpenaiExtraConfig = useLegacyLogic ? provider[external_types_js_namespaceObject.MIDSCENE_OPENAI_INIT_CONFIG_JSON] : void 0;
|
|
100
|
+
const legacyMaxTokens = useLegacyLogic ? provider[external_types_js_namespaceObject.OPENAI_MAX_TOKENS] : void 0;
|
|
100
101
|
const legacyModelFamily = useLegacyLogic ? legacyConfigToModelFamily(provider) : void 0;
|
|
101
102
|
const modelFamilyRaw = provider[keys.modelFamily] || legacyModelFamily;
|
|
102
103
|
const openaiApiKey = provider[keys.openaiApiKey] || legacyAPIKey;
|
|
@@ -108,6 +109,13 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
|
|
|
108
109
|
const openaiExtraConfig = (0, external_helper_js_namespaceObject.parseJson)(keys.openaiExtraConfig, openaiExtraConfigStr || legacyOpenaiExtraConfig);
|
|
109
110
|
const extraBodyStr = provider[keys.extraBody];
|
|
110
111
|
const extraBody = (0, external_helper_js_namespaceObject.parseJson)(keys.extraBody, extraBodyStr);
|
|
112
|
+
const maxTokensStr = provider[keys.maxTokens] || legacyMaxTokens;
|
|
113
|
+
const maxTokens = (()=>{
|
|
114
|
+
const val = maxTokensStr?.trim();
|
|
115
|
+
if (!val) return;
|
|
116
|
+
const num = Number(val);
|
|
117
|
+
return Number.isFinite(num) ? num : void 0;
|
|
118
|
+
})();
|
|
111
119
|
const temperature = provider[keys.temperature] ? Number(provider[keys.temperature]) : 0;
|
|
112
120
|
const modelFamily = modelFamilyRaw;
|
|
113
121
|
validateModelFamily(modelFamily);
|
|
@@ -120,6 +128,7 @@ const parseOpenaiSdkConfig = ({ keys, provider, useLegacyLogic = false })=>{
|
|
|
120
128
|
openaiApiKey,
|
|
121
129
|
openaiExtraConfig: normalizeOpenaiExtraConfig(openaiExtraConfig),
|
|
122
130
|
extraBody,
|
|
131
|
+
maxTokens,
|
|
123
132
|
modelFamily,
|
|
124
133
|
uiTarsModelVersion,
|
|
125
134
|
modelName: modelName,
|