@midscene/core 0.25.4-beta-20250807040242.0 → 0.25.4-beta-20250807062119.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model.d.ts +6 -7
- package/dist/es/ai-model.js +1 -1
- package/dist/es/{chunk-I5LBWOQA.js → chunk-G2JTYWI6.js} +373 -156
- package/dist/es/chunk-G2JTYWI6.js.map +1 -0
- package/dist/es/{chunk-EK3JQ4ZV.js → chunk-JH54OF4E.js} +3 -3
- package/dist/es/index.d.ts +6 -6
- package/dist/es/index.js +5 -4
- package/dist/es/index.js.map +1 -1
- package/dist/es/{llm-planning-45dd50cd.d.ts → llm-planning-f449f3b8.d.ts} +3 -2
- package/dist/es/{types-da4fb35b.d.ts → types-7435eba0.d.ts} +8 -1
- package/dist/es/utils.d.ts +1 -1
- package/dist/es/utils.js +1 -1
- package/dist/lib/ai-model.d.ts +6 -7
- package/dist/lib/ai-model.js +2 -2
- package/dist/lib/{chunk-I5LBWOQA.js → chunk-G2JTYWI6.js} +358 -141
- package/dist/lib/chunk-G2JTYWI6.js.map +1 -0
- package/dist/lib/{chunk-EK3JQ4ZV.js → chunk-JH54OF4E.js} +3 -3
- package/dist/lib/index.d.ts +6 -6
- package/dist/lib/index.js +15 -14
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/{llm-planning-45dd50cd.d.ts → llm-planning-f449f3b8.d.ts} +3 -2
- package/dist/{types/types-da4fb35b.d.ts → lib/types-7435eba0.d.ts} +8 -1
- package/dist/lib/utils.d.ts +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model.d.ts +6 -7
- package/dist/types/index.d.ts +6 -6
- package/dist/types/{llm-planning-45dd50cd.d.ts → llm-planning-f449f3b8.d.ts} +3 -2
- package/dist/{lib/types-da4fb35b.d.ts → types/types-7435eba0.d.ts} +8 -1
- package/dist/types/utils.d.ts +1 -1
- package/package.json +3 -3
- package/dist/es/chunk-I5LBWOQA.js.map +0 -1
- package/dist/lib/chunk-I5LBWOQA.js.map +0 -1
- /package/dist/es/{chunk-EK3JQ4ZV.js.map → chunk-JH54OF4E.js.map} +0 -0
- /package/dist/lib/{chunk-EK3JQ4ZV.js.map → chunk-JH54OF4E.js.map} +0 -0
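In summary, the main change in the renamed chunk (chunk-I5LBWOQA.js → chunk-G2JTYWI6.js) is a new modelPreferences argument threaded through callAiFn, callToGetJSONObject, call2 and createChatClient, plus a decideModelConfig helper that reads a separate set of MIDSCENE_VQA_* environment variables when the current action's intent is "VQA". A minimal sketch of how this could be configured, based only on the variable names visible in the diff below; the model names and key values are placeholders and the exact public API is not confirmed by this diff:

// Sketch (assumption, not from the package docs): the MIDSCENE_VQA_* names come
// from the added import list in the diff; model names and keys are placeholders.
process.env.MIDSCENE_MODEL_NAME = "gpt-4o";                 // model for ordinary actions
process.env.OPENAI_API_KEY = "sk-common-placeholder";
process.env.MIDSCENE_VQA_MODEL_NAME = "some-vl-model";      // separate model used only for VQA-intent calls
process.env.MIDSCENE_VQA_OPENAI_BASE_URL = "https://example.com/v1";
process.env.MIDSCENE_VQA_OPENAI_API_KEY = "sk-vqa-placeholder";

// Inside the bundle, callers now forward a modelPreferences object; when its intent
// is "VQA" and MIDSCENE_VQA_MODEL_NAME is set, decideModelConfig reads only the
// MIDSCENE_VQA_* variables, otherwise it falls back to the common variables.
const modelPreferences = { intent: "VQA" };
// e.g. callAiFn(msgs, 2 /* EXTRACT_DATA */, modelPreferences)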
@@ -5,35 +5,16 @@ import {
   getBearerTokenProvider
 } from "@azure/identity";
 import {
-  ANTHROPIC_API_KEY,
-  AZURE_OPENAI_API_VERSION,
-  AZURE_OPENAI_DEPLOYMENT,
-  AZURE_OPENAI_ENDPOINT,
-  AZURE_OPENAI_KEY,
   MIDSCENE_API_TYPE,
-  MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
-  MIDSCENE_AZURE_OPENAI_SCOPE,
-  MIDSCENE_DEBUG_AI_PROFILE,
-  MIDSCENE_DEBUG_AI_RESPONSE,
   MIDSCENE_LANGSMITH_DEBUG,
-  MIDSCENE_MODEL_NAME,
-  MIDSCENE_OPENAI_HTTP_PROXY,
-  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
-  MIDSCENE_OPENAI_SOCKS_PROXY,
-  MIDSCENE_USE_ANTHROPIC_SDK,
-  MIDSCENE_USE_AZURE_OPENAI,
-  OPENAI_API_KEY,
-  OPENAI_BASE_URL,
   OPENAI_MAX_TOKENS,
-
-
-  getAIConfigInBoolean,
-  getAIConfigInJson,
+  getAIConfig as getAIConfig2,
+  getAIConfigInBoolean as getAIConfigInBoolean2,
   uiTarsModelVersion,
   vlLocateMode as vlLocateMode3
 } from "@midscene/shared/env";
-import {
-import { assert as
+import { getDebug as getDebug3 } from "@midscene/shared/logger";
+import { assert as assert4 } from "@midscene/shared/utils";
 import { ifInBrowser } from "@midscene/shared/utils";
 import { HttpsProxyAgent } from "https-proxy-agent";
 import { jsonrepair } from "jsonrepair";
@@ -55,10 +36,11 @@ var AIActionType = /* @__PURE__ */ ((AIActionType2) => {
   AIActionType2[AIActionType2["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
   return AIActionType2;
 })(AIActionType || {});
-async function callAiFn(msgs, AIActionTypeValue) {
+async function callAiFn(msgs, AIActionTypeValue, modelPreferences) {
   const { content, usage } = await callToGetJSONObject(
     msgs,
-    AIActionTypeValue
+    AIActionTypeValue,
+    modelPreferences
   );
   return { content, usage };
 }
@@ -1163,24 +1145,57 @@ pageDescription:
   });
 };

-// src/ai-model/service-caller/
-
-
-
-
-
-
-
-
-
-
-
+// src/ai-model/service-caller/utils.ts
+import {
+  ANTHROPIC_API_KEY,
+  AZURE_OPENAI_API_VERSION,
+  AZURE_OPENAI_DEPLOYMENT,
+  AZURE_OPENAI_ENDPOINT,
+  AZURE_OPENAI_KEY,
+  MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_AZURE_OPENAI_SCOPE,
+  MIDSCENE_DEBUG_AI_PROFILE,
+  MIDSCENE_DEBUG_AI_RESPONSE,
+  MIDSCENE_MODEL_NAME,
+  MIDSCENE_OPENAI_HTTP_PROXY,
+  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_OPENAI_SOCKS_PROXY,
+  MIDSCENE_USE_ANTHROPIC_SDK,
+  MIDSCENE_USE_AZURE_OPENAI,
+  MIDSCENE_VQA_ANTHROPIC_API_KEY,
+  MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
+  MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
+  MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
+  MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_VQA_AZURE_OPENAI_KEY,
+  MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
+  MIDSCENE_VQA_MODEL_NAME,
+  MIDSCENE_VQA_OPENAI_API_KEY,
+  MIDSCENE_VQA_OPENAI_BASE_URL,
+  MIDSCENE_VQA_OPENAI_HTTP_PROXY,
+  MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
+  MIDSCENE_VQA_OPENAI_USE_AZURE,
+  MIDSCENE_VQA_USE_ANTHROPIC_SDK,
+  MIDSCENE_VQA_USE_AZURE_OPENAI,
+  OPENAI_API_KEY,
+  OPENAI_BASE_URL,
+  OPENAI_USE_AZURE,
+  getAIConfig,
+  getAIConfigInBoolean,
+  getAIConfigInJson
+} from "@midscene/shared/env";
+import { enableDebug, getDebug as getDebug2 } from "@midscene/shared/logger";
+import { assert as assert3 } from "@midscene/shared/utils";
+function getModelName() {
+  let modelName = "gpt-4o";
+  const nameInConfig = getAIConfig(MIDSCENE_MODEL_NAME);
+  if (nameInConfig) {
+    modelName = nameInConfig;
+  }
+  return modelName;
 }
-var debugConfigInitialized = false;
 function initDebugConfig() {
-  if (debugConfigInitialized)
-    return;
   const shouldPrintTiming = getAIConfigInBoolean(MIDSCENE_DEBUG_AI_PROFILE);
   let debugConfig = "";
   if (shouldPrintTiming) {
@@ -1205,27 +1220,232 @@ function initDebugConfig() {
   if (debugConfig) {
     enableDebug(debugConfig);
   }
-  debugConfigInitialized = true;
 }
-var
-
-
-
-
-
+var createAssert = (modelNameKey, modelName) => (value, key, modelVendorFlag) => {
+  if (modelVendorFlag) {
+    assert3(
+      value,
+      `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} and ${modelVendorFlag} has also been specified, but got: ${value}
+Please check your config.`
+    );
+  } else {
+    assert3(
+      value,
+      `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName}, but got: ${value}
+Please check your config.`
+    );
   }
-
-
+};
+var getModelConfigFromEnv = (modelName, keys, valueAssert) => {
+  const socksProxy = getAIConfig(keys.socksProxy);
+  const httpProxy = getAIConfig(keys.httpProxy);
+  if (getAIConfig(keys.openaiUseAzureDeprecated)) {
+    const openaiBaseURL = getAIConfig(keys.openaiBaseURL);
+    const openaiApiKey = getAIConfig(keys.openaiApiKey);
+    const openaiExtraConfig = getAIConfigInJson(keys.openaiExtraConfig);
+    valueAssert(
+      openaiBaseURL,
+      keys.openaiBaseURL,
+      keys.openaiUseAzureDeprecated
+    );
+    valueAssert(openaiApiKey, keys.openaiApiKey, keys.openaiUseAzureDeprecated);
+    return {
+      socksProxy,
+      httpProxy,
+      modelName,
+      openaiUseAzureDeprecated: true,
+      openaiApiKey,
+      openaiBaseURL,
+      openaiExtraConfig
+    };
+  } else if (getAIConfig(keys.useAzureOpenai)) {
+    const azureOpenaiScope = getAIConfig(keys.azureOpenaiScope);
+    const azureOpenaiApiKey = getAIConfig(keys.azureOpenaiApiKey);
+    const azureOpenaiEndpoint = getAIConfig(keys.azureOpenaiEndpoint);
+    const azureOpenaiDeployment = getAIConfig(keys.azureOpenaiDeployment);
+    const azureOpenaiApiVersion = getAIConfig(keys.azureOpenaiApiVersion);
+    const azureExtraConfig = getAIConfigInJson(keys.azureExtraConfig);
+    const openaiExtraConfig = getAIConfigInJson(keys.openaiExtraConfig);
+    valueAssert(azureOpenaiApiKey, keys.azureOpenaiApiKey, keys.useAzureOpenai);
+    return {
+      socksProxy,
+      httpProxy,
+      modelName,
+      useAzureOpenai: true,
+      azureOpenaiScope,
+      azureOpenaiApiKey,
+      azureOpenaiEndpoint,
+      azureOpenaiDeployment,
+      azureOpenaiApiVersion,
+      azureExtraConfig,
+      openaiExtraConfig
+    };
+  } else if (getAIConfig(keys.useAnthropicSdk)) {
+    const anthropicApiKey = getAIConfig(keys.anthropicApiKey);
+    valueAssert(anthropicApiKey, keys.anthropicApiKey, keys.useAnthropicSdk);
+    return {
+      socksProxy,
+      httpProxy,
+      modelName,
+      useAnthropicSdk: true,
+      anthropicApiKey
+    };
+  } else {
+    const openaiBaseURL = getAIConfig(keys.openaiBaseURL);
+    const openaiApiKey = getAIConfig(keys.openaiApiKey);
+    const openaiExtraConfig = getAIConfigInJson(keys.openaiExtraConfig);
+    valueAssert(openaiBaseURL, keys.openaiBaseURL);
+    valueAssert(openaiApiKey, keys.openaiApiKey);
+    return {
+      socksProxy,
+      httpProxy,
+      modelName,
+      openaiBaseURL,
+      openaiApiKey,
+      openaiExtraConfig
+    };
+  }
+};
+var maskKey = (key, maskChar = "*") => {
+  if (typeof key !== "string" || key.length === 0) {
+    return key;
+  }
+  const prefixLen = 3;
+  const suffixLen = 3;
+  const keepLength = prefixLen + suffixLen;
+  if (key.length <= keepLength) {
+    return key;
+  }
+  const prefix = key.substring(0, prefixLen);
+  const suffix = key.substring(key.length - suffixLen);
+  const maskLength = key.length - keepLength;
+  const mask = maskChar.repeat(maskLength);
+  return `${prefix}${mask}${suffix}`;
+};
+var maskConfig = (config) => {
+  return Object.fromEntries(
+    Object.entries(config).map(([key, value]) => [
+      key,
+      ["openaiApiKey", "azureOpenaiApiKey", "anthropicApiKey"].includes(key) ? maskKey(value) : value
+    ])
+  );
+};
+var decideModelConfig = (modelPreferences) => {
+  initDebugConfig();
+  const debugLog = getDebug2("ai:decideModelConfig");
+  debugLog("modelPreferences", modelPreferences);
+  const isVQAIntent = modelPreferences?.intent === "VQA";
+  const vqaModelName = getAIConfig(MIDSCENE_VQA_MODEL_NAME);
+  if (isVQAIntent && vqaModelName) {
+    debugLog(
+      `current action is a VQA action and detected ${MIDSCENE_VQA_MODEL_NAME} ${vqaModelName}, will only read VQA related model config from process.env`
+    );
+    const config = getModelConfigFromEnv(
+      vqaModelName,
+      {
+        /**
+         * proxy
+         */
+        socksProxy: MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
+        httpProxy: MIDSCENE_VQA_OPENAI_HTTP_PROXY,
+        /**
+         * OpenAI
+         */
+        openaiBaseURL: MIDSCENE_VQA_OPENAI_BASE_URL,
+        openaiApiKey: MIDSCENE_VQA_OPENAI_API_KEY,
+        openaiExtraConfig: MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
+        /**
+         * Azure
+         */
+        openaiUseAzureDeprecated: MIDSCENE_VQA_OPENAI_USE_AZURE,
+        useAzureOpenai: MIDSCENE_VQA_USE_AZURE_OPENAI,
+        azureOpenaiScope: MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
+        azureOpenaiApiKey: MIDSCENE_VQA_AZURE_OPENAI_KEY,
+        azureOpenaiEndpoint: MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
+        azureOpenaiApiVersion: MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
+        azureOpenaiDeployment: MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
+        azureExtraConfig: MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
+        /**
+         * Anthropic
+         */
+        useAnthropicSdk: MIDSCENE_VQA_USE_ANTHROPIC_SDK,
+        anthropicApiKey: MIDSCENE_VQA_ANTHROPIC_API_KEY
+      },
+      createAssert(MIDSCENE_VQA_MODEL_NAME, vqaModelName)
+    );
+    debugLog("got model config for VQA usage:", maskConfig(config));
+    return config;
+  } else {
+    debugLog("read model config from process.env as normal.");
+    const commonModelName = getAIConfig(MIDSCENE_MODEL_NAME);
+    assert3(
+      commonModelName,
+      `${MIDSCENE_MODEL_NAME} is empty, please check your config.`
+    );
+    const config = getModelConfigFromEnv(
+      commonModelName,
+      {
+        /**
+         * proxy
+         */
+        socksProxy: MIDSCENE_OPENAI_SOCKS_PROXY,
+        httpProxy: MIDSCENE_OPENAI_HTTP_PROXY,
+        /**
+         * OpenAI
+         */
+        openaiBaseURL: OPENAI_BASE_URL,
+        openaiApiKey: OPENAI_API_KEY,
+        openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
+        /**
+         * Azure
+         */
+        openaiUseAzureDeprecated: OPENAI_USE_AZURE,
+        useAzureOpenai: MIDSCENE_USE_AZURE_OPENAI,
+        azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
+        azureOpenaiApiKey: AZURE_OPENAI_KEY,
+        azureOpenaiEndpoint: AZURE_OPENAI_ENDPOINT,
+        azureOpenaiApiVersion: AZURE_OPENAI_API_VERSION,
+        azureOpenaiDeployment: AZURE_OPENAI_DEPLOYMENT,
+        azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
+        /**
+         * Anthropic
+         */
+        useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
+        anthropicApiKey: ANTHROPIC_API_KEY
+      },
+      createAssert(MIDSCENE_MODEL_NAME, commonModelName)
+    );
+    debugLog("got model config for common usage:", maskConfig(config));
+    return config;
+  }
+};
+
+// src/ai-model/service-caller/index.ts
 async function createChatClient({
-  AIActionTypeValue
+  AIActionTypeValue,
+  modelPreferences
 }) {
-
+  const {
+    socksProxy,
+    httpProxy,
+    modelName,
+    openaiBaseURL,
+    openaiApiKey,
+    openaiExtraConfig,
+    openaiUseAzureDeprecated,
+    useAzureOpenai,
+    azureOpenaiScope,
+    azureOpenaiApiKey,
+    azureOpenaiEndpoint,
+    azureOpenaiApiVersion,
+    azureOpenaiDeployment,
+    azureExtraConfig,
+    useAnthropicSdk,
+    anthropicApiKey
+  } = decideModelConfig(modelPreferences);
   let openai;
-  const extraConfig = getAIConfigInJson(MIDSCENE_OPENAI_INIT_CONFIG_JSON);
-  const socksProxy = getAIConfig(MIDSCENE_OPENAI_SOCKS_PROXY);
-  const httpProxy = getAIConfig(MIDSCENE_OPENAI_HTTP_PROXY);
   let proxyAgent = void 0;
-  const debugProxy =
+  const debugProxy = getDebug3("ai:call:proxy");
   if (httpProxy) {
     debugProxy("using http proxy", httpProxy);
     proxyAgent = new HttpsProxyAgent(httpProxy);
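A short worked example of the maskKey and maskConfig helpers added above (the key value is hypothetical):

maskKey("sk-abcdef123456");   // keeps 3 + 3 chars and masks the middle 9 -> "sk-*********456"
maskConfig({ openaiApiKey: "sk-abcdef123456", modelName: "gpt-4o" });
// -> { openaiApiKey: "sk-*********456", modelName: "gpt-4o" }, so credentials are
// masked before the resolved config is written to the debug log.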
@@ -1233,70 +1453,56 @@ async function createChatClient({
     debugProxy("using socks proxy", socksProxy);
     proxyAgent = new SocksProxyAgent(socksProxy);
   }
-  if (
+  if (openaiUseAzureDeprecated) {
     openai = new AzureOpenAI({
-      baseURL:
-      apiKey:
+      baseURL: openaiBaseURL,
+      apiKey: openaiApiKey,
       httpAgent: proxyAgent,
-      ...
+      ...openaiExtraConfig,
       dangerouslyAllowBrowser: true
     });
-  } else if (
-    const extraAzureConfig = getAIConfigInJson(
-      MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
-    );
-    const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
+  } else if (useAzureOpenai) {
     let tokenProvider = void 0;
-    if (
-
+    if (azureOpenaiScope) {
+      assert4(
         !ifInBrowser,
         "Azure OpenAI is not supported in browser with Midscene."
       );
       const credential = new DefaultAzureCredential();
-
-      tokenProvider = getBearerTokenProvider(credential, scope);
+      tokenProvider = getBearerTokenProvider(credential, azureOpenaiScope);
      openai = new AzureOpenAI({
        azureADTokenProvider: tokenProvider,
-        endpoint:
-        apiVersion:
-        deployment:
-        ...
-        ...
+        endpoint: azureOpenaiEndpoint,
+        apiVersion: azureOpenaiApiVersion,
+        deployment: azureOpenaiDeployment,
+        ...openaiExtraConfig,
+        ...azureExtraConfig
      });
    } else {
      openai = new AzureOpenAI({
-        apiKey:
-        endpoint:
-        apiVersion:
-        deployment:
+        apiKey: azureOpenaiApiKey,
+        endpoint: azureOpenaiEndpoint,
+        apiVersion: azureOpenaiApiVersion,
+        deployment: azureOpenaiDeployment,
        dangerouslyAllowBrowser: true,
-        ...
-        ...
+        ...openaiExtraConfig,
+        ...azureExtraConfig
      });
    }
-  } else if (!
-    const baseURL = getAIConfig(OPENAI_BASE_URL);
-    if (typeof baseURL === "string") {
-      if (!/^https?:\/\//.test(baseURL)) {
-        throw new Error(
-          `OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}
-Please check your config.`
-        );
-      }
-    }
+  } else if (!useAnthropicSdk) {
    openai = new OpenAI({
-      baseURL:
-      apiKey:
+      baseURL: openaiBaseURL,
+      apiKey: openaiApiKey,
      httpAgent: proxyAgent,
-      ...
+      ...openaiExtraConfig,
      defaultHeaders: {
-        ...
+        ...openaiExtraConfig?.defaultHeaders || {},
        [MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
      },
      dangerouslyAllowBrowser: true
    });
  }
-  if (openai &&
+  if (openai && getAIConfigInBoolean2(MIDSCENE_LANGSMITH_DEBUG)) {
    if (ifInBrowser) {
      throw new Error("langsmith is not supported in browser");
    }
@@ -1307,14 +1513,13 @@ Please check your config.`
   if (typeof openai !== "undefined") {
     return {
       completion: openai.chat.completions,
-      style: "openai"
+      style: "openai",
+      modelName
     };
   }
-  if (
-    const apiKey = getAIConfig(ANTHROPIC_API_KEY);
-    assert3(apiKey, "ANTHROPIC_API_KEY is required");
+  if (useAnthropicSdk) {
     openai = new Anthropic({
-      apiKey,
+      apiKey: anthropicApiKey,
       httpAgent: proxyAgent,
       dangerouslyAllowBrowser: true
     });
@@ -1322,25 +1527,23 @@ Please check your config.`
   if (typeof openai !== "undefined" && openai.messages) {
     return {
       completion: openai.messages,
-      style: "anthropic"
+      style: "anthropic",
+      modelName
     };
   }
   throw new Error("Openai SDK or Anthropic SDK is not initialized");
 }
-async function call2(messages, AIActionTypeValue,
-
-
-
-  );
-  const { completion, style } = await createChatClient({
-    AIActionTypeValue
+async function call2(messages, AIActionTypeValue, options, modelPreferences) {
+  const { completion, style, modelName } = await createChatClient({
+    AIActionTypeValue,
+    modelPreferences
   });
-  const
-  const
-  const
-  const
+  const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
+  const maxTokens = getAIConfig2(OPENAI_MAX_TOKENS);
+  const debugCall = getDebug3("ai:call");
+  const debugProfileStats = getDebug3("ai:profile:stats");
+  const debugProfileDetail = getDebug3("ai:profile:detail");
   const startTime = Date.now();
-  const model = getModelName();
   const isStreaming = options?.stream && options?.onChunk;
   let content;
   let accumulated = "";
@@ -1357,12 +1560,12 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
   try {
     if (style === "openai") {
       debugCall(
-        `sending ${isStreaming ? "streaming " : ""}request to ${
+        `sending ${isStreaming ? "streaming " : ""}request to ${modelName}`
       );
       if (isStreaming) {
         const stream = await completion.create(
           {
-            model,
+            model: modelName,
             messages,
             response_format: responseFormat,
             ...commonConfig
@@ -1419,23 +1622,23 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
         }
         content = accumulated;
         debugProfileStats(
-          `streaming model, ${
+          `streaming model, ${modelName}, mode, ${vlLocateMode3() || "default"}, cost-ms, ${timeCost}`
         );
       } else {
         const result = await completion.create({
-          model,
+          model: modelName,
           messages,
           response_format: responseFormat,
           ...commonConfig
         });
         timeCost = Date.now() - startTime;
         debugProfileStats(
-          `model, ${
+          `model, ${modelName}, mode, ${vlLocateMode3() || "default"}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${result.usage?.prompt_tokens || ""}, completion-tokens, ${result.usage?.completion_tokens || ""}, total-tokens, ${result.usage?.total_tokens || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
         );
         debugProfileDetail(
           `model usage detail: ${JSON.stringify(result.usage)}`
         );
-
+        assert4(
           result.choices,
           `invalid response from LLM service: ${JSON.stringify(result)}`
         );
@@ -1443,12 +1646,12 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
         usage = result.usage;
       }
       debugCall(`response: ${content}`);
-
+      assert4(content, "empty content");
     } else if (style === "anthropic") {
       const convertImageContent = (content2) => {
         if (content2.type === "image_url") {
           const imgBase64 = content2.image_url.url;
-
+          assert4(imgBase64, "image_url is required");
           return {
             source: {
               type: "base64",
@@ -1462,7 +1665,7 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
       };
       if (isStreaming) {
         const stream = await completion.create({
-          model,
+          model: modelName,
           system: "You are a versatile professional in software UI automation",
           messages: messages.map((m) => ({
             role: "user",
@@ -1506,7 +1709,7 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
         content = accumulated;
       } else {
         const result = await completion.create({
-          model,
+          model: modelName,
           system: "You are a versatile professional in software UI automation",
           messages: messages.map((m) => ({
             role: "user",
@@ -1519,7 +1722,7 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
        content = result.content[0].text;
        usage = result.usage;
      }
-
+      assert4(content, "empty content");
    }
    if (isStreaming && !usage) {
      const estimatedTokens = Math.max(
@@ -1553,10 +1756,9 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
     throw newError;
   }
 }
-
+var getResponseFormat = (modelName, AIActionTypeValue) => {
   let responseFormat;
-
-  if (model.includes("gpt-4")) {
+  if (modelName.includes("gpt-4")) {
     switch (AIActionTypeValue) {
       case 0 /* ASSERT */:
         responseFormat = assertSchema;
@@ -1573,11 +1775,19 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
         break;
     }
   }
-  if (
+  if (modelName === "gpt-4o-2024-05-13") {
     responseFormat = { type: "json_object" /* JSON */ };
   }
-
-
+  return responseFormat;
+};
+async function callToGetJSONObject(messages, AIActionTypeValue, modelPreferences) {
+  const response = await call2(
+    messages,
+    AIActionTypeValue,
+    void 0,
+    modelPreferences
+  );
+  assert4(response, "empty response");
   const jsonContent = safeParseJson(response.content);
   return { content: jsonContent, usage: response.usage };
 }
@@ -1861,7 +2071,7 @@ Respond with YAML only, no explanations.`
    });
  }
  if (options.stream && options.onChunk) {
-    return await call2(prompt, 2 /* EXTRACT_DATA */,
+    return await call2(prompt, 2 /* EXTRACT_DATA */, {
      stream: true,
      onChunk: options.onChunk
    });
@@ -1984,7 +2194,7 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
    }
  ];
  if (options.stream && options.onChunk) {
-    return await call2(prompt, 2 /* EXTRACT_DATA */,
+    return await call2(prompt, 2 /* EXTRACT_DATA */, {
      stream: true,
      onChunk: options.onChunk
    });
@@ -2005,7 +2215,7 @@
 import {
   MIDSCENE_USE_QWEN_VL,
   MIDSCENE_USE_VLM_UI_TARS,
-  getAIConfigInBoolean as
+  getAIConfigInBoolean as getAIConfigInBoolean3,
   vlLocateMode as vlLocateMode4
 } from "@midscene/shared/env";
 import {
@@ -2013,8 +2223,8 @@ import {
   paddingToMatchBlockByBase64,
   preProcessImageUrl
 } from "@midscene/shared/img";
-import { getDebug as
-import { assert as
+import { getDebug as getDebug4 } from "@midscene/shared/logger";
+import { assert as assert5 } from "@midscene/shared/utils";

 // src/ai-model/prompt/extraction.ts
 import { PromptTemplate as PromptTemplate3 } from "@langchain/core/prompts";
@@ -2169,8 +2379,8 @@ var sectionLocatorInstruction = new PromptTemplate4({
 });

 // src/ai-model/inspect.ts
-var debugInspect =
-var debugSection =
+var debugInspect = getDebug4("ai:inspect");
+var debugSection = getDebug4("ai:section");
 var extraTextFromUserPrompt = (prompt) => {
   if (typeof prompt === "string") {
     return prompt;
@@ -2224,7 +2434,7 @@ async function AiLocateElement(options) {
   const { context, targetElementDescription, callAI } = options;
   const { screenshotBase64 } = context;
   const { description, elementById, insertElementByPosition } = await describeUserPage(context);
-
+  assert5(
     targetElementDescription,
     "cannot find the target element description"
   );
@@ -2235,11 +2445,11 @@ async function AiLocateElement(options) {
   const systemPrompt = systemPromptToLocateElement(vlLocateMode4());
   let imagePayload = screenshotBase64;
   if (options.searchConfig) {
-
+    assert5(
       options.searchConfig.rect,
       "searchArea is provided but its rect cannot be found. Failed to locate element"
     );
-
+    assert5(
       options.searchConfig.imageBase64,
       "searchArea is provided but its imageBase64 cannot be found. Failed to locate element"
     );
@@ -2391,7 +2601,7 @@ async function AiLocateSection(options) {
     imageBase64 = await cropByRect(
       screenshotBase64,
       sectionRect,
-
+      getAIConfigInBoolean3(MIDSCENE_USE_QWEN_VL)
     );
   }
   return {
@@ -2403,7 +2613,13 @@ async function AiLocateSection(options) {
   };
 }
 async function AiExtractElementInfo(options) {
-  const {
+  const {
+    dataQuery,
+    context,
+    extractOption,
+    multimodalPrompt,
+    modelPreferences
+  } = options;
   const systemPrompt = systemPromptToExtract();
   const { screenshotBase64 } = context;
   const { description, elementById } = await describeUserPage(context, {
@@ -2452,7 +2668,8 @@ async function AiExtractElementInfo(options) {
   }
   const result = await callAiFn(
     msgs,
-    2 /* EXTRACT_DATA
+    2 /* EXTRACT_DATA */,
+    modelPreferences
   );
   return {
     parseResult: result.content,
@@ -2462,10 +2679,10 @@ async function AiExtractElementInfo(options) {
   }
 }
 async function AiAssert(options) {
   const { assertion, context } = options;
-
+  assert5(assertion, "assertion should not be empty");
   const { screenshotBase64 } = context;
   const systemPrompt = systemPromptToAssert({
-    isUITars:
+    isUITars: getAIConfigInBoolean3(MIDSCENE_USE_VLM_UI_TARS)
   });
   const assertionText = extraTextFromUserPrompt(assertion);
   const msgs = [
@@ -2512,7 +2729,7 @@ ${assertionText}
 // src/ai-model/llm-planning.ts
 import { vlLocateMode as vlLocateMode5 } from "@midscene/shared/env";
 import { paddingToMatchBlockByBase64 as paddingToMatchBlockByBase642 } from "@midscene/shared/img";
-import { assert as
+import { assert as assert6 } from "@midscene/shared/utils";
 async function plan(userInstruction, opts) {
   const { callAI, context } = opts || {};
   const { screenshotBase64, size } = context;
@@ -2574,7 +2791,7 @@ async function plan(userInstruction, opts) {
     usage,
     yamlFlow: buildYamlFlowFromPlans(actions, planFromAI.sleep)
   };
-
+  assert6(planFromAI, "can't get plans from AI");
   if (vlLocateMode5()) {
     actions.forEach((action) => {
       if (action.locate) {
@@ -2590,7 +2807,7 @@ async function plan(userInstruction, opts) {
        }
      }
    });
-
+    assert6(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
  } else {
    actions.forEach((action) => {
      if (action.locate?.id) {
@@ -2618,8 +2835,8 @@ import {
 } from "@midscene/shared/env";
 import { resizeImgBase64 } from "@midscene/shared/img";
 import { transformHotkeyInput } from "@midscene/shared/keyboard-layout";
-import { getDebug as
-import { assert as
+import { getDebug as getDebug5 } from "@midscene/shared/logger";
+import { assert as assert7 } from "@midscene/shared/utils";
 import { actionParser } from "@ui-tars/action-parser";

 // src/ai-model/prompt/ui-tars-planning.ts
@@ -2658,7 +2875,7 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
 var getSummary = (prediction) => prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, "").trim();

 // src/ai-model/ui-tars-planning.ts
-var debug =
+var debug = getDebug5("ui-tars-planning");
 var bboxSize = 10;
 var pointToBbox = (point, width, height) => {
   return [
@@ -2696,7 +2913,7 @@ async function vlmPlanning(options) {
   const transformActions = [];
   parsed.forEach((action) => {
     if (action.action_type === "click") {
-
+      assert7(action.action_inputs.start_box, "start_box is required");
       const point = getPoint(action.action_inputs.start_box, size);
       transformActions.push({
         type: "Locate",
@@ -2723,8 +2940,8 @@ async function vlmPlanning(options) {
        param: action.thought || ""
      });
    } else if (action.action_type === "drag") {
-
-
+      assert7(action.action_inputs.start_box, "start_box is required");
+      assert7(action.action_inputs.end_box, "end_box is required");
      const startPoint = getPoint(action.action_inputs.start_box, size);
      const endPoint = getPoint(action.action_inputs.end_box, size);
      transformActions.push({
@@ -2806,7 +3023,7 @@ async function vlmPlanning(options) {
        param: {}
      });
    } else if (action.action_type === "androidLongPress") {
-
+      assert7(
        action.action_inputs.start_coords,
        "start_coords is required for androidLongPress"
      );
@@ -2922,4 +3139,4 @@ export {
   resizeImageForUiTars
 };

-//# sourceMappingURL=chunk-
+//# sourceMappingURL=chunk-G2JTYWI6.js.map