@midscene/core 0.25.4-beta-20250807062119.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model.d.ts +7 -6
- package/dist/es/ai-model.js +1 -1
- package/dist/es/{chunk-G2JTYWI6.js → chunk-I5LBWOQA.js} +156 -373
- package/dist/es/chunk-I5LBWOQA.js.map +1 -0
- package/dist/es/{chunk-JH54OF4E.js → chunk-NBFEZEAH.js} +3 -3
- package/dist/es/index.d.ts +6 -6
- package/dist/es/index.js +4 -5
- package/dist/es/index.js.map +1 -1
- package/dist/es/{llm-planning-f449f3b8.d.ts → llm-planning-92cec090.d.ts} +2 -3
- package/dist/es/{types-7435eba0.d.ts → types-b4a208c6.d.ts} +3 -9
- package/dist/es/utils.d.ts +1 -1
- package/dist/es/utils.js +1 -1
- package/dist/lib/ai-model.d.ts +7 -6
- package/dist/lib/ai-model.js +2 -2
- package/dist/lib/{chunk-G2JTYWI6.js → chunk-I5LBWOQA.js} +141 -358
- package/dist/lib/chunk-I5LBWOQA.js.map +1 -0
- package/dist/lib/{chunk-JH54OF4E.js → chunk-NBFEZEAH.js} +3 -3
- package/dist/lib/index.d.ts +6 -6
- package/dist/lib/index.js +14 -15
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/{llm-planning-f449f3b8.d.ts → llm-planning-92cec090.d.ts} +2 -3
- package/dist/{types/types-7435eba0.d.ts → lib/types-b4a208c6.d.ts} +3 -9
- package/dist/lib/utils.d.ts +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model.d.ts +7 -6
- package/dist/types/index.d.ts +6 -6
- package/dist/types/{llm-planning-f449f3b8.d.ts → llm-planning-92cec090.d.ts} +2 -3
- package/dist/{lib/types-7435eba0.d.ts → types/types-b4a208c6.d.ts} +3 -9
- package/dist/types/utils.d.ts +1 -1
- package/package.json +3 -3
- package/dist/es/chunk-G2JTYWI6.js.map +0 -1
- package/dist/lib/chunk-G2JTYWI6.js.map +0 -1
- /package/dist/es/{chunk-JH54OF4E.js.map → chunk-NBFEZEAH.js.map} +0 -0
- /package/dist/lib/{chunk-JH54OF4E.js.map → chunk-NBFEZEAH.js.map} +0 -0
@@ -5,16 +5,35 @@ import {
   getBearerTokenProvider
 } from "@azure/identity";
 import {
+  ANTHROPIC_API_KEY,
+  AZURE_OPENAI_API_VERSION,
+  AZURE_OPENAI_DEPLOYMENT,
+  AZURE_OPENAI_ENDPOINT,
+  AZURE_OPENAI_KEY,
   MIDSCENE_API_TYPE,
+  MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_AZURE_OPENAI_SCOPE,
+  MIDSCENE_DEBUG_AI_PROFILE,
+  MIDSCENE_DEBUG_AI_RESPONSE,
   MIDSCENE_LANGSMITH_DEBUG,
+  MIDSCENE_MODEL_NAME,
+  MIDSCENE_OPENAI_HTTP_PROXY,
+  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
+  MIDSCENE_OPENAI_SOCKS_PROXY,
+  MIDSCENE_USE_ANTHROPIC_SDK,
+  MIDSCENE_USE_AZURE_OPENAI,
+  OPENAI_API_KEY,
+  OPENAI_BASE_URL,
   OPENAI_MAX_TOKENS,
-
-
+  OPENAI_USE_AZURE,
+  getAIConfig,
+  getAIConfigInBoolean,
+  getAIConfigInJson,
   uiTarsModelVersion,
   vlLocateMode as vlLocateMode3
 } from "@midscene/shared/env";
-import { getDebug as
-import { assert as
+import { enableDebug, getDebug as getDebug2 } from "@midscene/shared/logger";
+import { assert as assert3 } from "@midscene/shared/utils";
 import { ifInBrowser } from "@midscene/shared/utils";
 import { HttpsProxyAgent } from "https-proxy-agent";
 import { jsonrepair } from "jsonrepair";
@@ -36,11 +55,10 @@ var AIActionType = /* @__PURE__ */ ((AIActionType2) => {
   AIActionType2[AIActionType2["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
   return AIActionType2;
 })(AIActionType || {});
-async function callAiFn(msgs, AIActionTypeValue
+async function callAiFn(msgs, AIActionTypeValue) {
   const { content, usage } = await callToGetJSONObject(
     msgs,
-    AIActionTypeValue
-    modelPreferences
+    AIActionTypeValue
   );
   return { content, usage };
 }
@@ -1145,57 +1163,24 @@ pageDescription:
   });
 };
 
-// src/ai-model/service-caller/
-
-
-
-
-
-
-
-
-
-
-
-
-  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
-  MIDSCENE_OPENAI_SOCKS_PROXY,
-  MIDSCENE_USE_ANTHROPIC_SDK,
-  MIDSCENE_USE_AZURE_OPENAI,
-  MIDSCENE_VQA_ANTHROPIC_API_KEY,
-  MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
-  MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
-  MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
-  MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
-  MIDSCENE_VQA_AZURE_OPENAI_KEY,
-  MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
-  MIDSCENE_VQA_MODEL_NAME,
-  MIDSCENE_VQA_OPENAI_API_KEY,
-  MIDSCENE_VQA_OPENAI_BASE_URL,
-  MIDSCENE_VQA_OPENAI_HTTP_PROXY,
-  MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
-  MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
-  MIDSCENE_VQA_OPENAI_USE_AZURE,
-  MIDSCENE_VQA_USE_ANTHROPIC_SDK,
-  MIDSCENE_VQA_USE_AZURE_OPENAI,
-  OPENAI_API_KEY,
-  OPENAI_BASE_URL,
-  OPENAI_USE_AZURE,
-  getAIConfig,
-  getAIConfigInBoolean,
-  getAIConfigInJson
-} from "@midscene/shared/env";
-import { enableDebug, getDebug as getDebug2 } from "@midscene/shared/logger";
-import { assert as assert3 } from "@midscene/shared/utils";
-function getModelName() {
-  let modelName = "gpt-4o";
-  const nameInConfig = getAIConfig(MIDSCENE_MODEL_NAME);
-  if (nameInConfig) {
-    modelName = nameInConfig;
-  }
-  return modelName;
+// src/ai-model/service-caller/index.ts
+function checkAIConfig() {
+  const openaiKey = getAIConfig(OPENAI_API_KEY);
+  const azureConfig = getAIConfig(MIDSCENE_USE_AZURE_OPENAI);
+  const anthropicKey = getAIConfig(ANTHROPIC_API_KEY);
+  const initConfigJson = getAIConfig(MIDSCENE_OPENAI_INIT_CONFIG_JSON);
+  if (openaiKey)
+    return true;
+  if (azureConfig)
+    return true;
+  if (anthropicKey)
+    return true;
+  return Boolean(initConfigJson);
 }
+var debugConfigInitialized = false;
 function initDebugConfig() {
+  if (debugConfigInitialized)
+    return;
   const shouldPrintTiming = getAIConfigInBoolean(MIDSCENE_DEBUG_AI_PROFILE);
   let debugConfig = "";
   if (shouldPrintTiming) {
@@ -1220,232 +1205,27 @@ function initDebugConfig() {
   if (debugConfig) {
     enableDebug(debugConfig);
   }
+  debugConfigInitialized = true;
 }
-var
-
-
-
-
-
-    );
-  } else {
-    assert3(
-      value,
-      `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName}, but got: ${value}
-Please check your config.`
-    );
-  }
-};
-var getModelConfigFromEnv = (modelName, keys, valueAssert) => {
-  const socksProxy = getAIConfig(keys.socksProxy);
-  const httpProxy = getAIConfig(keys.httpProxy);
-  if (getAIConfig(keys.openaiUseAzureDeprecated)) {
-    const openaiBaseURL = getAIConfig(keys.openaiBaseURL);
-    const openaiApiKey = getAIConfig(keys.openaiApiKey);
-    const openaiExtraConfig = getAIConfigInJson(keys.openaiExtraConfig);
-    valueAssert(
-      openaiBaseURL,
-      keys.openaiBaseURL,
-      keys.openaiUseAzureDeprecated
-    );
-    valueAssert(openaiApiKey, keys.openaiApiKey, keys.openaiUseAzureDeprecated);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      openaiUseAzureDeprecated: true,
-      openaiApiKey,
-      openaiBaseURL,
-      openaiExtraConfig
-    };
-  } else if (getAIConfig(keys.useAzureOpenai)) {
-    const azureOpenaiScope = getAIConfig(keys.azureOpenaiScope);
-    const azureOpenaiApiKey = getAIConfig(keys.azureOpenaiApiKey);
-    const azureOpenaiEndpoint = getAIConfig(keys.azureOpenaiEndpoint);
-    const azureOpenaiDeployment = getAIConfig(keys.azureOpenaiDeployment);
-    const azureOpenaiApiVersion = getAIConfig(keys.azureOpenaiApiVersion);
-    const azureExtraConfig = getAIConfigInJson(keys.azureExtraConfig);
-    const openaiExtraConfig = getAIConfigInJson(keys.openaiExtraConfig);
-    valueAssert(azureOpenaiApiKey, keys.azureOpenaiApiKey, keys.useAzureOpenai);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      useAzureOpenai: true,
-      azureOpenaiScope,
-      azureOpenaiApiKey,
-      azureOpenaiEndpoint,
-      azureOpenaiDeployment,
-      azureOpenaiApiVersion,
-      azureExtraConfig,
-      openaiExtraConfig
-    };
-  } else if (getAIConfig(keys.useAnthropicSdk)) {
-    const anthropicApiKey = getAIConfig(keys.anthropicApiKey);
-    valueAssert(anthropicApiKey, keys.anthropicApiKey, keys.useAnthropicSdk);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      useAnthropicSdk: true,
-      anthropicApiKey
-    };
-  } else {
-    const openaiBaseURL = getAIConfig(keys.openaiBaseURL);
-    const openaiApiKey = getAIConfig(keys.openaiApiKey);
-    const openaiExtraConfig = getAIConfigInJson(keys.openaiExtraConfig);
-    valueAssert(openaiBaseURL, keys.openaiBaseURL);
-    valueAssert(openaiApiKey, keys.openaiApiKey);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      openaiBaseURL,
-      openaiApiKey,
-      openaiExtraConfig
-    };
-  }
-};
-var maskKey = (key, maskChar = "*") => {
-  if (typeof key !== "string" || key.length === 0) {
-    return key;
-  }
-  const prefixLen = 3;
-  const suffixLen = 3;
-  const keepLength = prefixLen + suffixLen;
-  if (key.length <= keepLength) {
-    return key;
-  }
-  const prefix = key.substring(0, prefixLen);
-  const suffix = key.substring(key.length - suffixLen);
-  const maskLength = key.length - keepLength;
-  const mask = maskChar.repeat(maskLength);
-  return `${prefix}${mask}${suffix}`;
-};
-var maskConfig = (config) => {
-  return Object.fromEntries(
-    Object.entries(config).map(([key, value]) => [
-      key,
-      ["openaiApiKey", "azureOpenaiApiKey", "anthropicApiKey"].includes(key) ? maskKey(value) : value
-    ])
-  );
-};
-var decideModelConfig = (modelPreferences) => {
-  initDebugConfig();
-  const debugLog = getDebug2("ai:decideModelConfig");
-  debugLog("modelPreferences", modelPreferences);
-  const isVQAIntent = modelPreferences?.intent === "VQA";
-  const vqaModelName = getAIConfig(MIDSCENE_VQA_MODEL_NAME);
-  if (isVQAIntent && vqaModelName) {
-    debugLog(
-      `current action is a VQA action and detected ${MIDSCENE_VQA_MODEL_NAME} ${vqaModelName}, will only read VQA related model config from process.env`
-    );
-    const config = getModelConfigFromEnv(
-      vqaModelName,
-      {
-        /**
-         * proxy
-         */
-        socksProxy: MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
-        httpProxy: MIDSCENE_VQA_OPENAI_HTTP_PROXY,
-        /**
-         * OpenAI
-         */
-        openaiBaseURL: MIDSCENE_VQA_OPENAI_BASE_URL,
-        openaiApiKey: MIDSCENE_VQA_OPENAI_API_KEY,
-        openaiExtraConfig: MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Azure
-         */
-        openaiUseAzureDeprecated: MIDSCENE_VQA_OPENAI_USE_AZURE,
-        useAzureOpenai: MIDSCENE_VQA_USE_AZURE_OPENAI,
-        azureOpenaiScope: MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
-        azureOpenaiApiKey: MIDSCENE_VQA_AZURE_OPENAI_KEY,
-        azureOpenaiEndpoint: MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
-        azureOpenaiApiVersion: MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
-        azureOpenaiDeployment: MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
-        azureExtraConfig: MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Anthropic
-         */
-        useAnthropicSdk: MIDSCENE_VQA_USE_ANTHROPIC_SDK,
-        anthropicApiKey: MIDSCENE_VQA_ANTHROPIC_API_KEY
-      },
-      createAssert(MIDSCENE_VQA_MODEL_NAME, vqaModelName)
-    );
-    debugLog("got model config for VQA usage:", maskConfig(config));
-    return config;
-  } else {
-    debugLog("read model config from process.env as normal.");
-    const commonModelName = getAIConfig(MIDSCENE_MODEL_NAME);
-    assert3(
-      commonModelName,
-      `${MIDSCENE_MODEL_NAME} is empty, please check your config.`
-    );
-    const config = getModelConfigFromEnv(
-      commonModelName,
-      {
-        /**
-         * proxy
-         */
-        socksProxy: MIDSCENE_OPENAI_SOCKS_PROXY,
-        httpProxy: MIDSCENE_OPENAI_HTTP_PROXY,
-        /**
-         * OpenAI
-         */
-        openaiBaseURL: OPENAI_BASE_URL,
-        openaiApiKey: OPENAI_API_KEY,
-        openaiExtraConfig: MIDSCENE_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Azure
-         */
-        openaiUseAzureDeprecated: OPENAI_USE_AZURE,
-        useAzureOpenai: MIDSCENE_USE_AZURE_OPENAI,
-        azureOpenaiScope: MIDSCENE_AZURE_OPENAI_SCOPE,
-        azureOpenaiApiKey: AZURE_OPENAI_KEY,
-        azureOpenaiEndpoint: AZURE_OPENAI_ENDPOINT,
-        azureOpenaiApiVersion: AZURE_OPENAI_API_VERSION,
-        azureOpenaiDeployment: AZURE_OPENAI_DEPLOYMENT,
-        azureExtraConfig: MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Anthropic
-         */
-        useAnthropicSdk: MIDSCENE_USE_ANTHROPIC_SDK,
-        anthropicApiKey: ANTHROPIC_API_KEY
-      },
-      createAssert(MIDSCENE_MODEL_NAME, commonModelName)
-    );
-    debugLog("got model config for common usage:", maskConfig(config));
-    return config;
+var defaultModel = "gpt-4o";
+function getModelName() {
+  let modelName = defaultModel;
+  const nameInConfig = getAIConfig(MIDSCENE_MODEL_NAME);
+  if (nameInConfig) {
+    modelName = nameInConfig;
   }
-
-
-// src/ai-model/service-caller/index.ts
+  return modelName;
+}
 async function createChatClient({
-  AIActionTypeValue
-  modelPreferences
+  AIActionTypeValue
 }) {
-
-    socksProxy,
-    httpProxy,
-    modelName,
-    openaiBaseURL,
-    openaiApiKey,
-    openaiExtraConfig,
-    openaiUseAzureDeprecated,
-    useAzureOpenai,
-    azureOpenaiScope,
-    azureOpenaiApiKey,
-    azureOpenaiEndpoint,
-    azureOpenaiApiVersion,
-    azureOpenaiDeployment,
-    azureExtraConfig,
-    useAnthropicSdk,
-    anthropicApiKey
-  } = decideModelConfig(modelPreferences);
+  initDebugConfig();
   let openai;
+  const extraConfig = getAIConfigInJson(MIDSCENE_OPENAI_INIT_CONFIG_JSON);
+  const socksProxy = getAIConfig(MIDSCENE_OPENAI_SOCKS_PROXY);
+  const httpProxy = getAIConfig(MIDSCENE_OPENAI_HTTP_PROXY);
   let proxyAgent = void 0;
-  const debugProxy =
+  const debugProxy = getDebug2("ai:call:proxy");
   if (httpProxy) {
     debugProxy("using http proxy", httpProxy);
     proxyAgent = new HttpsProxyAgent(httpProxy);
@@ -1453,56 +1233,70 @@ async function createChatClient({
     debugProxy("using socks proxy", socksProxy);
     proxyAgent = new SocksProxyAgent(socksProxy);
   }
-  if (
+  if (getAIConfig(OPENAI_USE_AZURE)) {
     openai = new AzureOpenAI({
-      baseURL:
-      apiKey:
+      baseURL: getAIConfig(OPENAI_BASE_URL),
+      apiKey: getAIConfig(OPENAI_API_KEY),
       httpAgent: proxyAgent,
-      ...
+      ...extraConfig,
       dangerouslyAllowBrowser: true
     });
-  } else if (
+  } else if (getAIConfig(MIDSCENE_USE_AZURE_OPENAI)) {
+    const extraAzureConfig = getAIConfigInJson(
+      MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
+    );
+    const scope = getAIConfig(MIDSCENE_AZURE_OPENAI_SCOPE);
     let tokenProvider = void 0;
-    if (
-
+    if (scope) {
+      assert3(
        !ifInBrowser,
        "Azure OpenAI is not supported in browser with Midscene."
      );
      const credential = new DefaultAzureCredential();
-
+      assert3(scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
+      tokenProvider = getBearerTokenProvider(credential, scope);
      openai = new AzureOpenAI({
        azureADTokenProvider: tokenProvider,
-        endpoint:
-        apiVersion:
-        deployment:
-        ...
-        ...
+        endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),
+        apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),
+        deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),
+        ...extraConfig,
+        ...extraAzureConfig
      });
    } else {
      openai = new AzureOpenAI({
-        apiKey:
-        endpoint:
-        apiVersion:
-        deployment:
+        apiKey: getAIConfig(AZURE_OPENAI_KEY),
+        endpoint: getAIConfig(AZURE_OPENAI_ENDPOINT),
+        apiVersion: getAIConfig(AZURE_OPENAI_API_VERSION),
+        deployment: getAIConfig(AZURE_OPENAI_DEPLOYMENT),
        dangerouslyAllowBrowser: true,
-        ...
-        ...
+        ...extraConfig,
+        ...extraAzureConfig
      });
    }
-  } else if (!
+  } else if (!getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {
+    const baseURL = getAIConfig(OPENAI_BASE_URL);
+    if (typeof baseURL === "string") {
+      if (!/^https?:\/\//.test(baseURL)) {
+        throw new Error(
+          `OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}
+Please check your config.`
+        );
+      }
+    }
     openai = new OpenAI({
-      baseURL:
-      apiKey:
+      baseURL: getAIConfig(OPENAI_BASE_URL),
+      apiKey: getAIConfig(OPENAI_API_KEY),
       httpAgent: proxyAgent,
-      ...
+      ...extraConfig,
       defaultHeaders: {
-        ...
+        ...extraConfig?.defaultHeaders || {},
        [MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
      },
      dangerouslyAllowBrowser: true
    });
   }
-  if (openai &&
+  if (openai && getAIConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)) {
    if (ifInBrowser) {
      throw new Error("langsmith is not supported in browser");
    }
@@ -1513,13 +1307,14 @@ async function createChatClient({
   if (typeof openai !== "undefined") {
     return {
       completion: openai.chat.completions,
-      style: "openai"
-      modelName
+      style: "openai"
     };
   }
-  if (
+  if (getAIConfig(MIDSCENE_USE_ANTHROPIC_SDK)) {
+    const apiKey = getAIConfig(ANTHROPIC_API_KEY);
+    assert3(apiKey, "ANTHROPIC_API_KEY is required");
     openai = new Anthropic({
-      apiKey
+      apiKey,
       httpAgent: proxyAgent,
       dangerouslyAllowBrowser: true
     });
@@ -1527,23 +1322,25 @@ async function createChatClient({
   if (typeof openai !== "undefined" && openai.messages) {
     return {
       completion: openai.messages,
-      style: "anthropic"
-      modelName
+      style: "anthropic"
     };
   }
   throw new Error("Openai SDK or Anthropic SDK is not initialized");
 }
-async function call2(messages, AIActionTypeValue,
-
-
-
+async function call2(messages, AIActionTypeValue, responseFormat, options) {
+  assert3(
+    checkAIConfig(),
+    "Cannot find config for AI model service. If you are using a self-hosted model without validating the API key, please set `OPENAI_API_KEY` to any non-null value. https://midscenejs.com/model-provider.html"
+  );
+  const { completion, style } = await createChatClient({
+    AIActionTypeValue
   });
-  const
-  const
-  const
-  const
-  const debugProfileDetail = getDebug3("ai:profile:detail");
+  const maxTokens = getAIConfig(OPENAI_MAX_TOKENS);
+  const debugCall = getDebug2("ai:call");
+  const debugProfileStats = getDebug2("ai:profile:stats");
+  const debugProfileDetail = getDebug2("ai:profile:detail");
   const startTime = Date.now();
+  const model = getModelName();
   const isStreaming = options?.stream && options?.onChunk;
   let content;
   let accumulated = "";
@@ -1560,12 +1357,12 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
   try {
     if (style === "openai") {
       debugCall(
-        `sending ${isStreaming ? "streaming " : ""}request to ${
+        `sending ${isStreaming ? "streaming " : ""}request to ${model}`
       );
       if (isStreaming) {
         const stream = await completion.create(
           {
-            model
+            model,
             messages,
             response_format: responseFormat,
             ...commonConfig
@@ -1622,23 +1419,23 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
         }
         content = accumulated;
         debugProfileStats(
-          `streaming model, ${
+          `streaming model, ${model}, mode, ${vlLocateMode3() || "default"}, cost-ms, ${timeCost}`
         );
       } else {
         const result = await completion.create({
-          model
+          model,
           messages,
           response_format: responseFormat,
           ...commonConfig
         });
         timeCost = Date.now() - startTime;
         debugProfileStats(
-          `model, ${
+          `model, ${model}, mode, ${vlLocateMode3() || "default"}, ui-tars-version, ${uiTarsModelVersion()}, prompt-tokens, ${result.usage?.prompt_tokens || ""}, completion-tokens, ${result.usage?.completion_tokens || ""}, total-tokens, ${result.usage?.total_tokens || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
         );
         debugProfileDetail(
           `model usage detail: ${JSON.stringify(result.usage)}`
         );
-
+        assert3(
           result.choices,
           `invalid response from LLM service: ${JSON.stringify(result)}`
         );
@@ -1646,12 +1443,12 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
         usage = result.usage;
       }
       debugCall(`response: ${content}`);
-
+      assert3(content, "empty content");
     } else if (style === "anthropic") {
       const convertImageContent = (content2) => {
         if (content2.type === "image_url") {
           const imgBase64 = content2.image_url.url;
-
+          assert3(imgBase64, "image_url is required");
           return {
             source: {
               type: "base64",
@@ -1665,7 +1462,7 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
       };
       if (isStreaming) {
         const stream = await completion.create({
-          model
+          model,
           system: "You are a versatile professional in software UI automation",
           messages: messages.map((m) => ({
             role: "user",
@@ -1709,7 +1506,7 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
         content = accumulated;
       } else {
         const result = await completion.create({
-          model
+          model,
           system: "You are a versatile professional in software UI automation",
           messages: messages.map((m) => ({
             role: "user",
@@ -1722,7 +1519,7 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
         content = result.content[0].text;
         usage = result.usage;
       }
-
+      assert3(content, "empty content");
     }
     if (isStreaming && !usage) {
       const estimatedTokens = Math.max(
@@ -1756,9 +1553,10 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
     throw newError;
   }
 }
-
+async function callToGetJSONObject(messages, AIActionTypeValue) {
   let responseFormat;
-
+  const model = getModelName();
+  if (model.includes("gpt-4")) {
     switch (AIActionTypeValue) {
       case 0 /* ASSERT */:
         responseFormat = assertSchema;
@@ -1775,19 +1573,11 @@ var getResponseFormat = (modelName, AIActionTypeValue) => {
         break;
     }
   }
-  if (
+  if (model === "gpt-4o-2024-05-13") {
     responseFormat = { type: "json_object" /* JSON */ };
   }
-
-
-async function callToGetJSONObject(messages, AIActionTypeValue, modelPreferences) {
-  const response = await call2(
-    messages,
-    AIActionTypeValue,
-    void 0,
-    modelPreferences
-  );
-  assert4(response, "empty response");
+  const response = await call2(messages, AIActionTypeValue, responseFormat);
+  assert3(response, "empty response");
   const jsonContent = safeParseJson(response.content);
   return { content: jsonContent, usage: response.usage };
 }
@@ -2071,7 +1861,7 @@ Respond with YAML only, no explanations.`
     });
   }
   if (options.stream && options.onChunk) {
-    return await call2(prompt, 2 /* EXTRACT_DATA */, {
+    return await call2(prompt, 2 /* EXTRACT_DATA */, void 0, {
       stream: true,
       onChunk: options.onChunk
     });
@@ -2194,7 +1984,7 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
     }
   ];
   if (options.stream && options.onChunk) {
-    return await call2(prompt, 2 /* EXTRACT_DATA */, {
+    return await call2(prompt, 2 /* EXTRACT_DATA */, void 0, {
      stream: true,
      onChunk: options.onChunk
    });
@@ -2215,7 +2005,7 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
 import {
   MIDSCENE_USE_QWEN_VL,
   MIDSCENE_USE_VLM_UI_TARS,
-  getAIConfigInBoolean as
+  getAIConfigInBoolean as getAIConfigInBoolean2,
   vlLocateMode as vlLocateMode4
 } from "@midscene/shared/env";
 import {
@@ -2223,8 +2013,8 @@ import {
   paddingToMatchBlockByBase64,
   preProcessImageUrl
 } from "@midscene/shared/img";
-import { getDebug as
-import { assert as
+import { getDebug as getDebug3 } from "@midscene/shared/logger";
+import { assert as assert4 } from "@midscene/shared/utils";
 
 // src/ai-model/prompt/extraction.ts
 import { PromptTemplate as PromptTemplate3 } from "@langchain/core/prompts";
@@ -2379,8 +2169,8 @@ var sectionLocatorInstruction = new PromptTemplate4({
 });
 
 // src/ai-model/inspect.ts
-var debugInspect =
-var debugSection =
+var debugInspect = getDebug3("ai:inspect");
+var debugSection = getDebug3("ai:section");
 var extraTextFromUserPrompt = (prompt) => {
   if (typeof prompt === "string") {
     return prompt;
@@ -2434,7 +2224,7 @@ async function AiLocateElement(options) {
   const { context, targetElementDescription, callAI } = options;
   const { screenshotBase64 } = context;
   const { description, elementById, insertElementByPosition } = await describeUserPage(context);
-
+  assert4(
     targetElementDescription,
     "cannot find the target element description"
   );
@@ -2445,11 +2235,11 @@ async function AiLocateElement(options) {
   const systemPrompt = systemPromptToLocateElement(vlLocateMode4());
   let imagePayload = screenshotBase64;
   if (options.searchConfig) {
-
+    assert4(
       options.searchConfig.rect,
       "searchArea is provided but its rect cannot be found. Failed to locate element"
     );
-
+    assert4(
       options.searchConfig.imageBase64,
       "searchArea is provided but its imageBase64 cannot be found. Failed to locate element"
     );
@@ -2601,7 +2391,7 @@ async function AiLocateSection(options) {
     imageBase64 = await cropByRect(
       screenshotBase64,
       sectionRect,
-
+      getAIConfigInBoolean2(MIDSCENE_USE_QWEN_VL)
     );
   }
   return {
@@ -2613,13 +2403,7 @@ async function AiLocateSection(options) {
   };
 }
 async function AiExtractElementInfo(options) {
-  const {
-    dataQuery,
-    context,
-    extractOption,
-    multimodalPrompt,
-    modelPreferences
-  } = options;
+  const { dataQuery, context, extractOption, multimodalPrompt } = options;
   const systemPrompt = systemPromptToExtract();
   const { screenshotBase64 } = context;
   const { description, elementById } = await describeUserPage(context, {
@@ -2668,8 +2452,7 @@ async function AiExtractElementInfo(options) {
   }
   const result = await callAiFn(
     msgs,
-    2 /* EXTRACT_DATA
-    modelPreferences
+    2 /* EXTRACT_DATA */
   );
   return {
     parseResult: result.content,
@@ -2679,10 +2462,10 @@ async function AiExtractElementInfo(options) {
 }
 async function AiAssert(options) {
   const { assertion, context } = options;
-
+  assert4(assertion, "assertion should not be empty");
   const { screenshotBase64 } = context;
   const systemPrompt = systemPromptToAssert({
-    isUITars:
+    isUITars: getAIConfigInBoolean2(MIDSCENE_USE_VLM_UI_TARS)
   });
   const assertionText = extraTextFromUserPrompt(assertion);
   const msgs = [
@@ -2729,7 +2512,7 @@ ${assertionText}
 // src/ai-model/llm-planning.ts
 import { vlLocateMode as vlLocateMode5 } from "@midscene/shared/env";
 import { paddingToMatchBlockByBase64 as paddingToMatchBlockByBase642 } from "@midscene/shared/img";
-import { assert as
+import { assert as assert5 } from "@midscene/shared/utils";
 async function plan(userInstruction, opts) {
   const { callAI, context } = opts || {};
   const { screenshotBase64, size } = context;
@@ -2791,7 +2574,7 @@ async function plan(userInstruction, opts) {
     usage,
     yamlFlow: buildYamlFlowFromPlans(actions, planFromAI.sleep)
   };
-
+  assert5(planFromAI, "can't get plans from AI");
   if (vlLocateMode5()) {
     actions.forEach((action) => {
       if (action.locate) {
@@ -2807,7 +2590,7 @@ async function plan(userInstruction, opts) {
         }
       }
     });
-
+    assert5(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
   } else {
     actions.forEach((action) => {
       if (action.locate?.id) {
@@ -2835,8 +2618,8 @@ import {
 } from "@midscene/shared/env";
 import { resizeImgBase64 } from "@midscene/shared/img";
 import { transformHotkeyInput } from "@midscene/shared/keyboard-layout";
-import { getDebug as
-import { assert as
+import { getDebug as getDebug4 } from "@midscene/shared/logger";
+import { assert as assert6 } from "@midscene/shared/utils";
 import { actionParser } from "@ui-tars/action-parser";
 
 // src/ai-model/prompt/ui-tars-planning.ts
@@ -2875,7 +2658,7 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
 var getSummary = (prediction) => prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, "").trim();
 
 // src/ai-model/ui-tars-planning.ts
-var debug =
+var debug = getDebug4("ui-tars-planning");
 var bboxSize = 10;
 var pointToBbox = (point, width, height) => {
   return [
@@ -2913,7 +2696,7 @@ async function vlmPlanning(options) {
   const transformActions = [];
   parsed.forEach((action) => {
     if (action.action_type === "click") {
-
+      assert6(action.action_inputs.start_box, "start_box is required");
       const point = getPoint(action.action_inputs.start_box, size);
       transformActions.push({
         type: "Locate",
@@ -2940,8 +2723,8 @@ async function vlmPlanning(options) {
         param: action.thought || ""
       });
     } else if (action.action_type === "drag") {
-
-
+      assert6(action.action_inputs.start_box, "start_box is required");
+      assert6(action.action_inputs.end_box, "end_box is required");
       const startPoint = getPoint(action.action_inputs.start_box, size);
       const endPoint = getPoint(action.action_inputs.end_box, size);
       transformActions.push({
@@ -3023,7 +2806,7 @@ async function vlmPlanning(options) {
         param: {}
       });
     } else if (action.action_type === "androidLongPress") {
-
+      assert6(
         action.action_inputs.start_coords,
         "start_coords is required for androidLongPress"
       );
@@ -3139,4 +2922,4 @@ export {
   resizeImageForUiTars
 };
 
-//# sourceMappingURL=chunk-
+//# sourceMappingURL=chunk-I5LBWOQA.js.map