@midscene/core 0.25.4-beta-20250807040242.0 → 0.25.4-beta-20250807062119.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/dist/es/ai-model.d.ts +6 -7
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-I5LBWOQA.js → chunk-G2JTYWI6.js} +373 -156
  4. package/dist/es/chunk-G2JTYWI6.js.map +1 -0
  5. package/dist/es/{chunk-EK3JQ4ZV.js → chunk-JH54OF4E.js} +3 -3
  6. package/dist/es/index.d.ts +6 -6
  7. package/dist/es/index.js +5 -4
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-45dd50cd.d.ts → llm-planning-f449f3b8.d.ts} +3 -2
  10. package/dist/es/{types-da4fb35b.d.ts → types-7435eba0.d.ts} +8 -1
  11. package/dist/es/utils.d.ts +1 -1
  12. package/dist/es/utils.js +1 -1
  13. package/dist/lib/ai-model.d.ts +6 -7
  14. package/dist/lib/ai-model.js +2 -2
  15. package/dist/lib/{chunk-I5LBWOQA.js → chunk-G2JTYWI6.js} +358 -141
  16. package/dist/lib/chunk-G2JTYWI6.js.map +1 -0
  17. package/dist/lib/{chunk-EK3JQ4ZV.js → chunk-JH54OF4E.js} +3 -3
  18. package/dist/lib/index.d.ts +6 -6
  19. package/dist/lib/index.js +15 -14
  20. package/dist/lib/index.js.map +1 -1
  21. package/dist/lib/{llm-planning-45dd50cd.d.ts → llm-planning-f449f3b8.d.ts} +3 -2
  22. package/dist/{types/types-da4fb35b.d.ts → lib/types-7435eba0.d.ts} +8 -1
  23. package/dist/lib/utils.d.ts +1 -1
  24. package/dist/lib/utils.js +2 -2
  25. package/dist/types/ai-model.d.ts +6 -7
  26. package/dist/types/index.d.ts +6 -6
  27. package/dist/types/{llm-planning-45dd50cd.d.ts → llm-planning-f449f3b8.d.ts} +3 -2
  28. package/dist/{lib/types-da4fb35b.d.ts → types/types-7435eba0.d.ts} +8 -1
  29. package/dist/types/utils.d.ts +1 -1
  30. package/package.json +3 -3
  31. package/dist/es/chunk-I5LBWOQA.js.map +0 -1
  32. package/dist/lib/chunk-I5LBWOQA.js.map +0 -1
  33. package/dist/es/{chunk-EK3JQ4ZV.js.map → chunk-JH54OF4E.js.map} +0 -0
  34. package/dist/lib/{chunk-EK3JQ4ZV.js.map → chunk-JH54OF4E.js.map} +0 -0
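The headline change in this diff is that an optional `modelPreferences` argument is now threaded through the whole AI-call pipeline (`callAiFn` → `callToGetJSONObject` → `call` → `createChatClient` → `decideModelConfig`), so a call marked with the `VQA` intent can resolve its own model configuration from the new `MIDSCENE_VQA_*` environment variables instead of the global ones. A minimal TypeScript sketch of the new call shape; the `ModelPreferences` type is inferred from the compiled output below (only the `intent` field is visible in this diff, so treat the shape as an assumption):

    // Inferred shape; the real declaration lives in types-7435eba0.d.ts, which this diff renames but does not print.
    type ModelPreferences = { intent?: 'VQA' };

    declare function callAiFn(
      msgs: unknown[],
      actionType: number, // an AIActionType value, e.g. 2 /* EXTRACT_DATA */
      modelPreferences?: ModelPreferences,
    ): Promise<{ content: unknown; usage?: unknown }>;

    // A VQA-intent extraction now prefers MIDSCENE_VQA_MODEL_NAME and friends when they are set:
    await callAiFn(msgs, 2 /* EXTRACT_DATA */, { intent: 'VQA' });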
@@ -10,25 +10,6 @@ var _identity = require('@azure/identity');
 
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
 
  var _env = require('@midscene/shared/env');
@@ -55,10 +36,11 @@ var AIActionType = /* @__PURE__ */ ((AIActionType2) => {
  AIActionType2[AIActionType2["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
  return AIActionType2;
  })(AIActionType || {});
- async function callAiFn(msgs, AIActionTypeValue) {
+ async function callAiFn(msgs, AIActionTypeValue, modelPreferences) {
  const { content, usage } = await callToGetJSONObject(
  msgs,
- AIActionTypeValue
+ AIActionTypeValue,
+ modelPreferences
  );
  return { content, usage };
  }
@@ -1163,24 +1145,57 @@ pageDescription:
  });
  };
 
- // src/ai-model/service-caller/index.ts
- function checkAIConfig() {
- const openaiKey = _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY);
- const azureConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI);
- const anthropicKey = _env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY);
- const initConfigJson = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
- if (openaiKey)
- return true;
- if (azureConfig)
- return true;
- if (anthropicKey)
- return true;
- return Boolean(initConfigJson);
+ // src/ai-model/service-caller/utils.ts
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ function getModelName() {
+ let modelName = "gpt-4o";
+ const nameInConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME);
+ if (nameInConfig) {
+ modelName = nameInConfig;
+ }
+ return modelName;
  }
- var debugConfigInitialized = false;
  function initDebugConfig() {
- if (debugConfigInitialized)
- return;
  const shouldPrintTiming = _env.getAIConfigInBoolean.call(void 0, _env.MIDSCENE_DEBUG_AI_PROFILE);
  let debugConfig = "";
  if (shouldPrintTiming) {
@@ -1205,25 +1220,230 @@ function initDebugConfig() {
  if (debugConfig) {
  _logger.enableDebug.call(void 0, debugConfig);
  }
- debugConfigInitialized = true;
  }
- var defaultModel = "gpt-4o";
- function getModelName() {
- let modelName = defaultModel;
- const nameInConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME);
- if (nameInConfig) {
- modelName = nameInConfig;
+ var createAssert = (modelNameKey, modelName) => (value, key, modelVendorFlag) => {
+ if (modelVendorFlag) {
+ _utils.assert.call(void 0,
+ value,
+ `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} and ${modelVendorFlag} has also been specified, but got: ${value}
+ Please check your config.`
+ );
+ } else {
+ _utils.assert.call(void 0,
+ value,
+ `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName}, but got: ${value}
+ Please check your config.`
+ );
  }
- return modelName;
- }
+ };
+ var getModelConfigFromEnv = (modelName, keys, valueAssert) => {
+ const socksProxy = _env.getAIConfig.call(void 0, keys.socksProxy);
+ const httpProxy = _env.getAIConfig.call(void 0, keys.httpProxy);
+ if (_env.getAIConfig.call(void 0, keys.openaiUseAzureDeprecated)) {
+ const openaiBaseURL = _env.getAIConfig.call(void 0, keys.openaiBaseURL);
+ const openaiApiKey = _env.getAIConfig.call(void 0, keys.openaiApiKey);
+ const openaiExtraConfig = _env.getAIConfigInJson.call(void 0, keys.openaiExtraConfig);
+ valueAssert(
+ openaiBaseURL,
+ keys.openaiBaseURL,
+ keys.openaiUseAzureDeprecated
+ );
+ valueAssert(openaiApiKey, keys.openaiApiKey, keys.openaiUseAzureDeprecated);
+ return {
+ socksProxy,
+ httpProxy,
+ modelName,
+ openaiUseAzureDeprecated: true,
+ openaiApiKey,
+ openaiBaseURL,
+ openaiExtraConfig
+ };
+ } else if (_env.getAIConfig.call(void 0, keys.useAzureOpenai)) {
+ const azureOpenaiScope = _env.getAIConfig.call(void 0, keys.azureOpenaiScope);
+ const azureOpenaiApiKey = _env.getAIConfig.call(void 0, keys.azureOpenaiApiKey);
+ const azureOpenaiEndpoint = _env.getAIConfig.call(void 0, keys.azureOpenaiEndpoint);
+ const azureOpenaiDeployment = _env.getAIConfig.call(void 0, keys.azureOpenaiDeployment);
+ const azureOpenaiApiVersion = _env.getAIConfig.call(void 0, keys.azureOpenaiApiVersion);
+ const azureExtraConfig = _env.getAIConfigInJson.call(void 0, keys.azureExtraConfig);
+ const openaiExtraConfig = _env.getAIConfigInJson.call(void 0, keys.openaiExtraConfig);
+ valueAssert(azureOpenaiApiKey, keys.azureOpenaiApiKey, keys.useAzureOpenai);
+ return {
+ socksProxy,
+ httpProxy,
+ modelName,
+ useAzureOpenai: true,
+ azureOpenaiScope,
+ azureOpenaiApiKey,
+ azureOpenaiEndpoint,
+ azureOpenaiDeployment,
+ azureOpenaiApiVersion,
+ azureExtraConfig,
+ openaiExtraConfig
+ };
+ } else if (_env.getAIConfig.call(void 0, keys.useAnthropicSdk)) {
+ const anthropicApiKey = _env.getAIConfig.call(void 0, keys.anthropicApiKey);
+ valueAssert(anthropicApiKey, keys.anthropicApiKey, keys.useAnthropicSdk);
+ return {
+ socksProxy,
+ httpProxy,
+ modelName,
+ useAnthropicSdk: true,
+ anthropicApiKey
+ };
+ } else {
+ const openaiBaseURL = _env.getAIConfig.call(void 0, keys.openaiBaseURL);
+ const openaiApiKey = _env.getAIConfig.call(void 0, keys.openaiApiKey);
+ const openaiExtraConfig = _env.getAIConfigInJson.call(void 0, keys.openaiExtraConfig);
+ valueAssert(openaiBaseURL, keys.openaiBaseURL);
+ valueAssert(openaiApiKey, keys.openaiApiKey);
+ return {
+ socksProxy,
+ httpProxy,
+ modelName,
+ openaiBaseURL,
+ openaiApiKey,
+ openaiExtraConfig
+ };
+ }
+ };
+ var maskKey = (key, maskChar = "*") => {
+ if (typeof key !== "string" || key.length === 0) {
+ return key;
+ }
+ const prefixLen = 3;
+ const suffixLen = 3;
+ const keepLength = prefixLen + suffixLen;
+ if (key.length <= keepLength) {
+ return key;
+ }
+ const prefix = key.substring(0, prefixLen);
+ const suffix = key.substring(key.length - suffixLen);
+ const maskLength = key.length - keepLength;
+ const mask = maskChar.repeat(maskLength);
+ return `${prefix}${mask}${suffix}`;
+ };
+ var maskConfig = (config) => {
+ return Object.fromEntries(
+ Object.entries(config).map(([key, value]) => [
+ key,
+ ["openaiApiKey", "azureOpenaiApiKey", "anthropicApiKey"].includes(key) ? maskKey(value) : value
+ ])
+ );
+ };
+ var decideModelConfig = (modelPreferences) => {
+ initDebugConfig();
+ const debugLog = _logger.getDebug.call(void 0, "ai:decideModelConfig");
+ debugLog("modelPreferences", modelPreferences);
+ const isVQAIntent = _optionalChain([modelPreferences, 'optionalAccess', _21 => _21.intent]) === "VQA";
+ const vqaModelName = _env.getAIConfig.call(void 0, _env.MIDSCENE_VQA_MODEL_NAME);
+ if (isVQAIntent && vqaModelName) {
+ debugLog(
+ `current action is a VQA action and detected ${_env.MIDSCENE_VQA_MODEL_NAME} ${vqaModelName}, will only read VQA related model config from process.env`
+ );
+ const config = getModelConfigFromEnv(
+ vqaModelName,
+ {
+ /**
+ * proxy
+ */
+ socksProxy: _env.MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
+ httpProxy: _env.MIDSCENE_VQA_OPENAI_HTTP_PROXY,
+ /**
+ * OpenAI
+ */
+ openaiBaseURL: _env.MIDSCENE_VQA_OPENAI_BASE_URL,
+ openaiApiKey: _env.MIDSCENE_VQA_OPENAI_API_KEY,
+ openaiExtraConfig: _env.MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
+ /**
+ * Azure
+ */
+ openaiUseAzureDeprecated: _env.MIDSCENE_VQA_OPENAI_USE_AZURE,
+ useAzureOpenai: _env.MIDSCENE_VQA_USE_AZURE_OPENAI,
+ azureOpenaiScope: _env.MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
+ azureOpenaiApiKey: _env.MIDSCENE_VQA_AZURE_OPENAI_KEY,
+ azureOpenaiEndpoint: _env.MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
+ azureOpenaiApiVersion: _env.MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
+ azureOpenaiDeployment: _env.MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
+ azureExtraConfig: _env.MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
+ /**
+ * Anthropic
+ */
+ useAnthropicSdk: _env.MIDSCENE_VQA_USE_ANTHROPIC_SDK,
+ anthropicApiKey: _env.MIDSCENE_VQA_ANTHROPIC_API_KEY
+ },
+ createAssert(_env.MIDSCENE_VQA_MODEL_NAME, vqaModelName)
+ );
+ debugLog("got model config for VQA usage:", maskConfig(config));
+ return config;
+ } else {
+ debugLog("read model config from process.env as normal.");
+ const commonModelName = _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME);
+ _utils.assert.call(void 0,
+ commonModelName,
+ `${_env.MIDSCENE_MODEL_NAME} is empty, please check your config.`
+ );
+ const config = getModelConfigFromEnv(
+ commonModelName,
+ {
+ /**
+ * proxy
+ */
+ socksProxy: _env.MIDSCENE_OPENAI_SOCKS_PROXY,
+ httpProxy: _env.MIDSCENE_OPENAI_HTTP_PROXY,
+ /**
+ * OpenAI
+ */
+ openaiBaseURL: _env.OPENAI_BASE_URL,
+ openaiApiKey: _env.OPENAI_API_KEY,
+ openaiExtraConfig: _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON,
+ /**
+ * Azure
+ */
+ openaiUseAzureDeprecated: _env.OPENAI_USE_AZURE,
+ useAzureOpenai: _env.MIDSCENE_USE_AZURE_OPENAI,
+ azureOpenaiScope: _env.MIDSCENE_AZURE_OPENAI_SCOPE,
+ azureOpenaiApiKey: _env.AZURE_OPENAI_KEY,
+ azureOpenaiEndpoint: _env.AZURE_OPENAI_ENDPOINT,
+ azureOpenaiApiVersion: _env.AZURE_OPENAI_API_VERSION,
+ azureOpenaiDeployment: _env.AZURE_OPENAI_DEPLOYMENT,
+ azureExtraConfig: _env.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
+ /**
+ * Anthropic
+ */
+ useAnthropicSdk: _env.MIDSCENE_USE_ANTHROPIC_SDK,
+ anthropicApiKey: _env.ANTHROPIC_API_KEY
+ },
+ createAssert(_env.MIDSCENE_MODEL_NAME, commonModelName)
+ );
+ debugLog("got model config for common usage:", maskConfig(config));
+ return config;
+ }
+ };
+
+ // src/ai-model/service-caller/index.ts
  async function createChatClient({
- AIActionTypeValue
+ AIActionTypeValue,
+ modelPreferences
  }) {
- initDebugConfig();
+ const {
+ socksProxy,
+ httpProxy,
+ modelName,
+ openaiBaseURL,
+ openaiApiKey,
+ openaiExtraConfig,
+ openaiUseAzureDeprecated,
+ useAzureOpenai,
+ azureOpenaiScope,
+ azureOpenaiApiKey,
+ azureOpenaiEndpoint,
+ azureOpenaiApiVersion,
+ azureOpenaiDeployment,
+ azureExtraConfig,
+ useAnthropicSdk,
+ anthropicApiKey
+ } = decideModelConfig(modelPreferences);
  let openai;
- const extraConfig = _env.getAIConfigInJson.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
- const socksProxy = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_SOCKS_PROXY);
- const httpProxy = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_HTTP_PROXY);
  let proxyAgent = void 0;
  const debugProxy = _logger.getDebug.call(void 0, "ai:call:proxy");
  if (httpProxy) {
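Among the new helpers above, `maskKey` exists so that `decideModelConfig` can log the resolved configuration (via `maskConfig`) without leaking API keys: it keeps the first and last three characters and masks the middle. An equivalent readable TypeScript sketch of the minified helper, with illustrative values only:

    // Same logic as the compiled maskKey above: keep a 3-char prefix and suffix, mask the rest.
    const maskKey = (key: string, maskChar = '*'): string => {
      const prefixLen = 3;
      const suffixLen = 3;
      if (key.length <= prefixLen + suffixLen) return key; // too short to mask safely
      const maskLen = key.length - prefixLen - suffixLen;
      return key.slice(0, prefixLen) + maskChar.repeat(maskLen) + key.slice(-suffixLen);
    };

    maskKey('sk-abc123xyz'); // => 'sk-******xyz'
    maskKey('short');        // => 'short'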
@@ -1233,64 +1453,50 @@ async function createChatClient({
  debugProxy("using socks proxy", socksProxy);
  proxyAgent = new (0, _socksproxyagent.SocksProxyAgent)(socksProxy);
  }
- if (_env.getAIConfig.call(void 0, _env.OPENAI_USE_AZURE)) {
+ if (openaiUseAzureDeprecated) {
  openai = new (0, _openai.AzureOpenAI)({
- baseURL: _env.getAIConfig.call(void 0, _env.OPENAI_BASE_URL),
- apiKey: _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY),
+ baseURL: openaiBaseURL,
+ apiKey: openaiApiKey,
  httpAgent: proxyAgent,
- ...extraConfig,
+ ...openaiExtraConfig,
  dangerouslyAllowBrowser: true
  });
- } else if (_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI)) {
- const extraAzureConfig = _env.getAIConfigInJson.call(void 0,
- _env.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
- );
- const scope = _env.getAIConfig.call(void 0, _env.MIDSCENE_AZURE_OPENAI_SCOPE);
+ } else if (useAzureOpenai) {
  let tokenProvider = void 0;
- if (scope) {
+ if (azureOpenaiScope) {
  _utils.assert.call(void 0,
  !_utils.ifInBrowser,
  "Azure OpenAI is not supported in browser with Midscene."
  );
  const credential = new (0, _identity.DefaultAzureCredential)();
- _utils.assert.call(void 0, scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
- tokenProvider = _identity.getBearerTokenProvider.call(void 0, credential, scope);
+ tokenProvider = _identity.getBearerTokenProvider.call(void 0, credential, azureOpenaiScope);
  openai = new (0, _openai.AzureOpenAI)({
  azureADTokenProvider: tokenProvider,
- endpoint: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_ENDPOINT),
- apiVersion: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_API_VERSION),
- deployment: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_DEPLOYMENT),
- ...extraConfig,
- ...extraAzureConfig
+ endpoint: azureOpenaiEndpoint,
+ apiVersion: azureOpenaiApiVersion,
+ deployment: azureOpenaiDeployment,
+ ...openaiExtraConfig,
+ ...azureExtraConfig
  });
  } else {
  openai = new (0, _openai.AzureOpenAI)({
- apiKey: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_KEY),
- endpoint: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_ENDPOINT),
- apiVersion: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_API_VERSION),
- deployment: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_DEPLOYMENT),
+ apiKey: azureOpenaiApiKey,
+ endpoint: azureOpenaiEndpoint,
+ apiVersion: azureOpenaiApiVersion,
+ deployment: azureOpenaiDeployment,
  dangerouslyAllowBrowser: true,
- ...extraConfig,
- ...extraAzureConfig
+ ...openaiExtraConfig,
+ ...azureExtraConfig
  });
  }
- } else if (!_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_ANTHROPIC_SDK)) {
- const baseURL = _env.getAIConfig.call(void 0, _env.OPENAI_BASE_URL);
- if (typeof baseURL === "string") {
- if (!/^https?:\/\//.test(baseURL)) {
- throw new Error(
- `OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}
- Please check your config.`
- );
- }
- }
+ } else if (!useAnthropicSdk) {
  openai = new (0, _openai2.default)({
- baseURL: _env.getAIConfig.call(void 0, _env.OPENAI_BASE_URL),
- apiKey: _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY),
+ baseURL: openaiBaseURL,
+ apiKey: openaiApiKey,
  httpAgent: proxyAgent,
- ...extraConfig,
+ ...openaiExtraConfig,
  defaultHeaders: {
- ..._optionalChain([extraConfig, 'optionalAccess', _21 => _21.defaultHeaders]) || {},
+ ..._optionalChain([openaiExtraConfig, 'optionalAccess', _22 => _22.defaultHeaders]) || {},
  [_env.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
  },
  dangerouslyAllowBrowser: true
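Note that `createChatClient` keeps the old provider precedence — legacy `OPENAI_USE_AZURE`, then `MIDSCENE_USE_AZURE_OPENAI` (with a bearer-token provider when `MIDSCENE_AZURE_OPENAI_SCOPE` is set), then the Anthropic SDK, then plain OpenAI — but every branch now reads the flags and values resolved by `decideModelConfig` instead of re-querying the environment. In outline (comments only, condensed from the hunk above):

    if (openaiUseAzureDeprecated) {
      // AzureOpenAI built from openaiBaseURL/openaiApiKey (legacy OPENAI_USE_AZURE path)
    } else if (useAzureOpenai) {
      // AzureOpenAI from endpoint/apiVersion/deployment; token provider when azureOpenaiScope is set
    } else if (!useAnthropicSdk) {
      // default OpenAI SDK client with proxy agent and extra config
    }
    // the Anthropic client is created further below, once the OpenAI-style branches fall through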
@@ -1307,14 +1513,13 @@ Please check your config.`
  if (typeof openai !== "undefined") {
  return {
  completion: openai.chat.completions,
- style: "openai"
+ style: "openai",
+ modelName
  };
  }
- if (_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_ANTHROPIC_SDK)) {
- const apiKey = _env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY);
- _utils.assert.call(void 0, apiKey, "ANTHROPIC_API_KEY is required");
+ if (useAnthropicSdk) {
  openai = new (0, _sdk.Anthropic)({
- apiKey,
+ apiKey: anthropicApiKey,
  httpAgent: proxyAgent,
  dangerouslyAllowBrowser: true
  });
@@ -1322,26 +1527,24 @@ Please check your config.`
  if (typeof openai !== "undefined" && openai.messages) {
  return {
  completion: openai.messages,
- style: "anthropic"
+ style: "anthropic",
+ modelName
  };
  }
  throw new Error("Openai SDK or Anthropic SDK is not initialized");
  }
- async function call2(messages, AIActionTypeValue, responseFormat, options) {
- _utils.assert.call(void 0,
- checkAIConfig(),
- "Cannot find config for AI model service. If you are using a self-hosted model without validating the API key, please set `OPENAI_API_KEY` to any non-null value. https://midscenejs.com/model-provider.html"
- );
- const { completion, style } = await createChatClient({
- AIActionTypeValue
+ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
+ const { completion, style, modelName } = await createChatClient({
+ AIActionTypeValue,
+ modelPreferences
  });
+ const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
  const maxTokens = _env.getAIConfig.call(void 0, _env.OPENAI_MAX_TOKENS);
  const debugCall = _logger.getDebug.call(void 0, "ai:call");
  const debugProfileStats = _logger.getDebug.call(void 0, "ai:profile:stats");
  const debugProfileDetail = _logger.getDebug.call(void 0, "ai:profile:detail");
  const startTime = Date.now();
- const model = getModelName();
- const isStreaming = _optionalChain([options, 'optionalAccess', _22 => _22.stream]) && _optionalChain([options, 'optionalAccess', _23 => _23.onChunk]);
+ const isStreaming = _optionalChain([options, 'optionalAccess', _23 => _23.stream]) && _optionalChain([options, 'optionalAccess', _24 => _24.onChunk]);
  let content;
  let accumulated = "";
  let usage;
@@ -1357,12 +1560,12 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  try {
  if (style === "openai") {
  debugCall(
- `sending ${isStreaming ? "streaming " : ""}request to ${model}`
+ `sending ${isStreaming ? "streaming " : ""}request to ${modelName}`
  );
  if (isStreaming) {
  const stream = await completion.create(
  {
- model,
+ model: modelName,
  messages,
  response_format: responseFormat,
  ...commonConfig
@@ -1372,8 +1575,8 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  }
  );
  for await (const chunk of stream) {
- const content2 = _optionalChain([chunk, 'access', _24 => _24.choices, 'optionalAccess', _25 => _25[0], 'optionalAccess', _26 => _26.delta, 'optionalAccess', _27 => _27.content]) || "";
- const reasoning_content = _optionalChain([chunk, 'access', _28 => _28.choices, 'optionalAccess', _29 => _29[0], 'optionalAccess', _30 => _30.delta, 'optionalAccess', _31 => _31.reasoning_content]) || "";
+ const content2 = _optionalChain([chunk, 'access', _25 => _25.choices, 'optionalAccess', _26 => _26[0], 'optionalAccess', _27 => _27.delta, 'optionalAccess', _28 => _28.content]) || "";
+ const reasoning_content = _optionalChain([chunk, 'access', _29 => _29.choices, 'optionalAccess', _30 => _30[0], 'optionalAccess', _31 => _31.delta, 'optionalAccess', _32 => _32.reasoning_content]) || "";
  if (chunk.usage) {
  usage = chunk.usage;
  }
@@ -1388,7 +1591,7 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  };
  options.onChunk(chunkData);
  }
- if (_optionalChain([chunk, 'access', _32 => _32.choices, 'optionalAccess', _33 => _33[0], 'optionalAccess', _34 => _34.finish_reason])) {
+ if (_optionalChain([chunk, 'access', _33 => _33.choices, 'optionalAccess', _34 => _34[0], 'optionalAccess', _35 => _35.finish_reason])) {
  timeCost = Date.now() - startTime;
  if (!usage) {
  const estimatedTokens = Math.max(
@@ -1419,18 +1622,18 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  }
  content = accumulated;
  debugProfileStats(
- `streaming model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, cost-ms, ${timeCost}`
+ `streaming model, ${modelName}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, cost-ms, ${timeCost}`
  );
  } else {
  const result = await completion.create({
- model,
+ model: modelName,
  messages,
  response_format: responseFormat,
  ...commonConfig
  });
  timeCost = Date.now() - startTime;
  debugProfileStats(
- `model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _35 => _35.usage, 'optionalAccess', _36 => _36.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _37 => _37.usage, 'optionalAccess', _38 => _38.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _39 => _39.usage, 'optionalAccess', _40 => _40.total_tokens]) || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
+ `model, ${modelName}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _36 => _36.usage, 'optionalAccess', _37 => _37.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _38 => _38.usage, 'optionalAccess', _39 => _39.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _40 => _40.usage, 'optionalAccess', _41 => _41.total_tokens]) || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
  );
  debugProfileDetail(
  `model usage detail: ${JSON.stringify(result.usage)}`
@@ -1462,7 +1665,7 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  };
  if (isStreaming) {
  const stream = await completion.create({
- model,
+ model: modelName,
  system: "You are a versatile professional in software UI automation",
  messages: messages.map((m) => ({
  role: "user",
@@ -1472,7 +1675,7 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  ...commonConfig
  });
  for await (const chunk of stream) {
- const content2 = _optionalChain([chunk, 'access', _41 => _41.delta, 'optionalAccess', _42 => _42.text]) || "";
+ const content2 = _optionalChain([chunk, 'access', _42 => _42.delta, 'optionalAccess', _43 => _43.text]) || "";
  if (content2) {
  accumulated += content2;
  const chunkData = {
@@ -1506,7 +1709,7 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  content = accumulated;
  } else {
  const result = await completion.create({
- model,
+ model: modelName,
  system: "You are a versatile professional in software UI automation",
  messages: messages.map((m) => ({
  role: "user",
@@ -1553,10 +1756,9 @@ async function call2(messages, AIActionTypeValue, responseFormat, options) {
  throw newError;
  }
  }
- async function callToGetJSONObject(messages, AIActionTypeValue) {
+ var getResponseFormat = (modelName, AIActionTypeValue) => {
  let responseFormat;
- const model = getModelName();
- if (model.includes("gpt-4")) {
+ if (modelName.includes("gpt-4")) {
  switch (AIActionTypeValue) {
  case 0 /* ASSERT */:
  responseFormat = assertSchema;
@@ -1573,10 +1775,18 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
  break;
  }
  }
- if (model === "gpt-4o-2024-05-13") {
+ if (modelName === "gpt-4o-2024-05-13") {
  responseFormat = { type: "json_object" /* JSON */ };
  }
- const response = await call2(messages, AIActionTypeValue, responseFormat);
+ return responseFormat;
+ };
+ async function callToGetJSONObject(messages, AIActionTypeValue, modelPreferences) {
+ const response = await call2(
+ messages,
+ AIActionTypeValue,
+ void 0,
+ modelPreferences
+ );
  _utils.assert.call(void 0, response, "empty response");
  const jsonContent = safeParseJson(response.content);
  return { content: jsonContent, usage: response.usage };
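The refactor above also moves response-format selection out of `callToGetJSONObject`: `call` first resolves the client (and, with it, the per-intent model name) through `createChatClient`, and only then derives the JSON response format. A condensed sketch of the new ordering inside `call`, using the names from the hunks above:

    // modelName now comes from decideModelConfig() rather than the removed getModelName():
    const { completion, style, modelName } = await createChatClient({ AIActionTypeValue, modelPreferences });
    // gpt-4* models get an action-specific schema; gpt-4o-2024-05-13 falls back to { type: 'json_object' }:
    const responseFormat = getResponseFormat(modelName, AIActionTypeValue);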
@@ -1615,8 +1825,8 @@ function preprocessDoubaoBboxJson(input) {
  }
  function safeParseJson(input) {
  const cleanJsonString = extractJSONFromCodeBlock(input);
- if (_optionalChain([cleanJsonString, 'optionalAccess', _43 => _43.match, 'call', _44 => _44(/\((\d+),(\d+)\)/)])) {
- return _optionalChain([cleanJsonString, 'access', _45 => _45.match, 'call', _46 => _46(/\((\d+),(\d+)\)/), 'optionalAccess', _47 => _47.slice, 'call', _48 => _48(1), 'access', _49 => _49.map, 'call', _50 => _50(Number)]);
+ if (_optionalChain([cleanJsonString, 'optionalAccess', _44 => _44.match, 'call', _45 => _45(/\((\d+),(\d+)\)/)])) {
+ return _optionalChain([cleanJsonString, 'access', _46 => _46.match, 'call', _47 => _47(/\((\d+),(\d+)\)/), 'optionalAccess', _48 => _48.slice, 'call', _49 => _49(1), 'access', _50 => _50.map, 'call', _51 => _51(Number)]);
  }
  try {
  return JSON.parse(cleanJsonString);
@@ -1800,7 +2010,7 @@ Respond with YAML only, no explanations.`
  });
  }
  const response = await call2(prompt, 2 /* EXTRACT_DATA */);
- if (_optionalChain([response, 'optionalAccess', _51 => _51.content]) && typeof response.content === "string") {
+ if (_optionalChain([response, 'optionalAccess', _52 => _52.content]) && typeof response.content === "string") {
  return response.content;
  }
  throw new Error("Failed to generate YAML test configuration");
@@ -1861,13 +2071,13 @@ Respond with YAML only, no explanations.`
  });
  }
  if (options.stream && options.onChunk) {
- return await call2(prompt, 2 /* EXTRACT_DATA */, void 0, {
+ return await call2(prompt, 2 /* EXTRACT_DATA */, {
  stream: true,
  onChunk: options.onChunk
  });
  } else {
  const response = await call2(prompt, 2 /* EXTRACT_DATA */);
- if (_optionalChain([response, 'optionalAccess', _52 => _52.content]) && typeof response.content === "string") {
+ if (_optionalChain([response, 'optionalAccess', _53 => _53.content]) && typeof response.content === "string") {
  return {
  content: response.content,
  usage: response.usage,
@@ -1930,7 +2140,7 @@ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
  }
  ];
  const response = await call2(prompt, 2 /* EXTRACT_DATA */);
- if (_optionalChain([response, 'optionalAccess', _53 => _53.content]) && typeof response.content === "string") {
+ if (_optionalChain([response, 'optionalAccess', _54 => _54.content]) && typeof response.content === "string") {
  return response.content;
  }
  throw new Error("Failed to generate Playwright test code");
@@ -1984,13 +2194,13 @@ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
  }
  ];
  if (options.stream && options.onChunk) {
- return await call2(prompt, 2 /* EXTRACT_DATA */, void 0, {
+ return await call2(prompt, 2 /* EXTRACT_DATA */, {
  stream: true,
  onChunk: options.onChunk
  });
  } else {
  const response = await call2(prompt, 2 /* EXTRACT_DATA */);
- if (_optionalChain([response, 'optionalAccess', _54 => _54.content]) && typeof response.content === "string") {
+ if (_optionalChain([response, 'optionalAccess', _55 => _55.content]) && typeof response.content === "string") {
  return {
  content: response.content,
  usage: response.usage,
@@ -2180,7 +2390,7 @@ var extraTextFromUserPrompt = (prompt) => {
  };
  var promptsToChatParam = async (multimodalPrompt) => {
  const msgs = [];
- if (_optionalChain([multimodalPrompt, 'optionalAccess', _55 => _55.images, 'optionalAccess', _56 => _56.length])) {
+ if (_optionalChain([multimodalPrompt, 'optionalAccess', _56 => _56.images, 'optionalAccess', _57 => _57.length])) {
  msgs.push({
  role: "user",
  content: [
@@ -2289,10 +2499,10 @@ async function AiLocateElement(options) {
  if ("bbox" in res.content && Array.isArray(res.content.bbox)) {
  resRect = adaptBboxToRect(
  res.content.bbox,
- _optionalChain([options, 'access', _57 => _57.searchConfig, 'optionalAccess', _58 => _58.rect, 'optionalAccess', _59 => _59.width]) || context.size.width,
- _optionalChain([options, 'access', _60 => _60.searchConfig, 'optionalAccess', _61 => _61.rect, 'optionalAccess', _62 => _62.height]) || context.size.height,
- _optionalChain([options, 'access', _63 => _63.searchConfig, 'optionalAccess', _64 => _64.rect, 'optionalAccess', _65 => _65.left]),
- _optionalChain([options, 'access', _66 => _66.searchConfig, 'optionalAccess', _67 => _67.rect, 'optionalAccess', _68 => _68.top])
+ _optionalChain([options, 'access', _58 => _58.searchConfig, 'optionalAccess', _59 => _59.rect, 'optionalAccess', _60 => _60.width]) || context.size.width,
+ _optionalChain([options, 'access', _61 => _61.searchConfig, 'optionalAccess', _62 => _62.rect, 'optionalAccess', _63 => _63.height]) || context.size.height,
+ _optionalChain([options, 'access', _64 => _64.searchConfig, 'optionalAccess', _65 => _65.rect, 'optionalAccess', _66 => _66.left]),
+ _optionalChain([options, 'access', _67 => _67.searchConfig, 'optionalAccess', _68 => _68.rect, 'optionalAccess', _69 => _69.top])
  );
  debugInspect("resRect", resRect);
  const rectCenter = {
@@ -2311,7 +2521,7 @@ async function AiLocateElement(options) {
  }
  } catch (e) {
  const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : "unknown error in locate";
- if (!errors || _optionalChain([errors, 'optionalAccess', _69 => _69.length]) === 0) {
+ if (!errors || _optionalChain([errors, 'optionalAccess', _70 => _70.length]) === 0) {
  errors = [msg];
  } else {
  errors.push(`(${msg})`);
@@ -2403,21 +2613,27 @@ async function AiLocateSection(options) {
  };
  }
  async function AiExtractElementInfo(options) {
- const { dataQuery, context, extractOption, multimodalPrompt } = options;
+ const {
+ dataQuery,
+ context,
+ extractOption,
+ multimodalPrompt,
+ modelPreferences
+ } = options;
  const systemPrompt = systemPromptToExtract();
  const { screenshotBase64 } = context;
  const { description, elementById } = await describeUserPage(context, {
  truncateTextLength: 200,
  filterNonTextContent: false,
  visibleOnly: false,
- domIncluded: _optionalChain([extractOption, 'optionalAccess', _70 => _70.domIncluded])
+ domIncluded: _optionalChain([extractOption, 'optionalAccess', _71 => _71.domIncluded])
  });
  const extractDataPromptText = await extractDataQueryPrompt(
  description,
  dataQuery
  );
  const userContent = [];
- if (_optionalChain([extractOption, 'optionalAccess', _71 => _71.screenshotIncluded]) !== false) {
+ if (_optionalChain([extractOption, 'optionalAccess', _72 => _72.screenshotIncluded]) !== false) {
  userContent.push({
  type: "image_url",
  image_url: {
@@ -2437,7 +2653,7 @@ async function AiExtractElementInfo(options) {
  content: userContent
  }
  ];
- if (_optionalChain([options, 'access', _72 => _72.extractOption, 'optionalAccess', _73 => _73.returnThought])) {
+ if (_optionalChain([options, 'access', _73 => _73.extractOption, 'optionalAccess', _74 => _74.returnThought])) {
  msgs.push({
  role: "user",
  content: "Please provide reasons."
@@ -2452,7 +2668,8 @@ async function AiExtractElementInfo(options) {
  }
  const result = await callAiFn(
  msgs,
- 2 /* EXTRACT_DATA */
+ 2 /* EXTRACT_DATA */,
+ modelPreferences
  );
  return {
  parseResult: result.content,
@@ -2566,7 +2783,7 @@ async function plan(userInstruction, opts) {
  const { content, usage } = await call3(msgs, 3 /* PLAN */);
  const rawResponse = JSON.stringify(content, void 0, 2);
  const planFromAI = content;
- const actions = (_optionalChain([planFromAI, 'access', _74 => _74.action, 'optionalAccess', _75 => _75.type]) ? [planFromAI.action] : planFromAI.actions) || [];
+ const actions = (_optionalChain([planFromAI, 'access', _75 => _75.action, 'optionalAccess', _76 => _76.type]) ? [planFromAI.action] : planFromAI.actions) || [];
  const returnValue = {
  ...planFromAI,
  actions,
@@ -2593,7 +2810,7 @@ async function plan(userInstruction, opts) {
  _utils.assert.call(void 0, !planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
  } else {
  actions.forEach((action) => {
- if (_optionalChain([action, 'access', _76 => _76.locate, 'optionalAccess', _77 => _77.id])) {
+ if (_optionalChain([action, 'access', _77 => _77.locate, 'optionalAccess', _78 => _78.id])) {
  const element = elementById(action.locate.id);
  if (element) {
  action.locate.id = element.id;
@@ -2922,4 +3139,4 @@ async function resizeImageForUiTars(imageBase64, size) {
 
  exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.call = call2; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFnWithStringResponse = callAiFnWithStringResponse; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.generateYamlTest = generateYamlTest; exports.generateYamlTestStream = generateYamlTestStream; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generatePlaywrightTestStream = generatePlaywrightTestStream; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
 
- //# sourceMappingURL=chunk-I5LBWOQA.js.map
+ //# sourceMappingURL=chunk-G2JTYWI6.js.map
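For reference, exercising the new per-intent path only requires setting the parallel `MIDSCENE_VQA_*` variables next to the usual ones; a hypothetical setup (model names and key values are placeholders, not from this package):

    // Global model used for planning, asserting and locating:
    process.env.MIDSCENE_MODEL_NAME = 'gpt-4o';
    process.env.OPENAI_API_KEY = 'sk-placeholder';

    // VQA-intent calls resolve their own endpoint and model instead:
    process.env.MIDSCENE_VQA_MODEL_NAME = 'some-vision-model';
    process.env.MIDSCENE_VQA_OPENAI_BASE_URL = 'https://example.invalid/v1';
    process.env.MIDSCENE_VQA_OPENAI_API_KEY = 'sk-vqa-placeholder';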