@midscene/core 0.25.4-beta-20250807062119.0 → 0.25.4-beta-20250808064529.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/dist/es/ai-model.d.ts +7 -6
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-G2JTYWI6.js → chunk-I5LBWOQA.js} +156 -373
  4. package/dist/es/chunk-I5LBWOQA.js.map +1 -0
  5. package/dist/es/{chunk-JH54OF4E.js → chunk-UIEDQYHD.js} +3 -3
  6. package/dist/es/index.d.ts +6 -6
  7. package/dist/es/index.js +4 -5
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-f449f3b8.d.ts → llm-planning-92cec090.d.ts} +2 -3
  10. package/dist/es/{types-7435eba0.d.ts → types-b4a208c6.d.ts} +3 -9
  11. package/dist/es/utils.d.ts +1 -1
  12. package/dist/es/utils.js +1 -1
  13. package/dist/lib/ai-model.d.ts +7 -6
  14. package/dist/lib/ai-model.js +2 -2
  15. package/dist/lib/{chunk-G2JTYWI6.js → chunk-I5LBWOQA.js} +141 -358
  16. package/dist/lib/chunk-I5LBWOQA.js.map +1 -0
  17. package/dist/lib/{chunk-JH54OF4E.js → chunk-UIEDQYHD.js} +3 -3
  18. package/dist/lib/index.d.ts +6 -6
  19. package/dist/lib/index.js +14 -15
  20. package/dist/lib/index.js.map +1 -1
  21. package/dist/lib/{llm-planning-f449f3b8.d.ts → llm-planning-92cec090.d.ts} +2 -3
  22. package/dist/{types/types-7435eba0.d.ts → lib/types-b4a208c6.d.ts} +3 -9
  23. package/dist/lib/utils.d.ts +1 -1
  24. package/dist/lib/utils.js +2 -2
  25. package/dist/types/ai-model.d.ts +7 -6
  26. package/dist/types/index.d.ts +6 -6
  27. package/dist/types/{llm-planning-f449f3b8.d.ts → llm-planning-92cec090.d.ts} +2 -3
  28. package/dist/{lib/types-7435eba0.d.ts → types/types-b4a208c6.d.ts} +3 -9
  29. package/dist/types/utils.d.ts +1 -1
  30. package/package.json +3 -3
  31. package/dist/es/chunk-G2JTYWI6.js.map +0 -1
  32. package/dist/lib/chunk-G2JTYWI6.js.map +0 -1
  33. package/dist/es/{chunk-JH54OF4E.js.map → chunk-UIEDQYHD.js.map} +0 -0
  34. package/dist/lib/{chunk-JH54OF4E.js.map → chunk-UIEDQYHD.js.map} +0 -0
@@ -10,6 +10,25 @@ var _identity = require('@azure/identity');
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 var _env = require('@midscene/shared/env');
@@ -36,11 +55,10 @@ var AIActionType = /* @__PURE__ */ ((AIActionType2) => {
   AIActionType2[AIActionType2["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
   return AIActionType2;
 })(AIActionType || {});
-async function callAiFn(msgs, AIActionTypeValue, modelPreferences) {
+async function callAiFn(msgs, AIActionTypeValue) {
   const { content, usage } = await callToGetJSONObject(
     msgs,
-    AIActionTypeValue,
-    modelPreferences
+    AIActionTypeValue
   );
   return { content, usage };
 }
@@ -1145,57 +1163,24 @@ pageDescription:
   });
 };
 
-// src/ai-model/service-caller/utils.ts
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-function getModelName() {
-  let modelName = "gpt-4o";
-  const nameInConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME);
-  if (nameInConfig) {
-    modelName = nameInConfig;
-  }
-  return modelName;
+// src/ai-model/service-caller/index.ts
+function checkAIConfig() {
+  const openaiKey = _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY);
+  const azureConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI);
+  const anthropicKey = _env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY);
+  const initConfigJson = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
+  if (openaiKey)
+    return true;
+  if (azureConfig)
+    return true;
+  if (anthropicKey)
+    return true;
+  return Boolean(initConfigJson);
 }
+var debugConfigInitialized = false;
 function initDebugConfig() {
+  if (debugConfigInitialized)
+    return;
   const shouldPrintTiming = _env.getAIConfigInBoolean.call(void 0, _env.MIDSCENE_DEBUG_AI_PROFILE);
   let debugConfig = "";
   if (shouldPrintTiming) {
@@ -1220,230 +1205,25 @@ function initDebugConfig() {
   if (debugConfig) {
     _logger.enableDebug.call(void 0, debugConfig);
   }
+  debugConfigInitialized = true;
 }
-var createAssert = (modelNameKey, modelName) => (value, key, modelVendorFlag) => {
-  if (modelVendorFlag) {
-    _utils.assert.call(void 0,
-      value,
-      `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName} and ${modelVendorFlag} has also been specified, but got: ${value}
-Please check your config.`
-    );
-  } else {
-    _utils.assert.call(void 0,
-      value,
-      `The ${key} must be a non-empty string because of the ${modelNameKey} is declared as ${modelName}, but got: ${value}
-Please check your config.`
-    );
-  }
-};
-var getModelConfigFromEnv = (modelName, keys, valueAssert) => {
-  const socksProxy = _env.getAIConfig.call(void 0, keys.socksProxy);
-  const httpProxy = _env.getAIConfig.call(void 0, keys.httpProxy);
-  if (_env.getAIConfig.call(void 0, keys.openaiUseAzureDeprecated)) {
-    const openaiBaseURL = _env.getAIConfig.call(void 0, keys.openaiBaseURL);
-    const openaiApiKey = _env.getAIConfig.call(void 0, keys.openaiApiKey);
-    const openaiExtraConfig = _env.getAIConfigInJson.call(void 0, keys.openaiExtraConfig);
-    valueAssert(
-      openaiBaseURL,
-      keys.openaiBaseURL,
-      keys.openaiUseAzureDeprecated
-    );
-    valueAssert(openaiApiKey, keys.openaiApiKey, keys.openaiUseAzureDeprecated);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      openaiUseAzureDeprecated: true,
-      openaiApiKey,
-      openaiBaseURL,
-      openaiExtraConfig
-    };
-  } else if (_env.getAIConfig.call(void 0, keys.useAzureOpenai)) {
-    const azureOpenaiScope = _env.getAIConfig.call(void 0, keys.azureOpenaiScope);
-    const azureOpenaiApiKey = _env.getAIConfig.call(void 0, keys.azureOpenaiApiKey);
-    const azureOpenaiEndpoint = _env.getAIConfig.call(void 0, keys.azureOpenaiEndpoint);
-    const azureOpenaiDeployment = _env.getAIConfig.call(void 0, keys.azureOpenaiDeployment);
-    const azureOpenaiApiVersion = _env.getAIConfig.call(void 0, keys.azureOpenaiApiVersion);
-    const azureExtraConfig = _env.getAIConfigInJson.call(void 0, keys.azureExtraConfig);
-    const openaiExtraConfig = _env.getAIConfigInJson.call(void 0, keys.openaiExtraConfig);
-    valueAssert(azureOpenaiApiKey, keys.azureOpenaiApiKey, keys.useAzureOpenai);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      useAzureOpenai: true,
-      azureOpenaiScope,
-      azureOpenaiApiKey,
-      azureOpenaiEndpoint,
-      azureOpenaiDeployment,
-      azureOpenaiApiVersion,
-      azureExtraConfig,
-      openaiExtraConfig
-    };
-  } else if (_env.getAIConfig.call(void 0, keys.useAnthropicSdk)) {
-    const anthropicApiKey = _env.getAIConfig.call(void 0, keys.anthropicApiKey);
-    valueAssert(anthropicApiKey, keys.anthropicApiKey, keys.useAnthropicSdk);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      useAnthropicSdk: true,
-      anthropicApiKey
-    };
-  } else {
-    const openaiBaseURL = _env.getAIConfig.call(void 0, keys.openaiBaseURL);
-    const openaiApiKey = _env.getAIConfig.call(void 0, keys.openaiApiKey);
-    const openaiExtraConfig = _env.getAIConfigInJson.call(void 0, keys.openaiExtraConfig);
-    valueAssert(openaiBaseURL, keys.openaiBaseURL);
-    valueAssert(openaiApiKey, keys.openaiApiKey);
-    return {
-      socksProxy,
-      httpProxy,
-      modelName,
-      openaiBaseURL,
-      openaiApiKey,
-      openaiExtraConfig
-    };
-  }
-};
-var maskKey = (key, maskChar = "*") => {
-  if (typeof key !== "string" || key.length === 0) {
-    return key;
-  }
-  const prefixLen = 3;
-  const suffixLen = 3;
-  const keepLength = prefixLen + suffixLen;
-  if (key.length <= keepLength) {
-    return key;
-  }
-  const prefix = key.substring(0, prefixLen);
-  const suffix = key.substring(key.length - suffixLen);
-  const maskLength = key.length - keepLength;
-  const mask = maskChar.repeat(maskLength);
-  return `${prefix}${mask}${suffix}`;
-};
-var maskConfig = (config) => {
-  return Object.fromEntries(
-    Object.entries(config).map(([key, value]) => [
-      key,
-      ["openaiApiKey", "azureOpenaiApiKey", "anthropicApiKey"].includes(key) ? maskKey(value) : value
-    ])
-  );
-};
-var decideModelConfig = (modelPreferences) => {
-  initDebugConfig();
-  const debugLog = _logger.getDebug.call(void 0, "ai:decideModelConfig");
-  debugLog("modelPreferences", modelPreferences);
-  const isVQAIntent = _optionalChain([modelPreferences, 'optionalAccess', _21 => _21.intent]) === "VQA";
-  const vqaModelName = _env.getAIConfig.call(void 0, _env.MIDSCENE_VQA_MODEL_NAME);
-  if (isVQAIntent && vqaModelName) {
-    debugLog(
-      `current action is a VQA action and detected ${_env.MIDSCENE_VQA_MODEL_NAME} ${vqaModelName}, will only read VQA related model config from process.env`
-    );
-    const config = getModelConfigFromEnv(
-      vqaModelName,
-      {
-        /**
-         * proxy
-         */
-        socksProxy: _env.MIDSCENE_VQA_OPENAI_SOCKS_PROXY,
-        httpProxy: _env.MIDSCENE_VQA_OPENAI_HTTP_PROXY,
-        /**
-         * OpenAI
-         */
-        openaiBaseURL: _env.MIDSCENE_VQA_OPENAI_BASE_URL,
-        openaiApiKey: _env.MIDSCENE_VQA_OPENAI_API_KEY,
-        openaiExtraConfig: _env.MIDSCENE_VQA_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Azure
-         */
-        openaiUseAzureDeprecated: _env.MIDSCENE_VQA_OPENAI_USE_AZURE,
-        useAzureOpenai: _env.MIDSCENE_VQA_USE_AZURE_OPENAI,
-        azureOpenaiScope: _env.MIDSCENE_VQA_AZURE_OPENAI_SCOPE,
-        azureOpenaiApiKey: _env.MIDSCENE_VQA_AZURE_OPENAI_KEY,
-        azureOpenaiEndpoint: _env.MIDSCENE_VQA_AZURE_OPENAI_ENDPOINT,
-        azureOpenaiApiVersion: _env.MIDSCENE_VQA_AZURE_OPENAI_API_VERSION,
-        azureOpenaiDeployment: _env.MIDSCENE_VQA_AZURE_OPENAI_DEPLOYMENT,
-        azureExtraConfig: _env.MIDSCENE_VQA_AZURE_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Anthropic
-         */
-        useAnthropicSdk: _env.MIDSCENE_VQA_USE_ANTHROPIC_SDK,
-        anthropicApiKey: _env.MIDSCENE_VQA_ANTHROPIC_API_KEY
-      },
-      createAssert(_env.MIDSCENE_VQA_MODEL_NAME, vqaModelName)
-    );
-    debugLog("got model config for VQA usage:", maskConfig(config));
-    return config;
-  } else {
-    debugLog("read model config from process.env as normal.");
-    const commonModelName = _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME);
-    _utils.assert.call(void 0,
-      commonModelName,
-      `${_env.MIDSCENE_MODEL_NAME} is empty, please check your config.`
-    );
-    const config = getModelConfigFromEnv(
-      commonModelName,
-      {
-        /**
-         * proxy
-         */
-        socksProxy: _env.MIDSCENE_OPENAI_SOCKS_PROXY,
-        httpProxy: _env.MIDSCENE_OPENAI_HTTP_PROXY,
-        /**
-         * OpenAI
-         */
-        openaiBaseURL: _env.OPENAI_BASE_URL,
-        openaiApiKey: _env.OPENAI_API_KEY,
-        openaiExtraConfig: _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Azure
-         */
-        openaiUseAzureDeprecated: _env.OPENAI_USE_AZURE,
-        useAzureOpenai: _env.MIDSCENE_USE_AZURE_OPENAI,
-        azureOpenaiScope: _env.MIDSCENE_AZURE_OPENAI_SCOPE,
-        azureOpenaiApiKey: _env.AZURE_OPENAI_KEY,
-        azureOpenaiEndpoint: _env.AZURE_OPENAI_ENDPOINT,
-        azureOpenaiApiVersion: _env.AZURE_OPENAI_API_VERSION,
-        azureOpenaiDeployment: _env.AZURE_OPENAI_DEPLOYMENT,
-        azureExtraConfig: _env.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON,
-        /**
-         * Anthropic
-         */
-        useAnthropicSdk: _env.MIDSCENE_USE_ANTHROPIC_SDK,
-        anthropicApiKey: _env.ANTHROPIC_API_KEY
-      },
-      createAssert(_env.MIDSCENE_MODEL_NAME, commonModelName)
-    );
-    debugLog("got model config for common usage:", maskConfig(config));
-    return config;
+var defaultModel = "gpt-4o";
+function getModelName() {
+  let modelName = defaultModel;
+  const nameInConfig = _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME);
+  if (nameInConfig) {
+    modelName = nameInConfig;
   }
-};
-
-// src/ai-model/service-caller/index.ts
+  return modelName;
+}
 async function createChatClient({
-  AIActionTypeValue,
-  modelPreferences
+  AIActionTypeValue
 }) {
-  const {
-    socksProxy,
-    httpProxy,
-    modelName,
-    openaiBaseURL,
-    openaiApiKey,
-    openaiExtraConfig,
-    openaiUseAzureDeprecated,
-    useAzureOpenai,
-    azureOpenaiScope,
-    azureOpenaiApiKey,
-    azureOpenaiEndpoint,
-    azureOpenaiApiVersion,
-    azureOpenaiDeployment,
-    azureExtraConfig,
-    useAnthropicSdk,
-    anthropicApiKey
-  } = decideModelConfig(modelPreferences);
+  initDebugConfig();
   let openai;
+  const extraConfig = _env.getAIConfigInJson.call(void 0, _env.MIDSCENE_OPENAI_INIT_CONFIG_JSON);
+  const socksProxy = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_SOCKS_PROXY);
+  const httpProxy = _env.getAIConfig.call(void 0, _env.MIDSCENE_OPENAI_HTTP_PROXY);
   let proxyAgent = void 0;
   const debugProxy = _logger.getDebug.call(void 0, "ai:call:proxy");
   if (httpProxy) {
@@ -1453,50 +1233,64 @@ async function createChatClient({
     debugProxy("using socks proxy", socksProxy);
     proxyAgent = new (0, _socksproxyagent.SocksProxyAgent)(socksProxy);
   }
-  if (openaiUseAzureDeprecated) {
+  if (_env.getAIConfig.call(void 0, _env.OPENAI_USE_AZURE)) {
     openai = new (0, _openai.AzureOpenAI)({
-      baseURL: openaiBaseURL,
-      apiKey: openaiApiKey,
+      baseURL: _env.getAIConfig.call(void 0, _env.OPENAI_BASE_URL),
+      apiKey: _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY),
       httpAgent: proxyAgent,
-      ...openaiExtraConfig,
+      ...extraConfig,
       dangerouslyAllowBrowser: true
     });
-  } else if (useAzureOpenai) {
+  } else if (_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_AZURE_OPENAI)) {
+    const extraAzureConfig = _env.getAIConfigInJson.call(void 0,
+      _env.MIDSCENE_AZURE_OPENAI_INIT_CONFIG_JSON
+    );
+    const scope = _env.getAIConfig.call(void 0, _env.MIDSCENE_AZURE_OPENAI_SCOPE);
     let tokenProvider = void 0;
-    if (azureOpenaiScope) {
+    if (scope) {
       _utils.assert.call(void 0,
         !_utils.ifInBrowser,
         "Azure OpenAI is not supported in browser with Midscene."
       );
       const credential = new (0, _identity.DefaultAzureCredential)();
-      tokenProvider = _identity.getBearerTokenProvider.call(void 0, credential, azureOpenaiScope);
+      _utils.assert.call(void 0, scope, "MIDSCENE_AZURE_OPENAI_SCOPE is required");
+      tokenProvider = _identity.getBearerTokenProvider.call(void 0, credential, scope);
       openai = new (0, _openai.AzureOpenAI)({
         azureADTokenProvider: tokenProvider,
-        endpoint: azureOpenaiEndpoint,
-        apiVersion: azureOpenaiApiVersion,
-        deployment: azureOpenaiDeployment,
-        ...openaiExtraConfig,
-        ...azureExtraConfig
+        endpoint: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_ENDPOINT),
+        apiVersion: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_API_VERSION),
+        deployment: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_DEPLOYMENT),
+        ...extraConfig,
+        ...extraAzureConfig
       });
     } else {
       openai = new (0, _openai.AzureOpenAI)({
-        apiKey: azureOpenaiApiKey,
-        endpoint: azureOpenaiEndpoint,
-        apiVersion: azureOpenaiApiVersion,
-        deployment: azureOpenaiDeployment,
+        apiKey: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_KEY),
+        endpoint: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_ENDPOINT),
+        apiVersion: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_API_VERSION),
+        deployment: _env.getAIConfig.call(void 0, _env.AZURE_OPENAI_DEPLOYMENT),
        dangerouslyAllowBrowser: true,
-        ...openaiExtraConfig,
-        ...azureExtraConfig
+        ...extraConfig,
+        ...extraAzureConfig
       });
     }
-  } else if (!useAnthropicSdk) {
+  } else if (!_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_ANTHROPIC_SDK)) {
+    const baseURL = _env.getAIConfig.call(void 0, _env.OPENAI_BASE_URL);
+    if (typeof baseURL === "string") {
+      if (!/^https?:\/\//.test(baseURL)) {
+        throw new Error(
+          `OPENAI_BASE_URL must be a valid URL starting with http:// or https://, but got: ${baseURL}
+Please check your config.`
+        );
+      }
+    }
     openai = new (0, _openai2.default)({
-      baseURL: openaiBaseURL,
-      apiKey: openaiApiKey,
+      baseURL: _env.getAIConfig.call(void 0, _env.OPENAI_BASE_URL),
+      apiKey: _env.getAIConfig.call(void 0, _env.OPENAI_API_KEY),
       httpAgent: proxyAgent,
-      ...openaiExtraConfig,
+      ...extraConfig,
       defaultHeaders: {
-        ..._optionalChain([openaiExtraConfig, 'optionalAccess', _22 => _22.defaultHeaders]) || {},
+        ..._optionalChain([extraConfig, 'optionalAccess', _21 => _21.defaultHeaders]) || {},
         [_env.MIDSCENE_API_TYPE]: AIActionTypeValue.toString()
       },
       dangerouslyAllowBrowser: true
@@ -1513,13 +1307,14 @@ async function createChatClient({
   if (typeof openai !== "undefined") {
     return {
       completion: openai.chat.completions,
-      style: "openai",
-      modelName
+      style: "openai"
     };
   }
-  if (useAnthropicSdk) {
+  if (_env.getAIConfig.call(void 0, _env.MIDSCENE_USE_ANTHROPIC_SDK)) {
+    const apiKey = _env.getAIConfig.call(void 0, _env.ANTHROPIC_API_KEY);
+    _utils.assert.call(void 0, apiKey, "ANTHROPIC_API_KEY is required");
     openai = new (0, _sdk.Anthropic)({
-      apiKey: anthropicApiKey,
+      apiKey,
       httpAgent: proxyAgent,
       dangerouslyAllowBrowser: true
     });
@@ -1527,24 +1322,26 @@ async function createChatClient({
   if (typeof openai !== "undefined" && openai.messages) {
     return {
       completion: openai.messages,
-      style: "anthropic",
-      modelName
+      style: "anthropic"
     };
   }
   throw new Error("Openai SDK or Anthropic SDK is not initialized");
 }
-async function call2(messages, AIActionTypeValue, options, modelPreferences) {
-  const { completion, style, modelName } = await createChatClient({
-    AIActionTypeValue,
-    modelPreferences
+async function call2(messages, AIActionTypeValue, responseFormat, options) {
+  _utils.assert.call(void 0,
+    checkAIConfig(),
+    "Cannot find config for AI model service. If you are using a self-hosted model without validating the API key, please set `OPENAI_API_KEY` to any non-null value. https://midscenejs.com/model-provider.html"
+  );
+  const { completion, style } = await createChatClient({
+    AIActionTypeValue
   });
-  const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
   const maxTokens = _env.getAIConfig.call(void 0, _env.OPENAI_MAX_TOKENS);
   const debugCall = _logger.getDebug.call(void 0, "ai:call");
   const debugProfileStats = _logger.getDebug.call(void 0, "ai:profile:stats");
   const debugProfileDetail = _logger.getDebug.call(void 0, "ai:profile:detail");
   const startTime = Date.now();
-  const isStreaming = _optionalChain([options, 'optionalAccess', _23 => _23.stream]) && _optionalChain([options, 'optionalAccess', _24 => _24.onChunk]);
+  const model = getModelName();
+  const isStreaming = _optionalChain([options, 'optionalAccess', _22 => _22.stream]) && _optionalChain([options, 'optionalAccess', _23 => _23.onChunk]);
   let content;
   let accumulated = "";
   let usage;
@@ -1560,12 +1357,12 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
   try {
     if (style === "openai") {
       debugCall(
-        `sending ${isStreaming ? "streaming " : ""}request to ${modelName}`
+        `sending ${isStreaming ? "streaming " : ""}request to ${model}`
       );
       if (isStreaming) {
         const stream = await completion.create(
           {
-            model: modelName,
+            model,
             messages,
             response_format: responseFormat,
             ...commonConfig
@@ -1575,8 +1372,8 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
           }
         );
         for await (const chunk of stream) {
-          const content2 = _optionalChain([chunk, 'access', _25 => _25.choices, 'optionalAccess', _26 => _26[0], 'optionalAccess', _27 => _27.delta, 'optionalAccess', _28 => _28.content]) || "";
-          const reasoning_content = _optionalChain([chunk, 'access', _29 => _29.choices, 'optionalAccess', _30 => _30[0], 'optionalAccess', _31 => _31.delta, 'optionalAccess', _32 => _32.reasoning_content]) || "";
+          const content2 = _optionalChain([chunk, 'access', _24 => _24.choices, 'optionalAccess', _25 => _25[0], 'optionalAccess', _26 => _26.delta, 'optionalAccess', _27 => _27.content]) || "";
+          const reasoning_content = _optionalChain([chunk, 'access', _28 => _28.choices, 'optionalAccess', _29 => _29[0], 'optionalAccess', _30 => _30.delta, 'optionalAccess', _31 => _31.reasoning_content]) || "";
           if (chunk.usage) {
             usage = chunk.usage;
           }
@@ -1591,7 +1388,7 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
             };
             options.onChunk(chunkData);
           }
-          if (_optionalChain([chunk, 'access', _33 => _33.choices, 'optionalAccess', _34 => _34[0], 'optionalAccess', _35 => _35.finish_reason])) {
+          if (_optionalChain([chunk, 'access', _32 => _32.choices, 'optionalAccess', _33 => _33[0], 'optionalAccess', _34 => _34.finish_reason])) {
             timeCost = Date.now() - startTime;
             if (!usage) {
               const estimatedTokens = Math.max(
@@ -1622,18 +1419,18 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
         }
         content = accumulated;
         debugProfileStats(
-          `streaming model, ${modelName}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, cost-ms, ${timeCost}`
+          `streaming model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, cost-ms, ${timeCost}`
         );
       } else {
         const result = await completion.create({
-          model: modelName,
+          model,
           messages,
           response_format: responseFormat,
           ...commonConfig
         });
         timeCost = Date.now() - startTime;
         debugProfileStats(
-          `model, ${modelName}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _36 => _36.usage, 'optionalAccess', _37 => _37.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _38 => _38.usage, 'optionalAccess', _39 => _39.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _40 => _40.usage, 'optionalAccess', _41 => _41.total_tokens]) || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
+          `model, ${model}, mode, ${_env.vlLocateMode.call(void 0, ) || "default"}, ui-tars-version, ${_env.uiTarsModelVersion.call(void 0, )}, prompt-tokens, ${_optionalChain([result, 'access', _35 => _35.usage, 'optionalAccess', _36 => _36.prompt_tokens]) || ""}, completion-tokens, ${_optionalChain([result, 'access', _37 => _37.usage, 'optionalAccess', _38 => _38.completion_tokens]) || ""}, total-tokens, ${_optionalChain([result, 'access', _39 => _39.usage, 'optionalAccess', _40 => _40.total_tokens]) || ""}, cost-ms, ${timeCost}, requestId, ${result._request_id || ""}`
         );
         debugProfileDetail(
           `model usage detail: ${JSON.stringify(result.usage)}`
@@ -1665,7 +1462,7 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
       };
       if (isStreaming) {
         const stream = await completion.create({
-          model: modelName,
+          model,
           system: "You are a versatile professional in software UI automation",
           messages: messages.map((m) => ({
             role: "user",
@@ -1675,7 +1472,7 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
           ...commonConfig
         });
         for await (const chunk of stream) {
-          const content2 = _optionalChain([chunk, 'access', _42 => _42.delta, 'optionalAccess', _43 => _43.text]) || "";
+          const content2 = _optionalChain([chunk, 'access', _41 => _41.delta, 'optionalAccess', _42 => _42.text]) || "";
           if (content2) {
             accumulated += content2;
             const chunkData = {
@@ -1709,7 +1506,7 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
         content = accumulated;
       } else {
         const result = await completion.create({
-          model: modelName,
+          model,
           system: "You are a versatile professional in software UI automation",
           messages: messages.map((m) => ({
             role: "user",
@@ -1756,9 +1553,10 @@ async function call2(messages, AIActionTypeValue, options, modelPreferences) {
     throw newError;
   }
 }
-var getResponseFormat = (modelName, AIActionTypeValue) => {
+async function callToGetJSONObject(messages, AIActionTypeValue) {
   let responseFormat;
-  if (modelName.includes("gpt-4")) {
+  const model = getModelName();
+  if (model.includes("gpt-4")) {
     switch (AIActionTypeValue) {
       case 0 /* ASSERT */:
         responseFormat = assertSchema;
@@ -1775,18 +1573,10 @@ var getResponseFormat = (modelName, AIActionTypeValue) => {
         break;
     }
   }
-  if (modelName === "gpt-4o-2024-05-13") {
+  if (model === "gpt-4o-2024-05-13") {
     responseFormat = { type: "json_object" /* JSON */ };
   }
-  return responseFormat;
-};
-async function callToGetJSONObject(messages, AIActionTypeValue, modelPreferences) {
-  const response = await call2(
-    messages,
-    AIActionTypeValue,
-    void 0,
-    modelPreferences
-  );
+  const response = await call2(messages, AIActionTypeValue, responseFormat);
   _utils.assert.call(void 0, response, "empty response");
   const jsonContent = safeParseJson(response.content);
   return { content: jsonContent, usage: response.usage };
@@ -1825,8 +1615,8 @@ function preprocessDoubaoBboxJson(input) {
 }
 function safeParseJson(input) {
   const cleanJsonString = extractJSONFromCodeBlock(input);
-  if (_optionalChain([cleanJsonString, 'optionalAccess', _44 => _44.match, 'call', _45 => _45(/\((\d+),(\d+)\)/)])) {
-    return _optionalChain([cleanJsonString, 'access', _46 => _46.match, 'call', _47 => _47(/\((\d+),(\d+)\)/), 'optionalAccess', _48 => _48.slice, 'call', _49 => _49(1), 'access', _50 => _50.map, 'call', _51 => _51(Number)]);
+  if (_optionalChain([cleanJsonString, 'optionalAccess', _43 => _43.match, 'call', _44 => _44(/\((\d+),(\d+)\)/)])) {
+    return _optionalChain([cleanJsonString, 'access', _45 => _45.match, 'call', _46 => _46(/\((\d+),(\d+)\)/), 'optionalAccess', _47 => _47.slice, 'call', _48 => _48(1), 'access', _49 => _49.map, 'call', _50 => _50(Number)]);
   }
   try {
     return JSON.parse(cleanJsonString);
@@ -2010,7 +1800,7 @@ Respond with YAML only, no explanations.`
     });
   }
   const response = await call2(prompt, 2 /* EXTRACT_DATA */);
-  if (_optionalChain([response, 'optionalAccess', _52 => _52.content]) && typeof response.content === "string") {
+  if (_optionalChain([response, 'optionalAccess', _51 => _51.content]) && typeof response.content === "string") {
     return response.content;
   }
   throw new Error("Failed to generate YAML test configuration");
@@ -2071,13 +1861,13 @@ Respond with YAML only, no explanations.`
     });
   }
   if (options.stream && options.onChunk) {
-    return await call2(prompt, 2 /* EXTRACT_DATA */, {
+    return await call2(prompt, 2 /* EXTRACT_DATA */, void 0, {
       stream: true,
       onChunk: options.onChunk
     });
   } else {
     const response = await call2(prompt, 2 /* EXTRACT_DATA */);
-    if (_optionalChain([response, 'optionalAccess', _53 => _53.content]) && typeof response.content === "string") {
+    if (_optionalChain([response, 'optionalAccess', _52 => _52.content]) && typeof response.content === "string") {
       return {
         content: response.content,
         usage: response.usage,
@@ -2140,7 +1930,7 @@ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
     }
   ];
   const response = await call2(prompt, 2 /* EXTRACT_DATA */);
-  if (_optionalChain([response, 'optionalAccess', _54 => _54.content]) && typeof response.content === "string") {
+  if (_optionalChain([response, 'optionalAccess', _53 => _53.content]) && typeof response.content === "string") {
     return response.content;
   }
   throw new Error("Failed to generate Playwright test code");
@@ -2194,13 +1984,13 @@ ${_constants.PLAYWRIGHT_EXAMPLE_CODE}`;
     }
   ];
   if (options.stream && options.onChunk) {
-    return await call2(prompt, 2 /* EXTRACT_DATA */, {
+    return await call2(prompt, 2 /* EXTRACT_DATA */, void 0, {
       stream: true,
       onChunk: options.onChunk
     });
   } else {
     const response = await call2(prompt, 2 /* EXTRACT_DATA */);
-    if (_optionalChain([response, 'optionalAccess', _55 => _55.content]) && typeof response.content === "string") {
+    if (_optionalChain([response, 'optionalAccess', _54 => _54.content]) && typeof response.content === "string") {
       return {
         content: response.content,
         usage: response.usage,
@@ -2390,7 +2180,7 @@ var extraTextFromUserPrompt = (prompt) => {
 };
 var promptsToChatParam = async (multimodalPrompt) => {
   const msgs = [];
-  if (_optionalChain([multimodalPrompt, 'optionalAccess', _56 => _56.images, 'optionalAccess', _57 => _57.length])) {
+  if (_optionalChain([multimodalPrompt, 'optionalAccess', _55 => _55.images, 'optionalAccess', _56 => _56.length])) {
     msgs.push({
       role: "user",
       content: [
@@ -2499,10 +2289,10 @@ async function AiLocateElement(options) {
     if ("bbox" in res.content && Array.isArray(res.content.bbox)) {
       resRect = adaptBboxToRect(
         res.content.bbox,
-        _optionalChain([options, 'access', _58 => _58.searchConfig, 'optionalAccess', _59 => _59.rect, 'optionalAccess', _60 => _60.width]) || context.size.width,
-        _optionalChain([options, 'access', _61 => _61.searchConfig, 'optionalAccess', _62 => _62.rect, 'optionalAccess', _63 => _63.height]) || context.size.height,
-        _optionalChain([options, 'access', _64 => _64.searchConfig, 'optionalAccess', _65 => _65.rect, 'optionalAccess', _66 => _66.left]),
-        _optionalChain([options, 'access', _67 => _67.searchConfig, 'optionalAccess', _68 => _68.rect, 'optionalAccess', _69 => _69.top])
+        _optionalChain([options, 'access', _57 => _57.searchConfig, 'optionalAccess', _58 => _58.rect, 'optionalAccess', _59 => _59.width]) || context.size.width,
+        _optionalChain([options, 'access', _60 => _60.searchConfig, 'optionalAccess', _61 => _61.rect, 'optionalAccess', _62 => _62.height]) || context.size.height,
+        _optionalChain([options, 'access', _63 => _63.searchConfig, 'optionalAccess', _64 => _64.rect, 'optionalAccess', _65 => _65.left]),
+        _optionalChain([options, 'access', _66 => _66.searchConfig, 'optionalAccess', _67 => _67.rect, 'optionalAccess', _68 => _68.top])
       );
       debugInspect("resRect", resRect);
       const rectCenter = {
@@ -2521,7 +2311,7 @@ async function AiLocateElement(options) {
     }
   } catch (e) {
     const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : "unknown error in locate";
-    if (!errors || _optionalChain([errors, 'optionalAccess', _70 => _70.length]) === 0) {
+    if (!errors || _optionalChain([errors, 'optionalAccess', _69 => _69.length]) === 0) {
       errors = [msg];
     } else {
       errors.push(`(${msg})`);
@@ -2613,27 +2403,21 @@ async function AiLocateSection(options) {
   };
 }
 async function AiExtractElementInfo(options) {
-  const {
-    dataQuery,
-    context,
-    extractOption,
-    multimodalPrompt,
-    modelPreferences
-  } = options;
+  const { dataQuery, context, extractOption, multimodalPrompt } = options;
   const systemPrompt = systemPromptToExtract();
   const { screenshotBase64 } = context;
   const { description, elementById } = await describeUserPage(context, {
     truncateTextLength: 200,
     filterNonTextContent: false,
     visibleOnly: false,
-    domIncluded: _optionalChain([extractOption, 'optionalAccess', _71 => _71.domIncluded])
+    domIncluded: _optionalChain([extractOption, 'optionalAccess', _70 => _70.domIncluded])
   });
   const extractDataPromptText = await extractDataQueryPrompt(
     description,
     dataQuery
   );
   const userContent = [];
-  if (_optionalChain([extractOption, 'optionalAccess', _72 => _72.screenshotIncluded]) !== false) {
+  if (_optionalChain([extractOption, 'optionalAccess', _71 => _71.screenshotIncluded]) !== false) {
     userContent.push({
       type: "image_url",
       image_url: {
@@ -2653,7 +2437,7 @@ async function AiExtractElementInfo(options) {
       content: userContent
     }
   ];
-  if (_optionalChain([options, 'access', _73 => _73.extractOption, 'optionalAccess', _74 => _74.returnThought])) {
+  if (_optionalChain([options, 'access', _72 => _72.extractOption, 'optionalAccess', _73 => _73.returnThought])) {
     msgs.push({
       role: "user",
       content: "Please provide reasons."
@@ -2668,8 +2452,7 @@ async function AiExtractElementInfo(options) {
   }
   const result = await callAiFn(
     msgs,
-    2 /* EXTRACT_DATA */,
-    modelPreferences
+    2 /* EXTRACT_DATA */
   );
   return {
     parseResult: result.content,
@@ -2783,7 +2566,7 @@ async function plan(userInstruction, opts) {
   const { content, usage } = await call3(msgs, 3 /* PLAN */);
   const rawResponse = JSON.stringify(content, void 0, 2);
   const planFromAI = content;
-  const actions = (_optionalChain([planFromAI, 'access', _75 => _75.action, 'optionalAccess', _76 => _76.type]) ? [planFromAI.action] : planFromAI.actions) || [];
+  const actions = (_optionalChain([planFromAI, 'access', _74 => _74.action, 'optionalAccess', _75 => _75.type]) ? [planFromAI.action] : planFromAI.actions) || [];
   const returnValue = {
     ...planFromAI,
     actions,
@@ -2810,7 +2593,7 @@ async function plan(userInstruction, opts) {
     _utils.assert.call(void 0, !planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
   } else {
     actions.forEach((action) => {
-      if (_optionalChain([action, 'access', _77 => _77.locate, 'optionalAccess', _78 => _78.id])) {
+      if (_optionalChain([action, 'access', _76 => _76.locate, 'optionalAccess', _77 => _77.id])) {
         const element = elementById(action.locate.id);
         if (element) {
           action.locate.id = element.id;
@@ -3139,4 +2922,4 @@ async function resizeImageForUiTars(imageBase64, size) {
 
 exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.call = call2; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFnWithStringResponse = callAiFnWithStringResponse; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.generateYamlTest = generateYamlTest; exports.generateYamlTestStream = generateYamlTestStream; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generatePlaywrightTestStream = generatePlaywrightTestStream; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
 
-//# sourceMappingURL=chunk-G2JTYWI6.js.map
+//# sourceMappingURL=chunk-I5LBWOQA.js.map